In [None]:
# Business understanding

In this notebook we label the data and get deeper knowledge about the data.
We split the task. Each team member labels 50 samples.

In [5]:
# make sure just your username is uncommented
# user = "felix"
# user = "omar"
user = "tobias"

In [4]:
# For labelling the Framework pigeonXT is used: https://github.com/dennisbakhuis/pigeonXT/
# Check the pigeonXT github README, if it does not work out of the box.
!pip install pigeonXT-jupyter

Defaulting to user installation because normal site-packages is not writeable


In [193]:
import json
import matplotlib.pyplot as plt
%matplotlib notebook
import numpy as np
import time
import pandas as pd
import pigeonXT as pixt

In [172]:
# select 150 random indices from our 10000 entries
np.random.seed(0)
import numpy as np
all_indices = np.random.choice(range(10001), 150, replace=False)
#print(all_indices.shape)

# assign 50 labels to each user
indices = { "felix":  all_indices[:50],
            "omar":   all_indices[50:100],
            "tobias": all_indices[100:],
          }
print("samples to label:", indices[user].shape[0])

samples to label: 50


In [173]:
# load data
FILES=["../thermal_raw_20210507_full/20210507_1605_3078.txt", "../thermal_raw_20210507_full/20210507_1605_C088.txt"]
data = list()

for FILE in FILES:
    with open(FILE, "r") as f:
        for line in f:
            record = json.loads(line)
            data.append(record)

In [190]:
# convert data to pandas and do give it appropriate datatypes
df = pd.DataFrame(data)

# fix datatypes, drop sensor_size
df['Timestamp'] = df['Timestamp'].astype('int32')
df['Sensor ID'] = df['Sensor ID'].str.replace(r'Sensor_32x32_', r'')
df['Room Temperature'] = df['Room Temperature'].astype('float32')
df = df.drop(['Sensor size'], axis=1)

# show datatypes
#print(df.dtypes)

# show amount of samples
#print("amount of samples:", df.shape[0])

# show first 3 rows
#df.head(n=3)

In [188]:
# select samples for the user
# if user CSV file exists, continue with this one
df_user = df.loc[indices[user]]
# show amount of samples
print("amount of samples:", df.shape[0])
df_user.head(n=3)

amount of samples: 10000


Unnamed: 0,Timestamp,Sensor ID,Room Temperature,RSSI,data
4323,1620393158,3078,23.0,-81,"[[9.6, 11.4, 11.8, 12.0, 11.4, 11.5, 12.1, 12...."
3232,1620393050,3078,22.5,-67,"[[10.7, 11.4, 10.8, 9.5, 12.4, 12.1, 12.2, 12...."
9985,1620393199,C088,23.1,-68,"[[11.8, 13.0, 12.1, 11.7, 12.8, 13.5, 12.6, 12..."


In [191]:
# function to show the image
def show_image(img):
    fig, ax = plt.subplots(figsize=(2, 2))
    ax.set_axis_off()
    plt.imshow(img, interpolation='nearest', aspect='auto')
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.set_axis_off()
    plt.imshow(img, interpolation='nearest', aspect='auto')

In [192]:
# run annotation
annotations = pixt.annotate(
  df_user,
  example_column='data',
  buttons_in_a_row=5,
  options=['Human', 'Several humans', 'Non-human heat', 'Ambigous heat', 'No heat'],
  display_fn=lambda item: show_image(item)
)

HTML(value='0 of 50 Examples annotated, Current Position: 0 ')

VBox(children=(HBox(children=(Button(description='Human', style=ButtonStyle()), Button(description='Several hu…

Output()

In [185]:
# verify the data
annotations

Unnamed: 0,Timestamp,Sensor ID,Room Temperature,RSSI,data,changed,label
4323,2021-05-07 13:12:38,3078,23.0,-81,"[[9.6, 11.4, 11.8, 12.0, 11.4, 11.5, 12.1, 12....",False,
3232,2021-05-07 13:10:50,3078,22.5,-67,"[[10.7, 11.4, 10.8, 9.5, 12.4, 12.1, 12.2, 12....",False,
9985,2021-05-07 13:13:19,C088,23.1,-68,"[[11.8, 13.0, 12.1, 11.7, 12.8, 13.5, 12.6, 12...",False,
6285,2021-05-07 13:07:34,C088,21.700001,-58,"[[10.6, 12.2, 13.3, 13.0, 12.3, 12.9, 13.1, 12...",False,
7261,2021-05-07 13:09:05,C088,22.1,-58,"[[11.3, 13.5, 12.8, 11.6, 11.9, 12.2, 13.5, 13...",False,
3932,2021-05-07 13:11:59,3078,22.799999,-76,"[[10.5, 10.6, 10.6, 10.5, 11.2, 11.8, 11.5, 11...",False,
9673,2021-05-07 13:12:50,C088,23.0,-58,"[[10.5, 11.4, 12.0, 12.5, 11.5, 11.8, 12.4, 12...",False,
1312,2021-05-07 13:07:43,3078,21.700001,-82,"[[9.1, 11.1, 10.9, 10.6, 12.0, 11.3, 11.4, 11....",False,
1942,2021-05-07 13:08:46,3078,22.0,-78,"[[11.7, 11.2, 11.3, 10.8, 10.4, 11.4, 11.2, 11...",False,
841,2021-05-07 13:06:55,3078,21.5,-62,"[[11.6, 12.1, 11.3, 10.9, 11.7, 12.1, 11.6, 11...",False,


In [180]:
# write to CSV
annotations.to_json(user+int(time.time())"_labels.json")