# Business understanding

In this notebook we label the data and get deeper knowledge about the data.
We split the task. Each team member labels 50 samples.

In [1]:
# make sure just your username is uncommented
# user = "felix"
# user = "omar"
user = "tobias"

In [2]:
# For labelling the Framework pigeonXT is used: https://github.com/dennisbakhuis/pigeonXT/
# Check the pigeonXT github README, if it does not work out of the box.
!pip install pigeonXT-jupyter

Defaulting to user installation because normal site-packages is not writeable


In [3]:
import json
import matplotlib.pyplot as plt
%matplotlib notebook
import numpy as np
import time
import pandas as pd
import pigeonXT as pixt

In [4]:
# select 150 random indices from our 10000 entries
np.random.seed(0)
import numpy as np
all_indices = np.random.choice(range(10001), 150, replace=False)
#print(all_indices.shape)

# assign 50 labels to each user
indices = { "felix":  all_indices[:50],
            "omar":   all_indices[50:100],
            "tobias": all_indices[100:],
          }
print("samples to label:", indices[user].shape[0])

samples to label: 50


In [5]:
# load data
FILES=["../thermal_raw_20210507_full/20210507_1605_3078.txt", "../thermal_raw_20210507_full/20210507_1605_C088.txt"]
data = list()

for FILE in FILES:
    with open(FILE, "r") as f:
        for line in f:
            record = json.loads(line)
            data.append(record)

In [6]:
# convert data to pandas and do give it appropriate datatypes
df = pd.DataFrame(data)

# fix datatypes, drop sensor_size
df['Timestamp'] = df['Timestamp'].astype('int32')
df['Sensor ID'] = df['Sensor ID'].str.replace(r'Sensor_32x32_', r'')
df['Room Temperature'] = df['Room Temperature'].astype('float32')
df = df.drop(['Sensor size'], axis=1)

# reset index to get it as column
df.reset_index(level=0, inplace=True)

# show datatypes
#print(df.dtypes)

# show amount of samples
#print("amount of samples:", df.shape[0])

# show first 3 rows
#df.head(n=3)

In [7]:
# select samples for the user
# if user CSV file exists, continue with this one
df_user = df.loc[indices[user]]

# show amount of samples
print("amount of samples:", df.shape[0])
df_user.head(n=3)

amount of samples: 10000


Unnamed: 0,index,Timestamp,Sensor ID,Room Temperature,RSSI,data
4323,4323,1620393158,3078,23.0,-81,"[[9.6, 11.4, 11.8, 12.0, 11.4, 11.5, 12.1, 12...."
3232,3232,1620393050,3078,22.5,-67,"[[10.7, 11.4, 10.8, 9.5, 12.4, 12.1, 12.2, 12...."
9985,9985,1620393199,C088,23.1,-68,"[[11.8, 13.0, 12.1, 11.7, 12.8, 13.5, 12.6, 12..."


In [8]:
def show_image(index):
    """ show single image for annotation
    
    Args:
    index: index of the image in df Dataframe
    """
    fig, ax = plt.subplots(figsize=(2, 2))
    ax.set_axis_off()
    plt.imshow(df.iloc[index].data, interpolation='nearest', aspect='auto')

In [9]:
# run annotation
annotations = pixt.annotate(
  df_user,
  example_column='index',
  buttons_in_a_row=5,
  options=['Human', 'Several humans', 'Non-human heat', 'Ambigous heat', 'No heat'],
  display_fn=lambda index: show_image(index)
)

HTML(value='0 of 50 Examples annotated, Current Position: 0 ')

VBox(children=(HBox(children=(Button(description='Human', style=ButtonStyle()), Button(description='Several hu…

Output()

In [10]:
def show_image_with_surrounding(index, n=4, step=25):
    """ show the image and surround images
    
    Args:
    index: index of the image in df Dataframe
    n: amount of pics to show prior and past the frame of interest
    step: amount of frames to skip between the preceeding / succeeding frames
    """
    fig, axes = plt.subplots(nrows=1, ncols=n, figsize=(n*2, 2), num="preceeding frames")
    for ax, i in zip(axes, range(index-(n*step), index, step)):
        ax.set_axis_off()
        ax.set_title('Index: %i' % i)
        ax.imshow(df.iloc[i].data, interpolation='nearest', aspect='auto')
    
    fig, ax = plt.subplots(figsize=(2, 2))
    ax.set_axis_off()
    ax.imshow(df.iloc[index].data, interpolation='nearest', aspect='auto')
    
    fig, axes = plt.subplots(nrows=1, ncols=n, figsize=(n*2, 2))
    for ax, i in zip(axes, range(index+1, index+(n*step)+1)):
        ax.set_axis_off()
        ax.set_title('Index: %i' % i)
        ax.imshow(df.iloc[i].data, interpolation='nearest', aspect='auto')

In [11]:
# run annotation and show surround images
annotations = pixt.annotate(
  df_user,
  example_column='index',
  buttons_in_a_row=5,
  options=['Human', 'Several humans', 'Non-human heat', 'Ambigous heat', 'No heat'],
  display_fn=lambda index: show_image_with_surrounding(index)
)

HTML(value='0 of 50 Examples annotated, Current Position: 0 ')

VBox(children=(HBox(children=(Button(description='Human', style=ButtonStyle()), Button(description='Several hu…

Output()

In [12]:
# verify the data
annotations

Unnamed: 0,index,Timestamp,Sensor ID,Room Temperature,RSSI,data,changed,label
4323,4323,1620393158,3078,23.0,-81,"[[9.6, 11.4, 11.8, 12.0, 11.4, 11.5, 12.1, 12....",False,
3232,3232,1620393050,3078,22.5,-67,"[[10.7, 11.4, 10.8, 9.5, 12.4, 12.1, 12.2, 12....",False,
9985,9985,1620393199,C088,23.1,-68,"[[11.8, 13.0, 12.1, 11.7, 12.8, 13.5, 12.6, 12...",False,
6285,6285,1620392854,C088,21.700001,-58,"[[10.6, 12.2, 13.3, 13.0, 12.3, 12.9, 13.1, 12...",False,
7261,7261,1620392945,C088,22.1,-58,"[[11.3, 13.5, 12.8, 11.6, 11.9, 12.2, 13.5, 13...",False,
3932,3932,1620393119,3078,22.799999,-76,"[[10.5, 10.6, 10.6, 10.5, 11.2, 11.8, 11.5, 11...",False,
9673,9673,1620393170,C088,23.0,-58,"[[10.5, 11.4, 12.0, 12.5, 11.5, 11.8, 12.4, 12...",False,
1312,1312,1620392863,3078,21.700001,-82,"[[9.1, 11.1, 10.9, 10.6, 12.0, 11.3, 11.4, 11....",False,
1942,1942,1620392926,3078,22.0,-78,"[[11.7, 11.2, 11.3, 10.8, 10.4, 11.4, 11.2, 11...",False,
841,841,1620392815,3078,21.5,-62,"[[11.6, 12.1, 11.3, 10.9, 11.7, 12.1, 11.6, 11...",False,


In [13]:
# write to CSV
annotations.to_json(user+"_"+str(int(time.time()))+"_labels.json")