# Sensor Based Activity Recognition
Challenge: cdl1 - Sensor based Activity Recognition  
Team: Lea Bütler, Manjavy Kirupa, Etienne Roulet, Si Ben Tran  

Aufgabe: Visualisierungen der gemessenen Sensordaten

In diesem Notebook visualisieren wir alle gemessenen Sensordaten und versuchen so zu identifizieren, welche aufgenommenen Sensordaten von Relevanz sein können bzw. welche wiederum nicht.

In [22]:
# Laden von Libraries
%pip install -r requirements.txt
from data_ingestion.pipeline_manager import PipelineRunner as PipelineRunner 
from data_ingestion import utilities as util
import tensorflow as tf
import pathlib
import matplotlib.pyplot as plt
import seaborn as snsb
import polars as pl
import pandas as pd

Note: you may need to restart the kernel to use updated packages.


## Import von Daten

- Einlesen der Daten mittels Pipeline oder
- Einlesen der gefilterten Daten von 'all-sensormeasurement-plots'

In [70]:
# Load the combined measurements of all recordings from the CSV export
# and show the resulting dataframe.
data = pd.read_csv('Alle_Messungen.csv', delimiter=',')
data

Unnamed: 0.1,Unnamed: 0,time,Accelerometer_x,Accelerometer_y,Accelerometer_z,Gyroscope_x,Gyroscope_y,Gyroscope_z,Magnetometer_x,Magnetometer_y,Magnetometer_z,Orientation_qx,Orientation_qy,Orientation_qz,id,user,class,id_combined
0,0,2023-02-27 15:02:04.548,0.000000,-3.552000,-9.140000,,,,,,,0.161765,-0.088290,-0.470671,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
1,1,2023-02-27 15:02:17.156,0.000000,9.722000,1.278000,,,,,,,-0.217750,-0.719579,0.631111,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
2,2,2023-02-27 15:02:31.574,-0.728000,9.779000,0.000000,,,,,,,-0.420650,0.568356,-0.598058,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
3,3,2023-02-27 15:02:16.685,,,,,,,0.000000,43.500000,-13.143750,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
4,4,2023-02-27 15:02:32.295,,,,,,,-13.012501,0.000000,20.587502,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2539528,2539528,2023-03-11 13:12:01.906,-0.165031,-0.047750,-0.002080,0.036409,0.031078,0.033035,-31.916840,-10.083267,-49.236946,0.120060,0.065630,0.988451,9_iphone13mini-2023-03-11_13-09-56,Weiping_Zhang,Velofahren,9_iphone13mini-2023-03-11_13-09-56Weiping_Zhan...
2539529,2539529,2023-03-11 13:12:01.916,-0.198710,-0.076285,0.047025,0.023099,0.046894,0.033637,-31.769516,-10.042038,-49.300545,0.119959,0.065806,0.988438,9_iphone13mini-2023-03-11_13-09-56,Weiping_Zhang,Velofahren,9_iphone13mini-2023-03-11_13-09-56Weiping_Zhan...
2539530,2539530,2023-03-11 13:12:01.926,-0.207555,-0.116692,0.068004,0.009885,0.053780,0.034982,-31.764618,-10.130127,-49.406891,0.119799,0.065911,0.988438,9_iphone13mini-2023-03-11_13-09-56,Weiping_Zhang,Velofahren,9_iphone13mini-2023-03-11_13-09-56Weiping_Zhan...
2539531,2539531,2023-03-11 13:12:01.936,-0.203920,-0.147709,0.035921,0.009365,0.045132,0.033097,-31.766403,-10.006668,-49.255692,0.119646,0.065983,0.988439,9_iphone13mini-2023-03-11_13-09-56,Weiping_Zhang,Velofahren,9_iphone13mini-2023-03-11_13-09-56Weiping_Zhan...


# NA-Werte behandeln

In [71]:
# Number of missing (NA) values in each column
data.isnull().sum(axis=0)

Unnamed: 0             0
time                   0
Accelerometer_x    24219
Accelerometer_y    24219
Accelerometer_z    24219
Gyroscope_x         3420
Gyroscope_y         3420
Gyroscope_z         3420
Magnetometer_x     31374
Magnetometer_y     31374
Magnetometer_z     31374
Orientation_qx     31206
Orientation_qy     31206
Orientation_qz     31206
id                     0
user                   0
class                  0
id_combined            0
dtype: int64

In [72]:
# Total number of rows in the (untrimmed) dataframe `data`
print('data: ', len(data))

# NA count per column; display() is required here because a notebook only
# renders the last expression of a cell automatically
display(data.isna().sum())

# Number of rows that contain at least one NA value, per user
data[data.isna().any(axis=1)].value_counts("user")

data_trimmed:  2539533


user
Ben_Tran        20325
Lea_Buetler     17317
Tobias_Buess     1098
dtype: int64

In [83]:
# First `id_combined` value of every user, computed once and indexed by user
first_recording_per_user = data.groupby("user")["id_combined"].first()

# Look the recordings up by user label instead of by position in the sorted
# group index, so adding or removing users cannot silently shift which
# recording each variable refers to.
gabo = first_recording_per_user["Gabriel_Torres"]
ben = first_recording_per_user["Ben_Tran"]
etienne = first_recording_per_user["Etienne_Roulet"]

# Only the last expression of a cell is rendered automatically, so the
# first two ids are shown explicitly.
display(gabo)
display(ben)
etienne

'01_iPhone13pro-2023-03-21_16-55-47Etienne_RouletLaufen'

In [82]:
# Render the rows of each selected recording in turn: gabo, ben, etienne
for recording_id in (gabo, ben, etienne):
    display(data.loc[data["id_combined"] == recording_id])

Unnamed: 0.1,Unnamed: 0,time,Accelerometer_x,Accelerometer_y,Accelerometer_z,Gyroscope_x,Gyroscope_y,Gyroscope_z,Magnetometer_x,Magnetometer_y,Magnetometer_z,Orientation_qx,Orientation_qy,Orientation_qz,id,user,class,id_combined
53408,53408,2023-02-23 17:00:29.394,-0.144757,0.036711,0.183466,-0.018995,0.076601,-0.044259,24.735619,-7.820059,-42.659683,-0.209151,0.010802,0.246126,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...
53409,53409,2023-02-23 17:00:29.404,-0.112878,0.006307,0.154997,0.006513,0.055522,-0.047334,24.719410,-7.804165,-42.499985,-0.209286,0.010388,0.246277,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...
53410,53410,2023-02-23 17:00:29.414,-0.066828,0.008270,0.070211,0.021283,0.043709,-0.041569,24.669020,-7.865181,-42.141647,-0.209457,0.010104,0.246439,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...
53411,53411,2023-02-23 17:00:29.424,-0.005038,-0.032582,0.041082,0.018581,0.035648,-0.034993,24.658150,-7.719778,-41.732162,-0.209622,0.009958,0.246566,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...
53412,53412,2023-02-23 17:00:29.434,0.061892,-0.052410,0.031340,0.001435,0.032175,-0.032162,24.937943,-8.028658,-42.288513,-0.209718,0.009887,0.246669,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108752,108752,2023-02-23 17:09:39.863,0.093452,0.047862,0.239359,0.517598,-0.420894,-0.425964,20.922943,-37.190308,-24.816460,-0.123001,-0.100178,-0.517383,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...
108753,108753,2023-02-23 17:09:39.873,0.019223,0.807030,0.087325,0.403230,-0.342363,-0.373326,20.994629,-37.273422,-24.688194,-0.125754,-0.099997,-0.515230,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...
108754,108754,2023-02-23 17:09:39.883,-0.035634,1.525645,-0.065677,0.247031,-0.160364,-0.347026,21.123034,-37.353165,-24.713539,-0.127584,-0.099987,-0.513419,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...
108755,108755,2023-02-23 17:09:39.893,-0.207727,1.238632,-0.259368,0.135277,-0.031176,-0.371296,20.894768,-37.419495,-24.734642,-0.128432,-0.100327,-0.511777,01_iPhone13ProMax-2023-02-23_17-00-29,Gabriel_Torres,Laufen,01_iPhone13ProMax-2023-02-23_17-00-29Gabriel_T...


Unnamed: 0.1,Unnamed: 0,time,Accelerometer_x,Accelerometer_y,Accelerometer_z,Gyroscope_x,Gyroscope_y,Gyroscope_z,Magnetometer_x,Magnetometer_y,Magnetometer_z,Orientation_qx,Orientation_qy,Orientation_qz,id,user,class,id_combined
0,0,2023-02-27 15:02:04.548,0.0,-3.552,-9.14,,,,,,,0.161765,-0.08829,-0.470671,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
1,1,2023-02-27 15:02:17.156,0.0,9.722,1.278,,,,,,,-0.21775,-0.719579,0.631111,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
2,2,2023-02-27 15:02:31.574,-0.728,9.779,0.0,,,,,,,-0.42065,0.568356,-0.598058,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
3,3,2023-02-27 15:02:16.685,,,,,,,0.0,43.5,-13.14375,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
4,4,2023-02-27 15:02:32.295,,,,,,,-13.012501,0.0,20.587502,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
5,5,2023-02-27 15:02:33.285,,,,,,,-22.612501,0.0,20.268751,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
6,6,2023-02-27 15:02:35.195,,,,,,,-12.1125,0.0,20.75625,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
7,7,2023-02-27 15:02:45.315,,,,,,,-21.243752,0.0,16.9125,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
8,8,2023-02-27 15:02:48.985,,,,,,,-41.100002,0.0,-3.24375,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
9,9,2023-02-27 15:02:49.085,,,,,,,-40.612503,0.0,-10.6125,,,,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen


In [65]:
# Rows that have at least one missing value in any column
data_na = data[data.isna().any(axis=1)]

# Sorted list of the recordings (`id_combined`) that contain such rows.
# Taking the unique keys directly avoids aggregating every column with
# groupby().first() just to read the group labels back out.
na_file_list = sorted(data_na['id_combined'].dropna().unique())
display(na_file_list)

['01-SamsungA22-2023-02-27_14-54-28Ben_TranTreppenlaufen',
 '01_GalaxyS21Tobias_BuessLaufen',
 '01_GalaxyS21Tobias_BuessRennen',
 '01_GalaxyS21Tobias_BuessSitzen',
 '01_GalaxyS21Tobias_BuessTreppenlaufen',
 '01_GalaxyS21Tobias_BuessVelofahren',
 '01_Huawei_Fahrrad-2023-02-27_19-23-04Lea_BuetlerVelofahren',
 '01_Huawei_Laufen-2023-03-03_19-15-46Lea_BuetlerLaufen',
 '01_Huawei_Rennen-2023-03-15_17-22-07Lea_BuetlerRennen',
 '01_Huawei_Sitzen-2023-02-23_19-10-32Lea_BuetlerSitzen',
 '01_Huawei_Stehen-2023-03-01_14-27-19Lea_BuetlerStehen',
 '01_Huawei_Treppe-2023-03-01_14-59-00Lea_BuetlerTreppenlaufen',
 '01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen',
 '01_SamsungA22-2023-02-27_15-37-24Ben_TranVelofahren',
 '01_SamsungA22-2023-02-27_17-48-41Ben_TranSitzen',
 '01_SamsungA22-2023-02-28_07-17-05Ben_TranStehen',
 '01_SamsungA22-2023-03-01_17-06-47Ben_TranRennen',
 '02-SamsungA22-2023-02-27_14-57-04Ben_TranTreppenlaufen',
 '02-SamsungA22-2023-02-28_07-22-08Ben_TranStehen',
 '02_GalaxyS21Tobias

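Erklärung, wieso NA = 0:
Bei einigen Messungen (siehe Liste oben, u. a. Aufnahmen mit SamsungA22, GalaxyS21 und Huawei) fehlen in einzelnen Zeilen Sensorwerte. Diese fehlenden Werte werden im nächsten Schritt durch 0 ersetzt. (TODO: Begründung ergänzen.)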

In [68]:
# Substitute every missing value in `data` with 0 (applies to all columns)
data = data.fillna(value=0)

# Daten trimmen

In [69]:
# Trim the start and end of every recording listed in `measurement_ids`
def trim_all_files(df, measurement_ids, cut_size):
    """Cut the first and last ``cut_size`` seconds off each recording.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``id_combined`` column identifying the recording and a
        ``time`` column that ``pd.to_datetime`` can parse. It is not modified.
    measurement_ids : iterable
        Values of ``id_combined`` to process, in the order in which their
        rows should appear in the result.
    cut_size : int or float
        Number of seconds to remove at both ends of each recording.

    Returns
    -------
    pandas.DataFrame
        The rows of the requested recordings whose ``time`` lies strictly
        between ``min + cut_size`` and ``max - cut_size`` of their own
        recording, with ``time`` converted to datetime and the original
        index labels kept. An empty ``DataFrame`` is returned when
        ``measurement_ids`` is empty.
    """
    # The margin is the same for every recording, so build it once.
    margin = pd.Timedelta(seconds=cut_size)

    # Collect the trimmed pieces and concatenate a single time at the end;
    # concatenating inside the loop re-copies all accumulated rows on every
    # iteration.
    trimmed_files = []
    for measurement_id in measurement_ids:
        file_i = df[df['id_combined'] == measurement_id].copy()
        file_i['time'] = pd.to_datetime(file_i['time'])
        lower_bound = file_i['time'].min() + margin
        upper_bound = file_i['time'].max() - margin
        # Strict comparisons: samples exactly on a bound are dropped.
        keep = (file_i['time'] > lower_bound) & (file_i['time'] < upper_bound)
        trimmed_files.append(file_i[keep])

    # pd.concat raises on an empty list, so keep the empty-frame result.
    if not trimmed_files:
        return pd.DataFrame()
    return pd.concat(trimmed_files, axis=0)

# Collect every distinct recording id present in the data
all_ids = data['id_combined'].unique()

# Cut 5 seconds off both ends of each recording and preview the first rows
data_trimmed = trim_all_files(df=data, measurement_ids=all_ids, cut_size=5)
data_trimmed.head(5)

Unnamed: 0.1,Unnamed: 0,time,Accelerometer_x,Accelerometer_y,Accelerometer_z,Gyroscope_x,Gyroscope_y,Gyroscope_z,Magnetometer_x,Magnetometer_y,Magnetometer_z,Orientation_qx,Orientation_qy,Orientation_qz,id,user,class,id_combined
1,1,2023-02-27 15:02:17.156,0.0,9.722,1.278,0.0,0.0,0.0,0.0,0.0,0.0,-0.21775,-0.719579,0.631111,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
2,2,2023-02-27 15:02:31.574,-0.728,9.779,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.42065,0.568356,-0.598058,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
3,3,2023-02-27 15:02:16.685,0.0,0.0,0.0,0.0,0.0,0.0,0.0,43.5,-13.14375,0.0,0.0,0.0,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
4,4,2023-02-27 15:02:32.295,0.0,0.0,0.0,0.0,0.0,0.0,-13.012501,0.0,20.587502,0.0,0.0,0.0,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen
5,5,2023-02-27 15:02:33.285,0.0,0.0,0.0,0.0,0.0,0.0,-22.612501,0.0,20.268751,0.0,0.0,0.0,01_SamsungA22-2023-02-27_15-02-03,Ben_Tran,Laufen,01_SamsungA22-2023-02-27_15-02-03Ben_TranLaufen


In [18]:
# Export the trimmed data to CSV. The row index is not written: saving it
# adds another unnamed column that comes back as "Unnamed: 0" each time the
# file is read again.
data_trimmed.to_csv('Alle_Messungen_trimmed.csv', index=False)

In [85]:
# Look up the rows recorded at Monday, 27 February 2023 14:54:28.768
# in the trimmed data
timestamp_mask = data_trimmed['time'] == '2023-02-27 14:54:28.768'
data_trimmed[timestamp_mask]

Unnamed: 0.1,Unnamed: 0,time,Accelerometer_x,Accelerometer_y,Accelerometer_z,Gyroscope_x,Gyroscope_y,Gyroscope_z,Magnetometer_x,Magnetometer_y,Magnetometer_z,Orientation_qx,Orientation_qy,Orientation_qz,id,user,class,id_combined
