In [9]:
%pip install tqdm

Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.


In [10]:
import sys
import os
import json
from enum import IntEnum, Enum
import numpy as np
import statistics as stat
import scipy.signal
import scipy
from typing import Optional
import pandas as pd
from tqdm import tqdm
        
import json

In [11]:
from gua_enums import SensorEnum,SamplingRateEnum,InterpolModesEnum

In [12]:
class GestureDataset():
    """
    Class for handling the gesture dataset.

    ``load`` reads a folder tree (one sub-folder per user id, one JSON file per
    sensor recording) into ``AllMeasurements`` (one row per sensor file).
    ``export`` groups those rows by (uid, mid) and writes one combined JSON
    file per group.
    """
    # Column order of ``AllMeasurements``; mirrors the list built by
    # ``OneSensorMeasurement.__flat__``.
    _COLUMNS = ['uid','mid','stype','samplerate','timeoutFlag','selftriggerFlag','oldname','timestamp','sensordata']

    def __init__(self):
        self.AllMeasurements = pd.DataFrame()
        self.folderprefix = None
        self.ids = None

    class OneSensorMeasurement:
        """
        One sensor recording parsed from a single JSON file.

        The file name is expected to start with ``<uid>_<nid>`` (before the
        first ``-``). The JSON payload is a list: sensor id, original name,
        optionally the sampling rate, and as last entry a list of
        ``{'timestamp': ..., 'values': [...]}`` dicts.
        """
        def __init__(self, path: str):
            """
            Parse the file at ``path``.

            If the payload cannot be parsed, the measurement is marked as empty
            (zero-length ``timestamps``) so that ``__flat__`` skips it.

            Raises:
                OSError: if the file cannot be opened.
                IndexError / ValueError: if the file name does not start with
                    ``<uid>_<int>``.
            """
            fname = os.path.basename(path)
            id_fields = fname.split("-")[0].split("_")
            self.uid = id_fields[0]
            self.nid: int = int(id_fields[1])
            self.stamp = None
            with open(path) as in_file:
                try:
                    filedata: list = json.load(in_file)
                    self.sensorType = SensorEnum(filedata[0])
                    self.oldName = filedata[1]
                    # Third underscore-separated field of the stored name.
                    self.stamp = filedata[1].split('_')[2]
                    self.toFlag = "timeOut" in fname or "timeout" in fname
                    self.selfFlag = "self" in fname
                    if len(filedata) > 3:
                        self.samplingRate = SamplingRateEnum.get_enum_from_string(filedata[2])
                    else:
                        # NOTE(review): this stores the member *name* (a str), not the
                        # member itself. Kept as-is because previously saved frames may
                        # rely on it; export() resolves both forms.
                        self.samplingRate = SamplingRateEnum.FASTEST.name
                    events = filedata[-1]
                    self.timestamps = np.zeros(len(events), dtype=np.longlong)
                    if len(events) != 0:
                        self.sensorData = np.zeros((len(events), len(events[0]['values'])))
                        for index, event in enumerate(events):
                            self.timestamps[index] = event['timestamp']
                            self.sensorData[index] = event['values']
                    else:
                        # No events: keep both arrays empty so __flat__ drops the file
                        # instead of failing on a missing ``timestamps`` attribute.
                        self.sensorData = np.empty(0)
                except Exception:
                    # Unparseable payload: fall back to what the file name tells us.
                    self.sensorType = SensorEnum.from_short_string(SensorEnum,short_string=(fname.split('.')[-2])[-3:])
                    self.oldName = None
                    self.stamp = None
                    # NOTE(review): this branch tests for "timedOut" while the branch
                    # above tests for "timeOut"/"timeout" - confirm which spelling the
                    # file names actually use.
                    self.toFlag = "timedOut" in fname
                    self.selfFlag = "self" in fname
                    self.samplingRate = None
                    # Zero-length 1-d arrays: a 0-d array (np.empty(())) has size 1 and
                    # would slip past the size check in __flat__.
                    self.timestamps = np.empty(0)
                    self.sensorData = np.empty(0)

        def __flat__(self,filterTO=False):
            """
            Return the measurement as a flat list in ``GestureDataset._COLUMNS`` order.

            Returns None when the measurement holds no samples, or when
            ``filterTO`` is true and the file name carried a time-out marker.
            """
            if self.timestamps.size == 0:
                return None
            if filterTO and self.toFlag:
                return None
            return [self.uid,self.nid,self.sensorType,self.samplingRate,self.toFlag,self.selfFlag,self.oldName,self.timestamps,self.sensorData]

    def load(self, path: str, filterTo = False) -> None:
        """
        Append all measurements found below ``path`` to ``AllMeasurements``.

        Every sub-directory of ``path`` is treated as one user folder and every
        regular file inside it as one sensor recording.

        Args:
            path: root folder containing the per-user sub-folders.
            filterTo: when true, recordings flagged as timed out are skipped.
        """
        self.folderprefix = path
        # Sorted so that row order does not depend on the file system's listing order.
        self.ids = sorted(
            entry for entry in os.listdir(path)
            if os.path.isdir(os.path.join(path, entry))
        )
        mlist = []
        for uid in self.ids:
            user_dir = os.path.join(path, uid)
            for filename in tqdm(sorted(os.listdir(user_dir))):
                file_path = os.path.join(user_dir, filename)
                if not os.path.isfile(file_path):
                    continue
                flat = self.OneSensorMeasurement(file_path).__flat__(filterTo)
                if flat is not None:
                    mlist.append(flat)
        newMeasurements = pd.DataFrame(mlist, columns=self._COLUMNS)
        # Concatenating with an empty frame is deprecated in recent pandas, so
        # only concatenate when both sides actually hold rows.
        if self.AllMeasurements.empty:
            self.AllMeasurements = newMeasurements
        elif not newMeasurements.empty:
            self.AllMeasurements = pd.concat([self.AllMeasurements,newMeasurements],ignore_index=True)

    @staticmethod
    def _rate_name(samplerate) -> str:
        """Return the SamplingRateEnum member name for a member, a member value or a member name."""
        try:
            return SamplingRateEnum(samplerate).name
        except ValueError:
            # Not a valid value: old-format files carry the member name instead.
            return SamplingRateEnum[samplerate].name

    def export(self, output_folder: str) -> pd.DataFrame:
        """
        Write one combined JSON file per (uid, mid) group and return the grouped frame.

        Files are written to ``<output_folder>/<uid>/<name>_all.json``; the
        user folder is created when missing.

        Args:
            output_folder: root folder for the exported files (with or without
                a trailing path separator).

        Returns:
            A frame with one row per (uid, mid) whose remaining columns hold
            lists with one entry per sensor recording. Empty if nothing has
            been loaded.
        """
        if self.AllMeasurements.empty:
            return pd.DataFrame(columns=self._COLUMNS)

        list_columns = ['stype', 'samplerate', 'timeoutFlag', 'selftriggerFlag',
                        'oldname', 'timestamp', 'sensordata']
        combined_measurements = self.AllMeasurements.groupby(['uid', 'mid']).agg(
            {column: (lambda x: list(x)) for column in list_columns}
        ).reset_index()

        for _, row in combined_measurements.iterrows():
            different_sensor_measurements = []
            for stype, samplerate, timestamp, sensordata in zip(
                    row['stype'], row['samplerate'], row['timestamp'], row['sensordata']):
                sensor_events = [
                    {"timestamp": int(ts), "values": data.tolist()}
                    for ts, data in zip(timestamp, sensordata)
                ]
                different_sensor_measurements.append({
                    "rateEnum": self._rate_name(samplerate),
                    "sensorEnum": stype.name,
                    "sensorEvents": sensor_events
                })
            single_measurement = {
                "filename": row['oldname'][0],
                "uid": row['uid'],
                "mid": int(row['mid']),
                # Flags of the last recording in the group (what the previous
                # implementation picked up from its leaked loop variables).
                "selfTriggerFlag": bool(row['selftriggerFlag'][-1]),
                "timeoutFlag": bool(row['timeoutFlag'][-1]),
                "sensorMeasurements": different_sensor_measurements
            }
            # NOTE(review): drops the last three characters of the stored name's
            # stem - presumably a sensor short name; confirm against the data.
            newname = (row['oldname'][0].split('.')[0])[:-3] + '_all.json'
            user_dir = os.path.join(output_folder, row['uid'])
            os.makedirs(user_dir, exist_ok=True)
            with open(os.path.join(user_dir, newname), 'w') as fw:
                json.dump(single_measurement, fw, indent=4)
        return combined_measurements
        



In [13]:
# Build one dataset from all recording folders; the second argument
# (filterTo=True) skips recordings flagged as timed out.
mydataset = GestureDataset()
for session_folder in ("./files/session2/", "./files/session1/", "./files/diy_50/"):
    mydataset.load(session_folder, True)

  0%|          | 0/64 [00:00<?, ?it/s]

100%|██████████| 64/64 [00:00<00:00, 203.93it/s]
100%|██████████| 232/232 [00:01<00:00, 222.27it/s]
100%|██████████| 232/232 [00:01<00:00, 199.74it/s]
100%|██████████| 127/127 [00:00<00:00, 190.20it/s]
100%|██████████| 223/223 [00:01<00:00, 212.36it/s]
100%|██████████| 224/224 [00:00<00:00, 243.21it/s]
100%|██████████| 122/122 [00:00<00:00, 205.63it/s]
100%|██████████| 86/86 [00:00<00:00, 184.76it/s]
100%|██████████| 70/70 [00:00<00:00, 173.14it/s]
100%|██████████| 112/112 [00:00<00:00, 155.33it/s]
100%|██████████| 63/63 [00:00<00:00, 174.23it/s]
100%|██████████| 112/112 [00:00<00:00, 172.71it/s]
100%|██████████| 77/77 [00:00<00:00, 199.96it/s]
100%|██████████| 416/416 [00:02<00:00, 171.12it/s]
100%|██████████| 416/416 [00:02<00:00, 167.07it/s]
100%|██████████| 424/424 [00:02<00:00, 159.13it/s]


In [15]:
# Distinct user ids present after loading.
mydataset.AllMeasurements.loc[:, 'uid'].unique()


array(['90001', '90002', '90003', '90004', '90005', '90006', '90007',
       '90010', '0112', '0113', '0114'], dtype=object)

In [17]:
# Number repeated recordings within each (uid, selftriggerFlag, mid, stype)
# group, index the frame by those keys plus the counter, and save it.
df = (
    mydataset.AllMeasurements
    .copy()
    .assign(counter=lambda frame: frame.groupby(['uid','selftriggerFlag','mid','stype']).cumcount())
    .set_index(keys=['uid','selftriggerFlag','mid','stype','counter'], drop=False)
    .sort_index()
)
df.to_pickle('combined_all.pkl')

In [17]:
# Write the combined JSON files below the given folder and keep the call's
# result in `combined`.
combined = mydataset.export(output_folder="./files/jsonout/combined/")

In [47]:
# Show the row at position 8 of `combined`.
combined.iloc[8, :]

# Alternative kept for reference: print every column of one row.
#for column in combined.columns:
#    print(column, ":  ",combined.iloc[1][column])

uid                                                            90002
mid                                                                2
stype              [SensorEnum.GRV, SensorEnum.PRX, SensorEnum.LG...
samplerate                                  [0, 0, 0, 0, 0, 0, 0, 0]
timeoutFlag        [False, False, False, False, False, False, Fal...
selftriggerFlag    [False, False, False, False, False, False, Fal...
oldname            [0_2_1683108727165, 0_2_1683108727165, 0_2_168...
timestamp          [[3837683382295, 3837687947969, 3837692502168,...
sensordata         [[[0.74060726, -0.32715246, 9.776532], [0.7744...
Name: 8, dtype: object