In [2]:
import sys
import os
import json
from enum import IntEnum, Enum
import numpy as np
import matplotlib.mlab as mlab
import statistics as stat
import scipy.signal
import scipy
from typing import Optional
import pandas as pd
from tqdm import tqdm


In [3]:
from gua_enums import SensorEnum,SamplingRateEnum,InterpolModesEnum

In [13]:
class GestureDataset():
    """
    Class for handling the gesture dataset.

    ``load`` walks ``<path>/<uid>/<file>``, parses every regular file into a
    ``OneSensorMeasurement`` and collects the flattened rows in the
    ``AllMeasurements`` DataFrame.
    """

    class OneSensorMeasurement:
        """
        One sensor recording read from a single JSON file.

        The file name supplies ``uid`` and ``nid`` (the first two
        ``_``-separated tokens of the part before the first ``-``) as well as
        the ``toFlag`` / ``selfFlag`` markers. The JSON payload is read as a
        list ``[sensor type, old name, (sampling rate,) samples]`` in which
        every sample is a dict with ``'timestamp'`` and ``'values'`` keys.

        When the payload cannot be parsed, the sensor type is derived from the
        last three characters of the file-name stem and all data arrays are
        empty, so ``__flat__`` returns ``None`` for that file.
        """

        # File-name fragments that mark a timed-out recording. The two parsing
        # branches used to check different spellings; every spelling is
        # accepted everywhere now so the flag no longer depends on whether the
        # JSON payload happened to be readable.
        _TIMEOUT_MARKERS = ("timeOut", "timeout", "timedOut")

        def __init__(self, path: str):
            """
            Parse the measurement stored at ``path``.

            Raises:
                OSError: if the file cannot be opened.
                IndexError, ValueError: if the file name does not start with
                    ``<uid>_<integer>``.
            """
            with open(path) as in_file:
                fname = path.split(os.sep)[-1]
                name_tokens = fname.split("-")[0].split("_")
                self.uid = name_tokens[0]
                self.nid = int(name_tokens[1])
                self.toFlag = any(marker in fname for marker in self._TIMEOUT_MARKERS)
                self.selfFlag = "self" in fname
                # Exception that forced the file-name fallback (None if the
                # payload parsed cleanly); kept so failures are inspectable
                # instead of being silently swallowed.
                self.parseError = None
                try:
                    self._read_payload(json.load(in_file))
                except Exception as exc:
                    # Deliberately broad: any malformed payload must not abort
                    # a bulk load. Unlike a bare ``except`` this lets
                    # KeyboardInterrupt / SystemExit through.
                    self.parseError = exc
                    self._use_filename_fallback(fname)

        def _read_payload(self, filedata: list) -> None:
            """Fill the metadata and data arrays from the decoded JSON list."""
            self.sensorType = SensorEnum(filedata[0])
            self.oldName = filedata[1]
            self.stamp = filedata[1].split('_')[2]
            # A payload with more than three entries carries an explicit
            # sampling rate at index 2.
            if len(filedata) > 3:
                self.samplingRate = SamplingRateEnum.get_enum_from_string(filedata[2])
            else:
                self.samplingRate = SamplingRateEnum.FASTEST

            samples = filedata[-1]
            if len(samples) == 0:
                # Previously only ``sensorData`` was set here, which made
                # ``__flat__`` fail with AttributeError on ``timestamps``.
                self._set_empty_data()
                return

            n_samples = len(samples)
            n_channels = len(samples[0]['values'])
            self.timestamps = np.zeros(n_samples, dtype=np.longlong)
            self.sensorData = np.zeros((n_samples, n_channels))
            for index, sample in enumerate(samples):
                self.timestamps[index] = sample['timestamp']
                self.sensorData[index] = sample['values']

            # Linear interpolation onto an evenly spaced time grid covering the
            # same span with the same number of points.
            # NOTE(review): np.interp expects increasing x-coordinates; this
            # assumes the recorded timestamps are sorted - confirm.
            self.lin_timestamps = np.linspace(self.timestamps[0], self.timestamps[-1], n_samples)
            self.lin_sensorData = np.zeros((n_samples, n_channels))
            for channel in range(n_channels):
                self.lin_sensorData[:, channel] = np.interp(
                    self.lin_timestamps, self.timestamps, self.sensorData[:, channel]
                )

        def _use_filename_fallback(self, fname: str) -> None:
            """Populate the instance from the file name alone (no usable payload)."""
            self.sensorType = SensorEnum.from_short_string(SensorEnum, short_string=(fname.split('.')[-2])[-3:])
            self.oldName = None
            self.stamp = None
            self.samplingRate = None
            self._set_empty_data()

        def _set_empty_data(self) -> None:
            """Install zero-length arrays so that ``timestamps.size == 0``."""
            # np.empty(()) would be a 0-d array holding ONE uninitialised value
            # (size 1), which defeats the emptiness check in __flat__.
            self.timestamps = np.empty((0,), dtype=np.longlong)
            self.sensorData = np.empty((0, 0))
            self.lin_timestamps = np.empty((0,))
            self.lin_sensorData = np.empty((0, 0))

        def __flat__(self, filterTO=False):
            """
            Return the measurement as one row for ``AllMeasurements``.

            Args:
                filterTO: when true, recordings flagged as timed out are dropped.

            Returns:
                A list of 11 values in the column order used by ``load``, or
                ``None`` when the measurement has no samples or is filtered out.
            """
            if self.timestamps.size == 0:
                return None
            if filterTO and self.toFlag:
                return None
            return [self.uid, self.nid, self.sensorType, self.samplingRate,
                    self.toFlag, self.selfFlag, self.oldName,
                    self.timestamps, self.sensorData,
                    self.lin_timestamps, self.lin_sensorData]

    def load(self, path: str, filterTo = False) -> None:
        """
        Read every measurement below ``path`` into ``self.AllMeasurements``.

        Args:
            path: root folder; each sub-directory is treated as one user id.
            filterTo: forwarded to ``OneSensorMeasurement.__flat__``; when true,
                timed-out recordings are left out.
        """
        self.folderprefix = path
        # Only directories are user folders; stray files in the root are ignored.
        self.ids = [
            entry for entry in os.listdir(self.folderprefix)
            if os.path.isdir(os.path.join(self.folderprefix, entry))
        ]
        mlist = []
        for uid in self.ids:
            # user_dir is the uid folder
            user_dir = os.path.join(self.folderprefix, uid)
            for filename in tqdm(os.listdir(user_dir)):
                file_path = os.path.join(user_dir, filename)
                if not os.path.isfile(file_path):
                    continue
                row = self.OneSensorMeasurement(file_path).__flat__(filterTo)
                if row is not None:
                    mlist.append(row)
        self.AllMeasurements = pd.DataFrame(
            mlist,
            columns=['uid','mid','stype','samplerate','timeoutFlag','selftriggerFlag','oldname','timestamp','sensordata','lin_timestamp','lin_sensordata'],
        )
        
        



['session1', 'session2']

In [None]:
from datetime import datetime

# Dataset roots to merge. Each root is loaded with GestureDataset.load, which
# treats every sub-directory as one user id.
paths = ["./files/session1/", "./files/session2/", "./files/diy_50/"]
# Last folder name of each root; normpath makes this independent of whether the
# path was written with a trailing slash.
path_ends = [os.path.basename(os.path.normpath(q)) for q in paths]

current_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
pickle_name = '_'.join(path_ends) + '_' + current_time + '.pkl'
#pickle_name = 'merged_df.pkl'

# Load each dataset, index it, and collect the frames; they are concatenated
# once at the end instead of growing a DataFrame inside the loop.
frames = []
for path in paths:
    dataset = GestureDataset()
    dataset.load(path, True)
    df = dataset.AllMeasurements.copy()
    # Running number of repeated recordings sharing the same key columns.
    df['counter']=df.groupby(['uid','selftriggerFlag','mid','stype']).cumcount()
    # drop=False keeps the key columns, so they survive the index reset below.
    df.set_index(keys=['uid','selftriggerFlag','mid','stype','counter'], inplace=True, drop=False)
    df.sort_index(inplace=True)
    frames.append(df)

# Skip empty datasets when at least one has rows. The merged frame always gets
# a fresh RangeIndex, whether one or several datasets contributed.
non_empty_frames = [frame for frame in frames if not frame.empty]
merged_df = pd.concat(non_empty_frames or frames, ignore_index=True)

print(merged_df['uid'].unique())
# Persist the MERGED frame; saving `df` here would keep only the last dataset.
merged_df.to_pickle(pickle_name)

100%|██████████| 86/86 [00:00<00:00, 625.23it/s]
100%|██████████| 70/70 [00:00<00:00, 1160.18it/s]
100%|██████████| 112/112 [00:00<00:00, 655.48it/s]
100%|██████████| 63/63 [00:00<00:00, 812.46it/s]
100%|██████████| 112/112 [00:00<00:00, 673.65it/s]
100%|██████████| 77/77 [00:00<00:00, 961.76it/s]
100%|██████████| 64/64 [00:00<00:00, 874.97it/s]
100%|██████████| 232/232 [00:00<00:00, 1207.76it/s]
100%|██████████| 232/232 [00:00<00:00, 579.20it/s]
100%|██████████| 127/127 [00:00<00:00, 1061.07it/s]
100%|██████████| 223/223 [00:00<00:00, 926.94it/s]
100%|██████████| 224/224 [00:00<00:00, 1162.77it/s]
100%|██████████| 122/122 [00:00<00:00, 1217.93it/s]
100%|██████████| 416/416 [00:00<00:00, 661.14it/s]
100%|██████████| 416/416 [00:00<00:00, 531.23it/s]
100%|██████████| 424/424 [00:00<00:00, 431.51it/s]


In [23]:
# Show the distinct user ids present in the merged frame.
print(merged_df['uid'].unique())


['90001' '90002' '90004' '90005' '90006' '90010' '90003' '90007' '0112'
 '0113' '0114']


In [None]:
# Load the session-2 folder on its own; the second argument (filterTo) is
# passed as true.
mydataset = GestureDataset()
mydataset.load("./files/session2/", filterTo=True)

100%|██████████| 232/232 [00:01<00:00, 231.31it/s]
100%|██████████| 232/232 [00:00<00:00, 415.72it/s]
100%|██████████| 224/224 [00:00<00:00, 401.85it/s]
100%|██████████| 127/127 [00:00<00:00, 444.84it/s]
100%|██████████| 64/64 [00:00<00:00, 307.73it/s]
100%|██████████| 122/122 [00:00<00:00, 404.36it/s]
100%|██████████| 223/223 [00:00<00:00, 280.91it/s]


In [None]:
# Build the indexed working frame in one chain:
#   1. number repeated recordings that share (uid, selftriggerFlag, mid, stype),
#   2. index by those keys plus the counter while keeping them as columns,
#   3. sort by that index.
df = (
    mydataset.AllMeasurements
    .assign(
        counter=lambda frame: frame.groupby(['uid','selftriggerFlag','mid','stype']).cumcount()
    )
    .set_index(keys=['uid','selftriggerFlag','mid','stype','counter'], drop=False)
    .sort_index()
)

In [9]:
# Distinct user ids in the current `df`.
df['uid'].unique()

array(['90001', '90002', '90003', '90004', '90005', '90006', '90007'],
      dtype=object)

In [10]:
# Save the current `df` to disk as a pickle file.
df.to_pickle('sess2_base.pkl')

Up to here. (Original Hungarian note: "Eddig.")

In [93]:
# Re-key the frame by (uid, mid, counter, stype) and sort it. drop=True moves
# these columns out of the frame and into the index, so this cell expects them
# to still be present as columns when it runs.
df = df.set_index(['uid','mid','counter','stype'], drop=True).sort_index()

In [3]:
# Sort `df` by its index in place. This needs `df` to exist already: the
# NameError recorded below shows the cell was run while `df` was undefined.
df.sort_index(inplace=True)

NameError: name 'df' is not defined

In [2]:
class GestureDataset():
    """
    Class for handling the gesture dataset (list-based variant).

    Timestamps and sensor values are kept as plain Python lists and
    ``AllMeasurements`` has nine columns.

    Note: this notebook defines ``GestureDataset`` in more than one cell;
    whichever of those cells was executed last is the definition in effect.
    """

    class OneSensorMeasurement:
        """
        One sensor recording read from a single JSON file.

        ``uid`` and ``nid`` come from the file name (the first two
        ``_``-separated tokens of the part before the first ``-``). The JSON
        payload is read as a list ``[sensor type, old name, (sampling rate,)
        samples]`` whose samples are dicts with ``'timestamp'`` and
        ``'values'`` keys.
        """

        def __init__(self, path: str):
            """
            Parse the measurement stored at ``path``.

            Raises:
                OSError: if the file cannot be opened.
                IndexError, ValueError: if the file name does not start with
                    ``<uid>_<integer>``.
            """
            with open(path) as in_file:
                #print(path)
                fname = path.split(os.sep)[-1]
                name_tokens = fname.split("-")[0].split("_")
                self.uid = name_tokens[0]
                self.nid = int(name_tokens[1])
                self.toFlag = "timedOut" in fname
                self.selfFlag = "self" in fname
                # Always defined, so __flat__ cannot fail with AttributeError
                # when the payload holds no samples or cannot be parsed.
                self.timestamps = []
                self.sensorData = []
                self.stamp = None
                try:
                    filedata = json.load(in_file)
                    self.sensorType = SensorEnum(filedata[0])
                    self.oldName = filedata[1]
                    self.stamp = filedata[1].split('_')[2]
                    # A payload with more than three entries carries an
                    # explicit sampling rate at index 2.
                    if len(filedata) > 3:
                        self.samplingRate = SamplingRateEnum.get_enum_from_string(filedata[2])
                    else:
                        self.samplingRate = SamplingRateEnum.FASTEST
                    samples = filedata[-1]
                    # Collect into locals first so a malformed sample cannot
                    # leave half-filled lists on the instance.
                    stamps = [sample['timestamp'] for sample in samples]
                    values = [sample['values'] for sample in samples]
                    self.timestamps = stamps
                    self.sensorData = values
                except Exception:
                    # Deliberately broad: a malformed file must not abort a
                    # bulk load. Unlike a bare ``except`` this lets
                    # KeyboardInterrupt / SystemExit through. The sensor type
                    # is recovered from the last three characters of the
                    # file-name stem.
                    self.sensorType=SensorEnum.from_short_string(SensorEnum,short_string=(fname.split('.')[-2])[-3:])
                    self.oldName = None
                    self.stamp = None
                    self.samplingRate = None
                    self.timestamps = []
                    self.sensorData = []

        def __flat__(self):
            """Return the measurement as one nine-value row, in ``load``'s column order."""
            return [self.uid,self.nid,self.sensorType,self.samplingRate,self.toFlag,self.selfFlag,self.oldName,self.timestamps,self.sensorData]

    def load(self, path: str) -> None:
        """
        Read every measurement below ``path`` into ``self.AllMeasurements``.

        Args:
            path: root folder; each sub-directory is treated as one user id.
        """
        self.folderprefix = path
        # Only directories are user folders; listing a stray file in the root
        # as if it were a folder would raise NotADirectoryError.
        self.ids = [
            entry for entry in os.listdir(self.folderprefix)
            if os.path.isdir(os.path.join(self.folderprefix, entry))
        ]
        mlist = []
        for uid in self.ids:
            # user_dir is the uid folder
            user_dir = os.path.join(self.folderprefix, uid)
            for filename in os.listdir(user_dir):
                file_path = os.path.join(user_dir, filename)
                if os.path.isfile(file_path):
                    mlist.append(self.OneSensorMeasurement(file_path).__flat__())
        self.AllMeasurements = pd.DataFrame(mlist,columns=['uid','mid','stype','samplerate','timeoutFlag','selftriggerFlag','oldname','timestamp','sensordata'])
        
        

   

In [None]:
"""
class GestureDataset():

    class OneSensorMeasurement:
        def __init__(self, path: str):
            with open(path) as in_file:
                try:
                    filedata:list = json.load(in_file)
                    fname = path.split(os.sep)[-1]
                    self.uid = fname.split("-")[0].split("_")[0]
                    self.nid:int = (int)(fname.split("-")[0].split("_")[1])
                    self.sensorType = SensorEnum(filedata[0])
                    self.oldName = filedata[1]
                    self.stamp = filedata[1].split('_')[2]
                    self.toFlag = fname.__contains__("timedOut")
                    self.selfFlag = fname.__contains__("self")
                    if(filedata.__len__() > 3):
                        self.samplingRate:SamplingRateEnum =  SamplingRateEnum.get_enum_from_string(filedata[2])
                    else:
                        self.samplingRate = SamplingRateEnum.FASTEST
                    array_of_dicts = filedata[filedata.__len__() - 1]
                    if(array_of_dicts.__len__()!=0):
                        self.sensorDataDict = {}
                        for d in array_of_dicts:
                            self.sensorDataDict[d['timestamp']]=d['values']
                    else:
                        self.sensorDataDict = {}
                except:
                    fname = path.split(os.sep)[-1].split('.')[0]
                    self.uid = fname.split("-")[0].split("_")[0]
                    self.nid:int = (int)(fname.split("-")[0].split("_")[1])
                    self.sensorType=SensorEnum.from_short_string(SensorEnum,short_string=(fname.split('_')[-1])[-3:])
                    self.oldName = None
                    self.stamp = re.split(r'([a-zA-Z]+)',fname.split("_")[2])[0]
                    self.toFlag = fname.__contains__("timedOut")
                    self.selfFlag = fname.__contains__("self")
                    self.samplingRate = None
                    self.sensorDataDict = None


        def __flat__(self):
            return [self.uid,self.nid,self.sensorType,self.toFlag,self.selfFlag,self.stamp,self.oldName,self.sensorDataDict]
        
    def load(self, path: str) -> None:
        
        self.folderprefix = path
        self.ids = os.listdir(self.folderprefix)
        mlist = []
        for uid in self.ids:
            filePath = os.path.join(self.folderprefix, uid)
            #filePath is the uid folders
            for filename in os.listdir(filePath):
                if os.path.isfile(os.path.join(filePath, filename)):
                    mlist.append(self.OneSensorMeasurement(os.path.join(filePath, filename)).__flat__())
        self.AllMeasurements = pd.DataFrame(mlist,columns=['uid','mid','stype','timeoutFlag','selftriggerFlag','timestamp','fname','mdata'])
        
        

   
"""