In [16]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics
from scipy import signal

In [17]:
data_folder = "../../data/william/"

In [18]:
rng = np.random.default_rng(seed=123)

In [19]:
def preprocessing(data):

    # Remove First Subcarrier
    data = data.iloc[1:]
    
    # Flip sencond half of the subcarriers
    datalow = data.iloc[0:27]
    datahigh = data.iloc[27:54] *-1
    data = pd.concat([datalow, datahigh])


    # Take the average of all the subcarriers
    data = data.mean(axis=0)
    data = data.to_frame() 
    data = data.T 


    # Take the average of value all 2000 rows then subtract the average from each column to scale the variations from zero
    AVG = data.mean(axis=1)
    AVG = AVG.to_frame() 
    AVG = AVG.T
    AVG = AVG.iloc[0][0]
    data = data - AVG

    # Multiple the data to make it look better on a graph
    data = data* 10000

    # Apply median filter to reduce occational spikes in the data
    columns = len(data.columns)
    data = pd.DataFrame.to_numpy(data)
    data = np.reshape(data,columns)
    data = signal.medfilt(data, kernel_size=3)
    data = pd.DataFrame(data).T
    
    return data

In [20]:
StandingData = glob.glob(f"{data_folder}/Standing//*.csv")
SittingData = glob.glob(f"{data_folder}/Sitting//*.csv")

print("StandingData n_files", len(StandingData))
print("SittingData n_files", len(SittingData))

li = []

for files in StandingData:
    data = pd.read_csv(files, index_col=False, header=None)
    data = preprocessing(data)
    label = 'standing'
    data.insert(0, 'label', label)
    li.append(data)

for files in SittingData:
    data = pd.read_csv(files, index_col=False, header=None)
    preprocessing(data)
    label = 'sitting'
    data.insert(0, 'label', label)
    li.append(data)

rng.shuffle(li)
    
full = pd.concat(li, axis=0, ignore_index=True,sort=False)

StandingData n_files 178
SittingData n_files 172


In [21]:
full

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,standing,7.591890,7.606061,7.606061,7.583291,7.524393,7.573645,7.573645,7.581924,7.659597,...,-18.503573,-18.536409,-18.536409,-18.471819,-18.433457,-18.433457,-18.380144,-18.292997,-18.344547,-18.292997
1,standing,7.702555,7.735842,7.822729,7.822729,7.856182,7.752189,7.720504,7.720504,7.720504,...,-6.181843,-6.181843,-6.174147,-6.073277,-5.966628,-5.966628,-6.146685,-6.234708,-6.235848,-6.235848
2,sitting,0.012078,0.012669,0.012284,0.012150,0.012223,0.012318,0.012212,0.012217,0.012149,...,0.013404,0.013455,0.012818,0.013740,0.012953,0.013090,0.013345,0.013388,0.013119,0.013231
3,sitting,0.012646,0.012866,0.012754,0.012776,0.012669,0.012870,0.012797,0.012843,0.012804,...,0.013866,0.014092,0.013692,0.013918,0.013755,0.013800,0.013868,0.013811,0.013830,0.013684
4,sitting,0.013075,0.013512,0.013367,0.013415,0.013271,0.013346,0.013404,0.013346,0.013364,...,0.014323,0.014582,0.014399,0.014377,0.014251,0.014276,0.014385,0.014405,0.014334,0.014064
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9633,sitting,0.013357,0.013403,0.013540,0.013456,0.013639,0.013336,0.013756,0.013460,0.013954,...,0.011328,0.011408,0.011262,0.011466,0.011461,0.011660,0.011262,0.011286,0.011527,0.011714
9634,sitting,0.013161,0.013229,0.013166,0.012729,0.013185,0.013159,0.013193,0.012868,0.013381,...,0.010794,0.011072,0.010817,0.010999,0.010787,0.010852,0.010832,0.010975,0.010924,0.011170
9635,sitting,0.012931,0.013241,0.012639,0.012518,0.012807,0.012952,0.012685,0.012385,0.012644,...,0.010741,0.010491,0.010739,0.010647,0.010723,0.010776,0.010596,0.010578,0.010701,0.010571
9636,sitting,0.012002,0.012240,0.011962,0.012199,0.011952,0.012052,0.012052,0.012377,0.011633,...,0.009823,0.009810,0.009729,0.010107,0.009420,0.010108,0.009994,0.010150,0.010032,0.010083


In [22]:
idx = full.columns[1:] 
data_full = full[idx].to_numpy()
data_full -= data_full.mean()
data_full /= data_full.std()
full[idx] = data_full
full

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,standing,8.333775,8.349366,8.349366,8.324313,8.259507,8.313698,8.313698,8.322808,8.408273,...,-20.379246,-20.415375,-20.415375,-20.344307,-20.302096,-20.302096,-20.243436,-20.147547,-20.204269,-20.147547
1,standing,8.455540,8.492166,8.587768,8.587768,8.624577,8.510152,8.475289,8.475289,8.475289,...,-6.821561,-6.821561,-6.813092,-6.702105,-6.584758,-6.584758,-6.782875,-6.879728,-6.880982,-6.880982
2,sitting,-0.006346,-0.005696,-0.006120,-0.006267,-0.006186,-0.006082,-0.006199,-0.006193,-0.006268,...,-0.004887,-0.004831,-0.005532,-0.004517,-0.005383,-0.005232,-0.004952,-0.004905,-0.005201,-0.005077
3,sitting,-0.005721,-0.005479,-0.005603,-0.005579,-0.005696,-0.005474,-0.005555,-0.005505,-0.005548,...,-0.004378,-0.004130,-0.004570,-0.004321,-0.004501,-0.004451,-0.004377,-0.004439,-0.004418,-0.004579
4,sitting,-0.005249,-0.004768,-0.004928,-0.004875,-0.005033,-0.004951,-0.004887,-0.004950,-0.004931,...,-0.003875,-0.003591,-0.003792,-0.003816,-0.003955,-0.003927,-0.003807,-0.003786,-0.003864,-0.004161
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9633,sitting,-0.004938,-0.004888,-0.004737,-0.004830,-0.004628,-0.004962,-0.004500,-0.004825,-0.004282,...,-0.007171,-0.007083,-0.007244,-0.007019,-0.007025,-0.006806,-0.007244,-0.007218,-0.006952,-0.006746
9634,sitting,-0.005154,-0.005080,-0.005149,-0.005630,-0.005128,-0.005156,-0.005119,-0.005477,-0.004912,...,-0.007759,-0.007453,-0.007733,-0.007534,-0.007766,-0.007695,-0.007717,-0.007560,-0.007616,-0.007345
9635,sitting,-0.005408,-0.005066,-0.005729,-0.005861,-0.005544,-0.005384,-0.005678,-0.006009,-0.005724,...,-0.007817,-0.008092,-0.007819,-0.007921,-0.007837,-0.007779,-0.007976,-0.007997,-0.007861,-0.008004
9636,sitting,-0.006430,-0.006168,-0.006474,-0.006213,-0.006485,-0.006374,-0.006375,-0.006018,-0.006835,...,-0.008828,-0.008841,-0.008930,-0.008515,-0.009271,-0.008514,-0.008639,-0.008468,-0.008598,-0.008542


In [28]:
f_name = f'{data_folder}/preprocessed_data.csv'
full.to_csv(f_name, index=True, header=True)

In [29]:
data = pd.read_csv(f_name, index_col=0)
data

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,standing,8.333775,8.349366,8.349366,8.324313,8.259507,8.313698,8.313698,8.322808,8.408273,...,-20.379246,-20.415375,-20.415375,-20.344307,-20.302096,-20.302096,-20.243436,-20.147547,-20.204269,-20.147547
1,standing,8.455540,8.492166,8.587768,8.587768,8.624577,8.510152,8.475289,8.475289,8.475289,...,-6.821561,-6.821561,-6.813092,-6.702105,-6.584758,-6.584758,-6.782875,-6.879728,-6.880982,-6.880982
2,sitting,-0.006346,-0.005696,-0.006120,-0.006267,-0.006186,-0.006082,-0.006199,-0.006193,-0.006268,...,-0.004887,-0.004831,-0.005532,-0.004517,-0.005383,-0.005232,-0.004952,-0.004905,-0.005201,-0.005077
3,sitting,-0.005721,-0.005479,-0.005603,-0.005579,-0.005696,-0.005474,-0.005555,-0.005505,-0.005548,...,-0.004378,-0.004130,-0.004570,-0.004321,-0.004501,-0.004451,-0.004377,-0.004439,-0.004418,-0.004579
4,sitting,-0.005249,-0.004768,-0.004928,-0.004875,-0.005033,-0.004951,-0.004887,-0.004950,-0.004931,...,-0.003875,-0.003591,-0.003792,-0.003816,-0.003955,-0.003927,-0.003807,-0.003786,-0.003864,-0.004161
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9633,sitting,-0.004938,-0.004888,-0.004737,-0.004830,-0.004628,-0.004962,-0.004500,-0.004825,-0.004282,...,-0.007171,-0.007083,-0.007244,-0.007019,-0.007025,-0.006806,-0.007244,-0.007218,-0.006952,-0.006746
9634,sitting,-0.005154,-0.005080,-0.005149,-0.005630,-0.005128,-0.005156,-0.005119,-0.005477,-0.004912,...,-0.007759,-0.007453,-0.007733,-0.007534,-0.007766,-0.007695,-0.007717,-0.007560,-0.007616,-0.007345
9635,sitting,-0.005408,-0.005066,-0.005729,-0.005861,-0.005544,-0.005384,-0.005678,-0.006009,-0.005724,...,-0.007817,-0.008092,-0.007819,-0.007921,-0.007837,-0.007779,-0.007976,-0.007997,-0.007861,-0.008004
9636,sitting,-0.006430,-0.006168,-0.006474,-0.006213,-0.006485,-0.006374,-0.006375,-0.006018,-0.006835,...,-0.008828,-0.008841,-0.008930,-0.008515,-0.009271,-0.008514,-0.008639,-0.008468,-0.008598,-0.008542


In [30]:
actions = list(data.label.unique())
actions

['standing', 'sitting']

In [31]:
data.label = pd.Categorical(data.label)

In [33]:
data.label.cat.codes.to_numpy()

array([1, 1, 0, ..., 0, 0, 1], dtype=int8)

In [36]:
data.groupby("label").size()

label
sitting     9460
standing     178
dtype: int64