In [2]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics
from scipy import signal

In [3]:
data_folder = "../../data/william/"

In [4]:
fig_folder = "../../fig/william/"

In [5]:
file_for_export = f'{data_folder}/preprocessed_data.csv'

In [6]:
rng = np.random.default_rng(seed=123)

In [7]:
def preprocessing(data):

    # Remove First Subcarrier
    data = data.iloc[1:]
    
    # Flip sencond half of the subcarriers
    datalow = data.iloc[0:27]
    datahigh = data.iloc[27:54] *-1
    data = pd.concat([datalow, datahigh])


    # Take the average of all the subcarriers
    data = data.mean(axis=0)
    data = data.to_frame() 
    data = data.T 


    # Take the average of value all 2000 rows then subtract the average from each column to scale the variations from zero
    AVG = data.mean(axis=1)
    AVG = AVG.to_frame() 
    AVG = AVG.T
    AVG = AVG.iloc[0][0]
    data = data - AVG

    # Multiple the data to make it look better on a graph
    data = data* 10000

    # Apply median filter to reduce occational spikes in the data
    columns = len(data.columns)
    data = pd.DataFrame.to_numpy(data)
    data = np.reshape(data,columns)
    data = signal.medfilt(data, kernel_size=3)
    data = pd.DataFrame(data).T
    
    return data

In [8]:
StandingData = glob.glob(f"{data_folder}/Standing//*.csv")
SittingData = glob.glob(f"{data_folder}/Sitting//*.csv")

print("StandingData n_files", len(StandingData))
print("SittingData n_files", len(SittingData))

StandingData n_files 178
SittingData n_files 172


In [9]:
li = []

for files in StandingData:
    data = pd.read_csv(files, index_col=False, header=None)
    data = preprocessing(data)
    li.append(data)
    
data = pd.concat(li, axis=0, ignore_index=True,sort=False)
label = 'standing'
data.insert(0, 'label', label)

data_stand = data

In [10]:
li = []

for files in SittingData:
    data = pd.read_csv(files, index_col=False, header=None)
    data = preprocessing(data)
    li.append(data)
    
data = pd.concat(li, axis=0, ignore_index=True,sort=False)
label = 'sitting'
data.insert(0, 'label', label)

data_sit = data

In [11]:
data = pd.concat([data_sit, data_stand], ignore_index=True, sort=False)
data

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,sitting,-2.765315,-2.773235,-2.792850,-2.850304,-2.792850,-2.755526,-2.755526,-2.828954,-2.887963,...,3.652913,3.693894,3.693894,3.586089,3.327449,3.586089,3.615411,3.615411,3.615411,3.604214
1,sitting,-6.481415,-6.481415,-6.481415,-6.354682,-6.372582,-6.372582,-6.372582,-6.246382,-6.209179,...,9.475628,9.633154,9.633154,9.686605,9.714546,9.714546,9.741522,9.879390,9.879390,9.438135
2,sitting,-0.500630,-0.500630,-0.207738,-0.207738,-0.291950,-0.357956,-0.357956,-0.251849,-0.251849,...,-2.995442,-3.048498,-3.048498,-2.906026,-2.853467,-2.853467,-3.089772,-3.089772,-2.908626,-2.593068
3,sitting,-8.938822,-9.029880,-9.029880,-9.029880,-9.041670,-9.041670,-9.307008,-9.350704,-9.307008,...,11.402346,11.402346,11.402550,11.587207,11.587207,11.396627,11.396627,11.555389,11.555389,11.504857
4,sitting,-6.464198,-6.697730,-6.767156,-6.767156,-7.055773,-7.055773,-7.042009,-6.818914,-6.818914,...,12.950481,12.950481,12.950481,12.524212,12.660916,12.660916,12.660916,12.754839,12.696585,12.696585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,standing,2.410249,3.075905,3.109557,3.109557,2.935175,2.935175,2.935175,2.949612,2.949612,...,-3.992085,-4.024168,-4.134200,-4.134200,-4.235893,-3.971354,-4.235893,-3.971354,-4.268035,-3.364959
346,standing,-0.169173,-0.169173,-0.132426,-0.086081,-0.109054,-0.035464,-0.109054,0.150781,-0.017704,...,0.480992,0.314674,0.314674,0.370741,0.370741,0.405706,0.565744,0.657128,0.565744,0.000000
347,standing,6.287340,6.335310,6.401113,6.401113,6.322783,6.167185,6.167185,6.491922,6.521754,...,-8.762976,-8.886951,-9.116513,-9.119482,-9.125473,-9.119482,-9.125473,-9.052910,-9.133845,-8.984107
348,standing,6.754449,6.754449,6.756038,6.835916,6.835916,6.728938,6.709035,6.728938,6.811392,...,-10.965625,-10.965625,-11.030573,-11.164072,-11.164072,-11.089095,-11.071787,-11.071787,-10.782348,-10.625323


In [12]:
data.label.value_counts()

standing    178
sitting     172
Name: label, dtype: int64

In [13]:
data

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,sitting,-2.765315,-2.773235,-2.792850,-2.850304,-2.792850,-2.755526,-2.755526,-2.828954,-2.887963,...,3.652913,3.693894,3.693894,3.586089,3.327449,3.586089,3.615411,3.615411,3.615411,3.604214
1,sitting,-6.481415,-6.481415,-6.481415,-6.354682,-6.372582,-6.372582,-6.372582,-6.246382,-6.209179,...,9.475628,9.633154,9.633154,9.686605,9.714546,9.714546,9.741522,9.879390,9.879390,9.438135
2,sitting,-0.500630,-0.500630,-0.207738,-0.207738,-0.291950,-0.357956,-0.357956,-0.251849,-0.251849,...,-2.995442,-3.048498,-3.048498,-2.906026,-2.853467,-2.853467,-3.089772,-3.089772,-2.908626,-2.593068
3,sitting,-8.938822,-9.029880,-9.029880,-9.029880,-9.041670,-9.041670,-9.307008,-9.350704,-9.307008,...,11.402346,11.402346,11.402550,11.587207,11.587207,11.396627,11.396627,11.555389,11.555389,11.504857
4,sitting,-6.464198,-6.697730,-6.767156,-6.767156,-7.055773,-7.055773,-7.042009,-6.818914,-6.818914,...,12.950481,12.950481,12.950481,12.524212,12.660916,12.660916,12.660916,12.754839,12.696585,12.696585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,standing,2.410249,3.075905,3.109557,3.109557,2.935175,2.935175,2.935175,2.949612,2.949612,...,-3.992085,-4.024168,-4.134200,-4.134200,-4.235893,-3.971354,-4.235893,-3.971354,-4.268035,-3.364959
346,standing,-0.169173,-0.169173,-0.132426,-0.086081,-0.109054,-0.035464,-0.109054,0.150781,-0.017704,...,0.480992,0.314674,0.314674,0.370741,0.370741,0.405706,0.565744,0.657128,0.565744,0.000000
347,standing,6.287340,6.335310,6.401113,6.401113,6.322783,6.167185,6.167185,6.491922,6.521754,...,-8.762976,-8.886951,-9.116513,-9.119482,-9.125473,-9.119482,-9.125473,-9.052910,-9.133845,-8.984107
348,standing,6.754449,6.754449,6.756038,6.835916,6.835916,6.728938,6.709035,6.728938,6.811392,...,-10.965625,-10.965625,-11.030573,-11.164072,-11.164072,-11.089095,-11.071787,-11.071787,-10.782348,-10.625323


In [14]:
idx = data.columns[1:] 
x = data[idx].to_numpy()
x -= x.mean()
x /= x.std()
data[idx] = x
data

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,sitting,-0.416300,-0.417491,-0.420441,-0.429083,-0.420441,-0.414827,-0.414827,-0.425872,-0.434747,...,0.549077,0.555241,0.555241,0.539026,0.500123,0.539026,0.543436,0.543436,0.543436,0.541752
1,sitting,-0.975244,-0.975244,-0.975244,-0.956182,-0.958875,-0.958875,-0.958875,-0.939893,-0.934297,...,1.424881,1.448575,1.448575,1.456614,1.460817,1.460817,1.464875,1.485612,1.485612,1.419242
2,sitting,-0.075665,-0.075665,-0.031610,-0.031610,-0.044277,-0.054205,-0.054205,-0.038245,-0.038245,...,-0.450913,-0.458894,-0.458894,-0.437464,-0.429559,-0.429559,-0.465102,-0.465102,-0.437855,-0.390392
3,sitting,-1.344867,-1.358563,-1.358563,-1.358563,-1.360337,-1.360337,-1.400246,-1.406819,-1.400246,...,1.714682,1.714682,1.714712,1.742487,1.742487,1.713822,1.713822,1.737701,1.737701,1.730101
4,sitting,-0.972655,-1.007781,-1.018223,-1.018223,-1.061634,-1.061634,-1.059564,-1.026008,-1.026008,...,1.947539,1.947539,1.947539,1.883424,1.903985,1.903985,1.903985,1.918113,1.909350,1.909350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,standing,0.362166,0.462288,0.467350,0.467350,0.441121,0.441121,0.441121,0.443292,0.443292,...,-0.600820,-0.605646,-0.622196,-0.622196,-0.637492,-0.597702,-0.637492,-0.597702,-0.642326,-0.506493
346,standing,-0.025810,-0.025810,-0.020282,-0.013312,-0.016767,-0.005698,-0.016767,0.022315,-0.003027,...,0.071983,0.046967,0.046967,0.055400,0.055400,0.060659,0.084731,0.098476,0.084731,-0.000364
347,standing,0.945325,0.952541,0.962438,0.962438,0.950656,0.927253,0.927253,0.976097,0.980584,...,-1.318418,-1.337065,-1.371594,-1.372040,-1.372942,-1.372040,-1.372942,-1.362027,-1.374201,-1.351678
348,standing,1.015584,1.015584,1.015823,1.027837,1.027837,1.011747,1.008753,1.011747,1.024149,...,-1.649722,-1.649722,-1.659491,-1.679571,-1.679571,-1.668293,-1.665690,-1.665690,-1.622155,-1.598536


In [15]:
data.to_csv(file_for_export, index=True, header=True)