## load and preview data

In [2]:
import pandas as pd

# Location of the raw motion-sense CSV, relative to the notebook's working directory.
path = "./data/motion_sense_data.csv"

# Read the whole file into memory; the preview below shows row labels up to 1412863.
data = pd.read_csv(path)
# Bare last expression so the notebook renders a truncated preview of the frame.
data

Unnamed: 0,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z,code,weight,height,age,gender,activity
0,0.316738,0.778180,1.082764,0.294894,-0.184493,0.377542,1.0,102.0,188.0,46.0,1.0,downstairs
1,0.842032,0.424446,0.643574,0.219405,0.035846,0.114866,1.0,102.0,188.0,46.0,1.0,downstairs
2,-0.138143,-0.040741,0.343563,0.010714,0.134701,-0.167808,1.0,102.0,188.0,46.0,1.0,downstairs
3,-0.025005,-1.048717,0.035860,-0.008389,0.136788,0.094958,1.0,102.0,188.0,46.0,1.0,downstairs
4,0.114253,-0.912890,0.047341,0.199441,0.353996,-0.044299,1.0,102.0,188.0,46.0,1.0,downstairs
...,...,...,...,...,...,...,...,...,...,...,...,...
1412860,1.446270,2.409207,-1.052698,0.034354,-0.574501,0.060144,24.0,74.0,173.0,18.0,0.0,walking
1412861,0.602010,2.675064,-0.472482,0.547953,-0.481645,-0.232273,24.0,74.0,173.0,18.0,0.0,walking
1412862,1.125564,4.413143,1.581002,0.910034,0.194606,-0.770222,24.0,74.0,173.0,18.0,0.0,walking
1412863,0.086910,0.436147,1.762577,0.811333,0.424518,-0.716784,24.0,74.0,173.0,18.0,0.0,walking


## balance data

In [7]:
### balance data with oversampling SMOTE

from imblearn.over_sampling import SMOTE

# Everything except the label is passed to SMOTE as a feature; that includes the
# subject metadata columns (code, weight, height, age, gender), not only the sensors.
X = data.drop(columns="activity")
y = data["activity"]

# NOTE(review): oversampling runs on the full data set, before the train/test split
# performed later in this notebook, so synthetic rows may be derived from rows that
# end up in the test set -- confirm this is intended.
smote = SMOTE(sampling_strategy="auto", random_state=42)
X_sm, y_sm = smote.fit_resample(X, y)

# Put the label back next to the resampled features and persist the balanced set
# so later sections can start from the file instead of re-running SMOTE.
data_sm = pd.concat([X_sm, y_sm], axis=1)
data_sm.to_csv("./data/motion_sense_data_sm.csv", index=False)

In [8]:
# Sanity check: rows per activity label in the oversampled frame.
data_sm["activity"].value_counts()

activity
downstairs    344288
jogging       344288
sitting       344288
standing      344288
upstairs      344288
walking       344288
Name: count, dtype: int64

## separate by gender

In [3]:
import pandas as pd

# Reload the balanced data set written by the SMOTE cell, so this section can be
# run without repeating the oversampling step.
data_sm = pd.read_csv("./data/motion_sense_data_sm.csv")

In [4]:
# Rows per gender value; the frame is split on the values 0 and 1 in the next cell.
data_sm["gender"].value_counts()

gender
1.0    1179888
0.0     885840
Name: count, dtype: int64

In [5]:
# Partition the balanced frame on the gender flag: rows with gender == 0 go to
# data_female, rows with gender == 1 go to data_male.
data_female = data_sm[data_sm["gender"].eq(0)]
data_male = data_sm[data_sm["gender"].eq(1)]
# Preview the male subset.
data_male

Unnamed: 0,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z,code,weight,height,age,gender,activity
0,0.316738,0.778180,1.082764,0.294894,-0.184493,0.377542,1.0,102.0,188.0,46.0,1.0,downstairs
1,0.842032,0.424446,0.643574,0.219405,0.035846,0.114866,1.0,102.0,188.0,46.0,1.0,downstairs
2,-0.138143,-0.040741,0.343563,0.010714,0.134701,-0.167808,1.0,102.0,188.0,46.0,1.0,downstairs
3,-0.025005,-1.048717,0.035860,-0.008389,0.136788,0.094958,1.0,102.0,188.0,46.0,1.0,downstairs
4,0.114253,-0.912890,0.047341,0.199441,0.353996,-0.044299,1.0,102.0,188.0,46.0,1.0,downstairs
...,...,...,...,...,...,...,...,...,...,...,...,...
2065721,1.844414,-0.709689,-0.393713,-0.338553,0.309578,-0.399193,17.0,76.0,180.0,26.0,1.0,upstairs
2065723,-0.120025,3.900765,0.105771,0.389296,-0.060636,-0.017247,17.0,76.0,180.0,26.0,1.0,upstairs
2065724,1.149925,0.967907,1.011933,-0.147735,0.003548,0.186704,11.0,70.0,178.0,24.0,1.0,upstairs
2065726,1.190189,6.012605,1.744576,0.469483,-0.299237,-0.572555,14.0,70.0,180.0,35.0,1.0,upstairs


In [6]:
### split data into train and test

from sklearn.model_selection import train_test_split

# Only the male subset is split here; the label column is separated for the call.
X = data_male.drop(columns="activity")
y = data_male["activity"]

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Glue the label back onto each partition.
train = pd.concat([X_train, y_train], axis=1)
test = pd.concat([X_test, y_test], axis=1)
# 20% of the test partition: the row count the following cells swap out for female rows.
len(test) * 0.20

47195.600000000006

In [7]:
### remove random 47195 rows from test data

# 47195 is the integer part of len(test) * 0.20 as evaluated in the previous cell
# (47195.6). The shuffle is seeded so the same rows are removed on every run,
# matching the random_state=42 used for SMOTE and train_test_split.
# This cell shrinks `test` each time it executes, so run it once per kernel session.
test = test.sample(frac=1, random_state=42).reset_index(drop=True)
# After the shuffle, dropping the first 47195 positions removes a random 47195 rows.
test = test.drop(test.index[:47195])
test

Unnamed: 0,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z,code,weight,height,age,gender,activity
47195,4.366893,2.207309,2.299202,-0.446153,1.963449,0.479663,4.0,90.0,176.0,31.0,1.0,jogging
47196,-0.002399,-0.047407,-0.009092,-0.001770,0.018943,0.005061,1.0,102.0,188.0,46.0,1.0,standing
47197,-1.502791,-0.056119,0.276151,-0.263147,-0.166368,0.111439,2.0,72.0,180.0,28.0,1.0,walking
47198,0.374253,1.786846,-1.908562,-1.230395,-1.357767,0.294906,2.0,72.0,180.0,28.0,1.0,walking
47199,-0.000307,0.001509,0.000003,-0.017862,0.000904,-0.020304,17.0,76.0,180.0,26.0,1.0,sitting
...,...,...,...,...,...,...,...,...,...,...,...,...
235973,2.330959,-4.351005,0.168665,-0.664348,-0.622367,1.300041,12.0,60.0,167.0,33.0,1.0,jogging
235974,0.008204,-0.018031,-0.001018,-0.012084,-0.017817,-0.013204,17.0,76.0,180.0,26.0,1.0,sitting
235975,-1.461461,0.710073,-0.964444,-0.271553,-0.202720,0.328407,20.0,88.0,180.0,25.0,1.0,walking
235976,1.872079,-0.798971,0.819361,0.083564,-0.830921,-0.507966,14.0,70.0,180.0,35.0,1.0,downstairs


In [8]:
### get random 47195 rows from data_female

# Draw as many female rows as were removed from the male test split. The draw is
# seeded so the resulting test set is reproducible, matching the random_state=42
# used for SMOTE and train_test_split.
data_female_test = data_female.sample(n=47195, random_state=42)
data_female_test

Unnamed: 0,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z,code,weight,height,age,gender,activity
600714,-0.000267,-0.001686,-0.000862,0.003421,-0.000764,-0.007780,23.0,68.0,170.0,25.0,0.0,sitting
1130947,-1.955043,-0.063000,-0.869339,-0.160834,-0.160143,0.112801,10.0,72.0,164.0,31.0,0.0,walking
143265,1.736673,0.867970,-0.710277,-0.181503,2.272840,-0.356535,3.0,48.0,161.0,28.0,0.0,jogging
341826,-0.008286,0.003895,-0.010451,0.008397,-0.002090,-0.006242,8.0,52.0,161.0,24.0,0.0,sitting
5128,-1.394190,-0.230316,-0.474152,-0.204658,-0.568137,0.027319,3.0,48.0,161.0,28.0,0.0,downstairs
...,...,...,...,...,...,...,...,...,...,...,...,...
328407,0.003752,-0.004943,-0.000359,-0.001184,0.013688,0.001847,7.0,62.0,175.0,30.0,0.0,sitting
1455484,-1.840732,-0.489108,0.890782,0.326658,-0.361339,0.035586,23.0,68.0,170.0,25.0,0.0,downstairs
1820077,4.198162,-0.769483,1.198085,0.099044,-0.033732,0.455693,5.0,48.0,164.0,23.0,0.0,jogging
361540,-0.000052,0.001233,0.001754,0.010617,-0.012302,-0.013287,10.0,72.0,164.0,31.0,0.0,sitting


In [9]:
# Stack the sampled female rows underneath the remaining male test rows.
# Row labels are kept as they are, so labels from the two sources may repeat.
test = pd.concat((test, data_female_test), axis="index")
test

Unnamed: 0,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z,code,weight,height,age,gender,activity
47195,4.366893,2.207309,2.299202,-0.446153,1.963449,0.479663,4.0,90.0,176.0,31.0,1.0,jogging
47196,-0.002399,-0.047407,-0.009092,-0.001770,0.018943,0.005061,1.0,102.0,188.0,46.0,1.0,standing
47197,-1.502791,-0.056119,0.276151,-0.263147,-0.166368,0.111439,2.0,72.0,180.0,28.0,1.0,walking
47198,0.374253,1.786846,-1.908562,-1.230395,-1.357767,0.294906,2.0,72.0,180.0,28.0,1.0,walking
47199,-0.000307,0.001509,0.000003,-0.017862,0.000904,-0.020304,17.0,76.0,180.0,26.0,1.0,sitting
...,...,...,...,...,...,...,...,...,...,...,...,...
328407,0.003752,-0.004943,-0.000359,-0.001184,0.013688,0.001847,7.0,62.0,175.0,30.0,0.0,sitting
1455484,-1.840732,-0.489108,0.890782,0.326658,-0.361339,0.035586,23.0,68.0,170.0,25.0,0.0,downstairs
1820077,4.198162,-0.769483,1.198085,0.099044,-0.033732,0.455693,5.0,48.0,164.0,23.0,0.0,jogging
361540,-0.000052,0.001233,0.001754,0.010617,-0.012302,-0.013287,10.0,72.0,164.0,31.0,0.0,sitting


In [10]:
### order by activity and code

# Group rows of the same activity and subject code together.
# NOTE(review): `test` was explicitly shuffled earlier and `train` comes out of
# train_test_split, so the order of rows inside each (activity, code) group is
# presumably no longer the recording order -- confirm before treating consecutive
# rows as a time series.
train = train.sort_values(["activity", "code"])
test = test.sort_values(["activity", "code"])

In [11]:
### label encode activity

from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from the training labels only, then apply the
# same mapping to both partitions.
le = LabelEncoder().fit(train["activity"])
train["activity"] = le.transform(train["activity"])
test["activity"] = le.transform(test["activity"])

# Show the learned classes; the position in this array is the encoded integer.
le.classes_

array(['downstairs', 'jogging', 'sitting', 'standing', 'upstairs',
       'walking'], dtype=object)

In [16]:
# Remove the per-subject metadata columns from both partitions.
train = train.drop(["code", "weight", "height", "age"], axis=1)
test = test.drop(["code", "weight", "height", "age"], axis=1)

In [20]:
# Preview the test partition after encoding and column removal.
test

Unnamed: 0,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z,gender,activity
47249,-0.702027,1.816030,0.869197,0.662776,0.617448,0.147426,1.0,0
47277,-0.127212,-0.581997,0.467010,0.034859,-0.227692,0.110728,1.0,0
47305,-0.548854,4.831874,-2.182188,-0.105475,0.095405,-0.189149,1.0,0
47397,1.397476,-1.163798,0.098087,0.237726,0.055993,-0.251518,1.0,0
47517,1.900301,-1.131043,0.298332,0.253327,0.061950,-0.376362,1.0,0
...,...,...,...,...,...,...,...,...
1347441,0.258462,0.785251,0.024718,-0.714383,1.293127,-0.938433,0.0,5
1412546,1.238141,1.128726,1.248391,0.336987,-0.377715,0.263689,0.0,5
1346128,0.541536,1.434980,1.330623,-0.524233,-0.198933,0.029961,0.0,5
1411441,-0.460049,-4.325561,1.931726,0.567419,2.189623,0.482497,0.0,5


In [21]:
# Gender make-up of the training partition (built from the male subset only).
train["gender"].value_counts()

gender
1.0    943910
Name: count, dtype: int64

In [22]:
# Gender make-up of the test partition (male rows plus the sampled female rows).
test["gender"].value_counts()

gender
1.0    188783
0.0     47195
Name: count, dtype: int64

## standardize data

In [23]:
### standardize sensor data

from sklearn.preprocessing import StandardScaler

# The six sensor channels to be scaled; other columns are left untouched.
sensor_columns = [
    "userAcceleration.x",
    "userAcceleration.y",
    "userAcceleration.z",
    "rotationRate.x",
    "rotationRate.y",
    "rotationRate.z",
]

# Fit the scaler on the training partition only, then apply the same
# transformation to both partitions.
scaler = StandardScaler().fit(train[sensor_columns])
train[sensor_columns] = scaler.transform(train[sensor_columns])
test[sensor_columns] = scaler.transform(test[sensor_columns])

# Persist both partitions without the row labels.
train.to_csv("./data/motion_sense_train.csv", index=False)
test.to_csv("./data/motion_sense_test.csv", index=False)

In [24]:
import numpy as np

# Reload the standardized training partition, keeping the six sensor channels and the label.
# `usecols` only selects columns; the resulting column order follows the file, not this list.
# apply_fft_on_window below indexes columns by position (0-2 gyro, 3-5 acc, 6 activity),
# so it presumably relies on the file storing rotationRate.* before userAcceleration.*
# and activity last -- verify against the CSV header.
sensor_data = pd.read_csv("./data/motion_sense_train.csv", usecols=["rotationRate.x", "rotationRate.y", "rotationRate.z", "userAcceleration.x", "userAcceleration.y", "userAcceleration.z", "activity"])

buffer_size = 256  # number of rows per FFT window
frequency_hz = 50  # sample rate in Hz; 1 / frequency_hz is the sample spacing for the frequency axis


def apply_fft_on_window(window_data):
    """Compute FFT magnitude spectra for one window of sensor rows.

    Parameters
    ----------
    window_data : 2-D array
        One row per sample. Columns are read by position: 0-2 are the gyroscope
        x/y/z channels, 3-5 the accelerometer x/y/z channels and 6 the class label.

    Returns
    -------
    tuple
        ``(freq, acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z, class_label)`` where
        ``freq`` holds the FFT bin frequencies for a window of this length sampled
        at ``frequency_hz``, each spectrum is the magnitude of the full (two-sided)
        FFT of that channel, and ``class_label`` is the most frequent label in the
        window (the smallest one when several labels tie).
    """

    def magnitude(column):
        # Magnitude of the complex FFT of a single channel.
        return np.abs(np.fft.fft(window_data[:, column]))

    gyro_x, gyro_y, gyro_z = (magnitude(column) for column in (0, 1, 2))
    acc_x, acc_y, acc_z = (magnitude(column) for column in (3, 4, 5))

    # Derive the axis from the window actually received, so it always has the same
    # length as the spectra even when the window is not exactly buffer_size rows.
    freq = np.fft.fftfreq(len(window_data), d=1 / frequency_hz)

    # mode() returns its values sorted, so position 0 is the smallest of any ties.
    class_label = pd.Series(window_data[:, 6]).mode().iloc[0]

    return freq, acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z, class_label

# Exclusive upper bound for window start offsets: the last offset at which a full
# buffer_size-row window still fits is len(sensor_data) - buffer_size.
# Despite the name this is not the number of windows produced; the loop in the next
# cell steps through this range in strides of buffer_size.
num_windows = (len(sensor_data) - buffer_size) + 1

In [25]:
# Convert the frame to a NumPy array once. Evaluating `sensor_data.values` inside
# the loop would rebuild the array for the whole frame on every iteration.
sensor_values = sensor_data.values

fft_data = []

# Walk the rows in non-overlapping windows of buffer_size; trailing rows that do
# not fill a complete window are skipped.
for i in range(0, num_windows, buffer_size):
    window_data = sensor_values[i:i + buffer_size]

    freq, fft_ax, fft_ay, fft_az, fft_gx, fft_gy, fft_gz, class_labels = apply_fft_on_window(window_data)

    # One record per window: 1-based window number, frequency axis, the six
    # magnitude spectra and the window's majority class.
    fft_features = {
        'window': i // buffer_size + 1,
        'freq': freq,
        'fft_acc_x': fft_ax,
        'fft_acc_y': fft_ay,
        'fft_acc_z': fft_az,
        'fft_gyro_x': fft_gx,
        'fft_gyro_y': fft_gy,
        'fft_gyro_z': fft_gz,
        'class': class_labels
    }

    fft_data.append(fft_features)

df_fft = pd.DataFrame(fft_data)

# NOTE(review): every spectrum is a NumPy array held in a single cell, so the CSV
# stores its string form -- confirm downstream readers parse that format.
df_fft.to_csv("processed_train_data_activity.csv", index=False)

print("Dados FFT armazenados com sucesso.")

Dados FFT armazenados com sucesso.
