# Bonus Experiments

In [1]:
import pandas as pd
import numpy as np

from sklearn.exceptions import ConvergenceWarning
import warnings

from sklearn.ensemble import IsolationForest

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.utils import to_categorical

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from scikeras.wrappers import KerasClassifier

# Ensemble Methods
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier

from sklearn.decomposition import PCA

from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

import os
import math

from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction import MinimalFCParameters, EfficientFCParameters, ComprehensiveFCParameters
from tsfresh.utilities.dataframe_functions import impute

# Feature extraction using tsfresh with 100 features

In [2]:
# Load the recording metadata table. `extract_feature` below reads this
# module-level DataFrame to locate each experiment's sensor CSV files
# (the `path` column) and its activity label (the `activity_id` column).
sensore_meta = pd.read_csv("meta_data_with_path.csv")

# Preview the first rows to confirm the metadata loaded as expected.
sensore_meta.head()

Unnamed: 0,path,file_name,exp_id,sensor,frequency,activity_id,activity,activity_details
0,DataSet2/User25/316_MetaWear_2021-08-01T14.52....,316_MetaWear_2021-08-01T14.52.00.189_EB942CED9...,316,Accelerometer,100.000Hz,5,While sitting,"Moving head, body"
1,DataSet2/User25/321_MetaWear_2021-08-01T15.09....,321_MetaWear_2021-08-01T15.09.34.379_EB942CED9...,321,Accelerometer,100.000Hz,11,Standing,Taking stairs
2,DataSet2/User25/318_MetaWear_2021-08-01T14.59....,318_MetaWear_2021-08-01T14.59.48.670_EB942CED9...,318,Gyroscope,100.000Hz,7,Sitting,Stand up from sitting
3,DataSet2/User25/316_MetaWear_2021-08-01T14.52....,316_MetaWear_2021-08-01T14.52.00.189_EB942CED9...,316,Gyroscope,100.000Hz,5,While sitting,"Moving head, body"
4,DataSet2/User25/315_MetaWear_2021-08-01T14.48....,315_MetaWear_2021-08-01T14.48.37.034_EB942CED9...,315,Gyroscope,100.000Hz,4,Using computer,Browsing


In [16]:
def _read_xyz(csv_path):
    """Read one sensor recording and keep only its x/y/z axis columns."""
    sensor_df = pd.read_csv(csv_path)
    # The raw files are expected to have exactly six columns in this order.
    sensor_df.columns = ['epoch', 'time', 'elapsed', 'x', 'y', 'z']
    return sensor_df[['x', 'y', 'z']]


def extract_feature(window_size, overlap):
    """Window every recording, extract tsfresh features and save them to CSV.

    For each experiment listed in the module-level ``sensore_meta`` table the
    accelerometer and gyroscope recordings are cut into sliding windows of
    ``window_size`` samples. Every window (per sensor) gets its own id, is
    labelled with the experiment's ``activity_id`` and is passed to tsfresh.
    The resulting feature table is written to
    ``tsfresh_features/w<window_size>_o<overlap%>_ts_features.csv``.

    Args:
        window_size: Number of consecutive samples per window (positive int).
        overlap: Fraction of a window shared with the next one, in ``[0, 1)``.

    Raises:
        ValueError: If ``window_size``/``overlap`` give a non-positive step,
            or if no recording is long enough to yield a single window.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")

    step_size = int(math.ceil(window_size * (1 - overlap)))
    if step_size <= 0:
        # overlap >= 1 would otherwise surface as an opaque range() error.
        raise ValueError(
            f"overlap must be smaller than 1 to advance the window, got {overlap}"
        )

    window_list = []
    activity_list = []
    next_id = 0  # running window id (avoids shadowing the builtin `id`)

    for exp_id in sensore_meta['exp_id'].value_counts().index:
        exp_rows = sensore_meta[sensore_meta['exp_id'] == exp_id]

        # Look the rows up once per experiment and address columns by label
        # rather than by position, so column order in the CSV does not matter.
        acc_row = exp_rows[exp_rows['sensor'] == 'Accelerometer'].iloc[0]
        gyro_row = exp_rows[exp_rows['sensor'] == 'Gyroscope'].iloc[0]
        activity_id = acc_row['activity_id']

        acc_df = _read_xyz(acc_row['path'])
        gyro_df = _read_xyz(gyro_row['path'])

        if gyro_df.isna().any().any():
            print(gyro_row['path'])

        # Bound the windows by the shorter recording so that both sensors
        # always contribute full-length windows; "+ 1" keeps the last window
        # that fits exactly.
        usable_rows = min(len(acc_df), len(gyro_df))
        for start in range(0, usable_rows - window_size + 1, step_size):
            # Accelerometer window first, then gyroscope, each with its own id.
            # NOTE(review): both sensors share the axis names x/y/z, so their
            # features land in the same columns as separate rows - confirm
            # this is intended.
            for sensor_df in (acc_df, gyro_df):
                window = sensor_df.iloc[start:start + window_size].copy()
                window['id'] = next_id
                window_list.append(window)
                activity_list.append({'id': next_id, 'activity_id': activity_id})
                next_id += 1

    if not window_list:
        raise ValueError(
            f"No recording is long enough for a window of {window_size} samples"
        )

    window_df = pd.concat(window_list)
    activities = pd.DataFrame(activity_list)

    # tsfresh expects long format: one row per (id, axis, value).
    extraction_df = window_df.melt(id_vars=['id'],
                                   value_vars=['x', 'y', 'z'],
                                   var_name='axis',
                                   value_name='value')

    # Keep at most the first 100 feature-calculator entries. The original
    # code built this dict but then passed the full settings object instead.
    comprehensive_settings = ComprehensiveFCParameters()
    first_100_features = dict(list(comprehensive_settings.items())[:100])

    features = extract_features(extraction_df,
                                column_id='id',
                                column_kind='axis',
                                column_value='value',
                                default_fc_parameters=first_100_features)

    # Fill non-finite feature values in place of NaN/inf results.
    features = impute(features)

    features.index.name = 'id'
    features = features.reset_index()
    features = features.merge(activities, on='id')
    # drop() returns a new frame; assign it so the sequential window id does
    # not end up as a column of the saved feature matrix.
    features = features.drop(columns=['id'])

    print(f"Window size: {window_size}, Overlap: {overlap*100} Step size: {step_size}, Shape: {features.shape}")

    # Make sure the output folder exists before writing.
    os.makedirs("tsfresh_features", exist_ok=True)
    features.to_csv(f"tsfresh_features/w{window_size}_o{int(overlap*100)}_ts_features.csv")
