In [4]:
import pandas as pd
import numpy as np
import os
from glob import glob

In [6]:
from pycaret.classification import *

In [7]:
df_activity = pd.read_csv("activities.csv")

In [8]:
def segmentation(x_data,y,overlap_rate,time_window):
    
    seg_data = []
    overlap = int((1 - overlap_rate)*time_window)
    y_segmented_list = []
    
    for i in range(0,x_data.shape[0],overlap):
        seg_data.append(x_data[i:i+time_window])
        y_segmented_list.append(y)

    return seg_data,y_segmented_list

In [9]:
def handle_missing_values(df):
    df['x']=df['x'].replace(0, np.nan)
    df['y']=df['y'].replace(0, np.nan)
    df['z']=df['z'].replace(0, np.nan)
    return df

In [10]:
def get_act_id(seg_id):
    seg = df_activity[df_activity["segment_id"]==seg_id]
    activity_id = seg["activity_id"].values
    return int(activity_id)

In [11]:
def load_data(csv_file):

    y_list = []
    x_data_list = []

    csv_df = pd.read_csv(csv_file)
    csv_df = handle_missing_values(csv_df)
    csv_df.dropna(inplace=True)
    x_data = csv_df.values
    act_id = get_act_id(int(os.path.splitext(os.path.basename(csv_file))[0].replace("segment","")))
    
    return x_data,act_id

In [12]:
def get_features(x_data):
    features = []
    for i in range(x_data.shape[1]):
        # std
        features.append(x_data.T[i].std(ddof=0))
        # avg
        features.append(np.average(x_data.T[i]))
        # max
        features.append(np.max(x_data.T[i]))
        # min
        features.append(np.min(x_data.T[i]))
    return features

In [13]:
csv_files = glob("train/*")

In [14]:
X_feature_data_list = []
y_list = []
for csv_file in csv_files:
    x,y = load_data(csv_file)
    X_feature_data_list.append(get_features(x))
    y_list.append(y)

In [21]:
col = ["{}_{}".format(axis,feature) for axis in ["x","y","z"] for feature in ["std","avg","maz","min"]]

In [23]:
data = pd.DataFrame(X_feature_data_list,columns=col)

In [24]:
data["activity"] = y_list

In [25]:
data

Unnamed: 0,x_std,x_avg,x_maz,x_min,y_std,y_avg,y_maz,y_min,z_std,z_avg,z_maz,z_min,activity
0,1.639543,-0.851189,3.869,-3.486,2.141813,-8.786306,1.455,-10.496,2.591852,1.170306,10.454,0.001,9
1,1.750402,-0.795858,5.554,-4.941,2.778815,-8.650841,2.336,-11.415,2.715883,1.199549,10.627,0.001,9
2,1.678688,-0.174342,3.294,-4.137,1.616603,-8.053288,-5.286,-10.381,3.429656,3.249198,8.147,-0.218,2
3,1.637309,0.218487,4.826,-2.298,1.461345,-8.397076,-4.137,-10.151,2.799719,2.670798,8.469,0.006,2
4,1.541011,-1.357983,4.06,-4.941,2.136664,-7.973831,0.804,-9.883,3.236027,2.862653,9.735,0.004,3
5,2.176053,-0.354033,8.044,-3.217,1.708563,-7.943238,-1.608,-11.492,2.843903,3.556664,9.413,0.031,4
6,1.839214,0.032792,5.976,-3.026,1.433165,-7.794864,-2.375,-9.921,2.5287,4.253304,8.967,0.014,2
7,1.448357,0.247292,3.179,-4.443,1.61257,-8.575921,-0.612,-10.534,2.680812,-2.117056,-0.024,-10.553,12
8,1.824756,-1.229313,3.869,-3.945,1.967547,-8.012161,-0.536,-10.228,3.106213,2.686955,9.711,0.013,12
9,1.592918,0.02619,4.405,-4.865,1.703651,-8.851849,-3.715,-12.182,2.736181,1.368008,9.343,0.001,6


In [26]:
exp_mclf101 = setup(data = data, target = 'activity', session_id=123) 

Unnamed: 0,Description,Value
0,session_id,123
1,Target,activity
2,Target Type,Multiclass
3,Label Encoded,
4,Original Data,"(178, 13)"
5,Missing Values,False
6,Numeric Features,12
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


In [27]:
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8135,0.9752,0.8,0.821,0.794,0.7706,0.785,0.327
rf,Random Forest Classifier,0.7571,0.9452,0.7389,0.7522,0.7243,0.6981,0.718,0.323
xgboost,Extreme Gradient Boosting,0.7513,0.9284,0.7389,0.7877,0.7328,0.6954,0.7159,0.343
lightgbm,Light Gradient Boosting Machine,0.7103,0.9302,0.6861,0.7241,0.6879,0.6448,0.6639,0.06
gbc,Gradient Boosting Classifier,0.7032,0.8954,0.6778,0.7059,0.6746,0.6349,0.6578,0.64
knn,K Neighbors Classifier,0.6782,0.9181,0.65,0.6671,0.644,0.6055,0.6227,0.084
lr,Logistic Regression,0.6692,0.8988,0.6278,0.6539,0.6236,0.59,0.6158,0.866
dt,Decision Tree Classifier,0.6442,0.779,0.625,0.6182,0.6045,0.5575,0.5749,0.023
ridge,Ridge Classifier,0.6135,0.0,0.5694,0.5791,0.5644,0.5228,0.5463,0.018
lda,Linear Discriminant Analysis,0.6038,0.8671,0.5917,0.6078,0.5666,0.5199,0.5441,0.022
