In [1]:
import pandas as pd
import numpy as np
import os
from glob import glob

In [2]:
# Lookup table consulted by get_act_id(); it must provide the
# "segment_id" and "activity_id" columns.
df_activity = pd.read_csv("activities.csv")

In [3]:
def segmentation(x_data,y,overlap_rate,time_window):
    """Cut ``x_data`` into overlapping windows along its first axis.

    Args:
        x_data: Object exposing ``shape`` and slice indexing (for example a
            NumPy array); windows are taken along axis 0.
        y: Label attached, unchanged, to every window.
        overlap_rate: Fraction of ``time_window`` shared by two consecutive
            windows. The distance between window starts is
            ``floor((1 - overlap_rate) * time_window)``.
        time_window: Number of rows requested per window.

    Returns:
        tuple: ``(seg_data, y_segmented_list)`` where ``seg_data`` is the list
        of slices ``x_data[start:start + time_window]`` and
        ``y_segmented_list`` holds one copy of ``y`` per slice. Windows that
        start near the end of ``x_data`` contain fewer than ``time_window``
        rows.

    Raises:
        ValueError: If the resulting step between windows is smaller than one
            row (for example ``overlap_rate >= 1``).
    """
    # Floor with a tiny tolerance: (1 - 0.9) * 10 evaluates to 0.999... in
    # floating point, and a bare int() would truncate that to a step of 0.
    step = int((1 - overlap_rate) * time_window + 1e-9)
    if step < 1:
        raise ValueError(
            "overlap_rate=%r with time_window=%r gives a step of %d rows; "
            "the step between windows must be at least 1"
            % (overlap_rate, time_window, step)
        )

    seg_data = [
        x_data[start:start + time_window]
        for start in range(0, x_data.shape[0], step)
    ]
    # Every window inherits the label of the recording it was cut from.
    y_segmented_list = [y] * len(seg_data)

    return seg_data,y_segmented_list

In [4]:
def handle_missing_values(df):
    """Mark zero readings in the ``x``, ``y`` and ``z`` columns as missing.

    Every value equal to 0 in those three columns is replaced by ``NaN``.
    The frame passed in is modified in place and the same object is
    returned; other columns are left untouched.
    """
    for axis in ("x", "y", "z"):
        df[axis] = df[axis].replace(0, np.nan)
    return df

In [5]:
def load_data(csv_file):
    """Read one segment CSV and return its cleaned samples and activity id.

    Zeros in the ``x``/``y``/``z`` columns are treated as missing (see
    ``handle_missing_values``) and every row containing a missing value is
    dropped before the frame is converted to an array.

    Args:
        csv_file: Path to the CSV file. Its base name without extension must
            become an integer once the text ``"segment"`` is removed; that
            integer is the segment id passed to ``get_act_id``.

    Returns:
        tuple: ``(x_data, act_id)`` where ``x_data`` is the ``.values`` array
        of the cleaned frame and ``act_id`` is the result of ``get_act_id``.
    """
    csv_df = handle_missing_values(pd.read_csv(csv_file)).dropna()
    x_data = csv_df.values

    # Derive the segment id from the file actually being loaded; reading it
    # from the global ``csv_files[0]`` would give every file the label of the
    # first one.
    stem = os.path.splitext(os.path.basename(csv_file))[0]
    act_id = get_act_id(int(stem.replace("segment","")))

    return x_data,act_id

In [6]:
def get_act_id(seg_id):
    """Return the activity id recorded for ``seg_id`` in ``df_activity``.

    Args:
        seg_id: Value compared against the ``segment_id`` column.

    Returns:
        int: The ``activity_id`` of the single matching row.

    Raises:
        ValueError: If no row, or more than one row, matches ``seg_id``.
    """
    matches = df_activity.loc[
        df_activity["segment_id"] == seg_id, "activity_id"
    ].values
    if matches.size != 1:
        raise ValueError(
            "expected exactly one activity for segment_id=%r, found %d"
            % (seg_id, matches.size)
        )
    # Index the element explicitly: int() applied to a 1-D array relies on an
    # implicit array-to-scalar conversion that NumPy has deprecated.
    return int(matches[0])

In [7]:
# get features (std,avg,max,min)
def get_features(x_data):
    """Summarise each column of ``x_data`` with four statistics.

    For column index 0, 1, ... up to ``x_data.shape[1] - 1`` the population
    standard deviation (``ddof=0``), the average, the maximum and the minimum
    are appended, in that order, to one flat list.

    Returns:
        list: ``4 * x_data.shape[1]`` values laid out as
        ``[std0, avg0, max0, min0, std1, avg1, ...]``.
    """
    by_column = x_data.T
    stats = []
    for idx in range(x_data.shape[1]):
        channel = by_column[idx]
        stats.extend((
            channel.std(ddof=0),
            np.average(channel),
            np.max(channel),
            np.min(channel),
        ))
    return stats

In [8]:
# glob() returns matches in arbitrary, filesystem-dependent order; sort them so
# that csv_files[0] refers to the same file on every run and every machine.
csv_files = sorted(glob("train/*"))

In [9]:
# Load only the first matched file: x_data holds its cleaned samples and y the
# activity id returned by load_data.
x_data,y = load_data(csv_files[0])

# Overall-window feature extraction (whole recording, no segmentation)

In [10]:
# Four statistics per column (std, average, max, min) computed over all rows of
# x_data at once, i.e. without cutting it into windows first.
x_feature = get_features(x_data)
print(x_feature)

[1.6395426934016482, -0.8511891891891893, 3.8689999999999998, -3.486, 2.1418133169244293, -8.786306306306306, 1.455, -10.495999999999999, 2.591852250101581, 1.1703063063063066, 10.454, 0.001]


# Feature extraction after segmentation

In [11]:
# With overlap_rate=0.5 and time_window=10 a new window starts every 5 rows;
# y_seg_list repeats the label y once per window.
seg_data_list,y_seg_list = segmentation(x_data,y,overlap_rate=0.5,time_window=10)

In [12]:
# Build one feature vector per window, in the same order as seg_data_list.
x_feature_list = []
for seg_data in seg_data_list:
    x_feature_list.append(get_features(seg_data))

In [13]:
# zip() over a single list yields 1-tuples, which is why every printed row is
# wrapped as ([...],). Note that this loop rebinds the name x_feature.
for x_feature in zip(x_feature_list):
    print(x_feature)

([1.0754768291320833, -0.7467, 1.608, -2.145, 4.320982667171902, -5.6117, 1.034, -9.652999999999999, 3.7309114489625728, 5.1486, 9.747, 0.067],)
([0.7604673563013734, 0.13799999999999996, 1.608, -0.8420000000000001, 4.423569866069711, -3.8268, 1.455, -9.73, 3.9595239057240206, 6.916200000000001, 10.454, 0.012],)
([1.110200653035297, -0.6511, 0.7659999999999999, -1.992, 3.7452361153871196, -7.3012, 1.455, -9.883, 4.557675872635087, 3.0268, 10.454, 0.012],)
([0.6160402908901332, -1.6085999999999998, 0.191, -1.992, 0.17869306086135478, -9.492299999999998, -9.308, -9.883, 0.0305785872793365, 0.053500000000000006, 0.106, 0.016],)
([0.2286225929342942, -1.6659, -1.264, -1.915, 0.15927385221686574, -9.5728, -9.385, -9.921, 0.028662867965365924, 0.028800000000000003, 0.098, 0.001],)
([0.6184760706769503, -1.2675, -0.38299999999999995, -2.1830000000000003, 0.4105244937881298, -9.362200000000001, -8.389, -9.921, 1.1968831229489367, 0.8036999999999999, 3.03, 0.001],)
([0.7281239523597615, -0.4670