In [2]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_regression

In [10]:
# Read the preprocessed feature table and the annotator targets for training sequence 00001.
# Paths are relative to the notebook's working directory.
FEATURES_CSV = '../data/train/00001/columns_1000ms.csv'
TARGETS_CSV = '../data/train/00001/targets.csv'

data = pd.read_csv(FEATURES_CSV)
targets = pd.read_csv(TARGETS_CSV)

In [11]:
# Build one label per 1-second interval: the name of the target column holding the largest
# annotator value for that interval, or 'Unknown' when the interval carries no annotation.
#
# The first two columns of `targets` are skipped, exactly as before; they are presumably
# the interval start/end times rather than activity scores (TODO confirm against targets.csv).
FIRST_ACTIVITY_COL = 2

activity_scores = targets.iloc[:, FIRST_ACTIVITY_COL:].to_numpy(dtype=float)
activity_names = targets.columns[FIRST_ACTIVITY_COL:].to_numpy()

# An un-annotated interval is NaN in every activity column. Detect that explicitly rather
# than relying on argmax returning -1 for an all-NaN Series, which pandas has deprecated
# and which plain NumPy argmax never does.
is_missing = np.isnan(activity_scores)
has_annotation = ~is_missing.all(axis=1)

# NaN entries are ignored when picking the winner (treated as -inf); on ties the first
# (left-most) column wins, matching argmax semantics.
best_col = np.where(is_missing, -np.inf, activity_scores).argmax(axis=1)

# Plain Python list of strings, same shape and type as the original loop produced.
labels = np.where(has_annotation, activity_names[best_col], 'Unknown').tolist()

In [12]:
# Features and labels side by side: a copy of `data` with the label list attached as a
# new `labels` column (`data` itself is left untouched).
data_labels = data.assign(labels=labels)

In [13]:
# Show the combined feature/label frame (the notebook renders a truncated preview).
data_labels

Unnamed: 0,acceleration_x_mean,acceleration_x_std,acceleration_x_min,acceleration_x_median,acceleration_x_max,acceleration_x_sum,acceleration_y_mean,acceleration_y_std,acceleration_y_min,acceleration_y_median,...,video_hallway_bb_3d_flt_y_median,video_hallway_bb_3d_flt_y_max,video_hallway_bb_3d_flt_y_sum,video_hallway_bb_3d_flt_z_mean,video_hallway_bb_3d_flt_z_std,video_hallway_bb_3d_flt_z_min,video_hallway_bb_3d_flt_z_median,video_hallway_bb_3d_flt_z_max,video_hallway_bb_3d_flt_z_sum,labels
0,0.939200,0.012238,0.896,0.944,0.952,18.784,-0.279700,0.007022,-0.292,-0.280,...,,,,,,,,,,Unknown
1,0.938000,0.010881,0.904,0.943,0.946,18.760,-0.281400,0.008200,-0.298,-0.282,...,,,,,,,,,,Unknown
2,0.940400,0.010461,0.910,0.942,0.952,18.808,-0.276400,0.005713,-0.286,-0.276,...,,,,,,,,,,Unknown
3,0.944100,0.005272,0.926,0.944,0.954,18.882,-0.268300,0.008057,-0.282,-0.270,...,,,,,,,,,,Unknown
4,0.942400,0.011430,0.902,0.945,0.952,18.848,-0.260300,0.025685,-0.282,-0.266,...,,,,,,,,,,Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1819,0.925700,0.046529,0.878,0.903,1.026,18.514,-0.355400,0.043235,-0.456,-0.355,...,,,,,,,,,,Unknown
1820,0.914500,0.015734,0.892,0.911,0.944,18.290,-0.363900,0.024662,-0.398,-0.368,...,,,,,,,,,,Unknown
1821,0.913800,0.024404,0.848,0.919,0.952,18.276,-0.347200,0.013746,-0.372,-0.345,...,,,,,,,,,,Unknown
1822,0.925200,0.057881,0.806,0.932,1.020,18.504,-0.356800,0.101979,-0.644,-0.340,...,,,,,,,,,,Unknown


In [14]:
# Replace every missing entry (NaN) with 0 in a new frame; `data_labels` is not modified.
# NOTE(review): this makes "no reading" indistinguishable from a genuine 0 reading;
# confirm that is acceptable for the feature scoring done later.
data_labels_clean = data_labels.fillna(value=0)

# Show the filled frame.
data_labels_clean

Unnamed: 0,acceleration_x_mean,acceleration_x_std,acceleration_x_min,acceleration_x_median,acceleration_x_max,acceleration_x_sum,acceleration_y_mean,acceleration_y_std,acceleration_y_min,acceleration_y_median,...,video_hallway_bb_3d_flt_y_median,video_hallway_bb_3d_flt_y_max,video_hallway_bb_3d_flt_y_sum,video_hallway_bb_3d_flt_z_mean,video_hallway_bb_3d_flt_z_std,video_hallway_bb_3d_flt_z_min,video_hallway_bb_3d_flt_z_median,video_hallway_bb_3d_flt_z_max,video_hallway_bb_3d_flt_z_sum,labels
0,0.939200,0.012238,0.896,0.944,0.952,18.784,-0.279700,0.007022,-0.292,-0.280,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown
1,0.938000,0.010881,0.904,0.943,0.946,18.760,-0.281400,0.008200,-0.298,-0.282,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown
2,0.940400,0.010461,0.910,0.942,0.952,18.808,-0.276400,0.005713,-0.286,-0.276,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown
3,0.944100,0.005272,0.926,0.944,0.954,18.882,-0.268300,0.008057,-0.282,-0.270,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown
4,0.942400,0.011430,0.902,0.945,0.952,18.848,-0.260300,0.025685,-0.282,-0.266,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1819,0.925700,0.046529,0.878,0.903,1.026,18.514,-0.355400,0.043235,-0.456,-0.355,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown
1820,0.914500,0.015734,0.892,0.911,0.944,18.290,-0.363900,0.024662,-0.398,-0.368,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown
1821,0.913800,0.024404,0.848,0.919,0.952,18.276,-0.347200,0.013746,-0.372,-0.345,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown
1822,0.925200,0.057881,0.806,0.932,1.020,18.504,-0.356800,0.101979,-0.644,-0.340,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Unknown


In [16]:
# Keep only the intervals that received a real label; rows tagged 'Unknown' are dropped.
# The original row index is preserved (no reset), so index values have gaps.
is_labelled = data_labels_clean['labels'].ne('Unknown')
final = data_labels_clean.loc[is_labelled]

# Show the filtered frame.
final

Unnamed: 0,acceleration_x_mean,acceleration_x_std,acceleration_x_min,acceleration_x_median,acceleration_x_max,acceleration_x_sum,acceleration_y_mean,acceleration_y_std,acceleration_y_min,acceleration_y_median,...,video_hallway_bb_3d_flt_y_median,video_hallway_bb_3d_flt_y_max,video_hallway_bb_3d_flt_y_sum,video_hallway_bb_3d_flt_z_mean,video_hallway_bb_3d_flt_z_std,video_hallway_bb_3d_flt_z_min,video_hallway_bb_3d_flt_z_median,video_hallway_bb_3d_flt_z_max,video_hallway_bb_3d_flt_z_sum,labels
46,0.9668,0.112356,0.678,0.955,1.186,19.336,-0.4514,0.319798,-1.050,-0.321,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,a_walk
47,0.3769,0.404018,-0.228,0.376,1.660,7.538,-0.4088,0.362220,-1.064,-0.326,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,a_walk
48,0.6807,0.472910,-0.158,0.838,1.632,13.614,-0.3598,0.202494,-0.994,-0.350,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,a_walk
49,0.8571,0.109839,0.470,0.879,1.020,17.142,-0.4462,0.119504,-0.820,-0.428,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,p_stand
50,0.6775,0.233024,0.104,0.802,0.888,13.550,-0.6250,0.227178,-1.112,-0.538,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,p_stand
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0057,0.418736,-0.696,-0.005,1.174,0.114,-0.8624,0.626968,-2.164,-0.600,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,a_walk
1793,0.4475,0.386425,-0.526,0.640,0.856,8.950,-0.8707,0.428976,-2.020,-0.745,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,p_stand
1794,0.7935,0.109710,0.412,0.780,0.988,15.870,-0.5360,0.187481,-0.722,-0.609,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,p_stand
1795,0.8617,0.112627,0.664,0.868,1.186,17.234,-0.4202,0.094151,-0.548,-0.449,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,a_walk


In [29]:
# Score every feature against the activity label with a one-way ANOVA F-test (f_classif).
# k='all' keeps all features: the selector is used here only to obtain the scores.

# Feature matrix = every column except the label. This replaces the hard-coded
# range(0, 366) and guarantees that the names below describe exactly the columns scored.
feature_frame = final.drop(columns=['labels'])

bestfeatures = SelectKBest(score_func=f_classif, k='all')
# NOTE(review): features that are constant within every class (e.g. columns that are all 0
# after the NaN fill) presumably cause the "f = msb / msw" RuntimeWarning and get a NaN
# score; confirm, and consider dropping them before scoring.
fit = bestfeatures.fit(feature_frame, final['labels'])

dfscores = pd.DataFrame(fit.scores_)
# Names come from the same frame that was fitted. Previously `final.columns` was used,
# which also contains 'labels' and produced an extra row with a NaN score.
dfcolumns = pd.DataFrame(feature_frame.columns)
assert len(dfcolumns) == len(dfscores), "feature names and scores are misaligned"

# Concatenate names and scores side by side for easier inspection.
featureScores = pd.concat([dfcolumns, dfscores], axis=1)
featureScores.columns = ['Specs', 'Score']  # feature name, F-score

  f = msb / msw


In [30]:
# Rank the features by F-score, highest first. 366 is the number of feature columns that
# were passed to the selector, so this lists every scored feature.
featureScores.nlargest(n=366, columns='Score')

Unnamed: 0,Specs,Score
128,video_living_room_bb_2d_tl_y_min,77.972985
126,video_living_room_bb_2d_tl_y_mean,70.055529
129,video_living_room_bb_2d_tl_y_median,69.726827
104,video_living_room_centre_2d_y_min,63.400054
131,video_living_room_bb_2d_tl_y_sum,60.424607
105,video_living_room_centre_2d_y_median,60.097152
102,video_living_room_centre_2d_y_mean,59.998447
182,video_living_room_bb_3d_flt_z_min,57.418476
146,video_living_room_centre_3d_z_min,57.314789
164,video_living_room_bb_3d_brb_z_min,57.213392
