In [26]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [3]:
# Load the full dataset from a CSV in the working directory into memory.
# Every later cell reads from this frame.
df = pd.read_csv("master_data.csv")

In [4]:
# Print the row count, column names/dtypes and memory footprint of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59125500 entries, 0 to 59125499
Data columns (total 10 columns):
target         int64
subject        int64
chest_ACC_x    float64
chest_ACC_y    float64
chest_ACC_z    float64
chest_ECG      float64
chest_EMG      float64
chest_EDA      float64
chest_Temp     float64
chest_Resp     float64
dtypes: float64(8), int64(2)
memory usage: 4.4 GB


In [5]:
# Number of rows per distinct `subject` value, largest first.
df['subject'].value_counts()

6     4825799
3     4400200
4     4393200
5     4250400
2     4165000
17    4022201
16    3826200
13    3794000
14    3763200
10    3740100
8     3719799
15    3576300
7     3563700
11    3556701
9     3528700
Name: subject, dtype: int64

In [6]:
# Accumulator for the per-subject results: the training loop appends one
# {subject: {feature: importance}} dict per subject.
feature_importances_list = []

In [27]:
%%time
for subject in df['subject'].unique():
    print (subject)
    temp = df[df['subject'] == subject]
    y = temp['target']
    X = temp.drop('target', 1)
    
    rf = RandomForestClassifier() 
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    
    rf.fit(X_train, y_train)
    print (rf.score(X_test, y_test))
    
    print(classification_report(y_test, rf.predict(X_test)))
    
    feature_importances = pd.DataFrame(rf.feature_importances_,index = X_train.columns,columns=[str(subject)])
    feature_importances_dict = feature_importances.to_dict()
    feature_importances_list.append(feature_importances_dict)
       

6




0.9981689328947815
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    902929
           1       1.00      1.00      1.00    271725
           2       1.00      1.00      1.00    150059
           3       1.00      0.99      1.00     86152
           4       1.00      1.00      1.00    181649

    accuracy                           1.00   1592514
   macro avg       1.00      1.00      1.00   1592514
weighted avg       1.00      1.00      1.00   1592514

11




0.9880933312430988
              precision    recall  f1-score   support

           0       0.99      0.99      0.99    475759
           1       1.00      1.00      1.00    272833
           2       1.00      0.99      1.00    157039
           3       0.98      0.98      0.98     85341
           4       0.98      0.98      0.98    182740

    accuracy                           0.99   1173712
   macro avg       0.99      0.99      0.99   1173712
weighted avg       0.99      0.99      0.99   1173712

14




0.9876434948979592
              precision    recall  f1-score   support

           0       0.99      0.99      0.99    543791
           1       1.00      1.00      1.00    272719
           2       0.98      0.98      0.98    155792
           3       0.98      0.97      0.97     85954
           4       0.99      0.99      0.99    183600

    accuracy                           0.99   1241856
   macro avg       0.99      0.98      0.98   1241856
weighted avg       0.99      0.99      0.99   1241856

8




0.99311057779255
              precision    recall  f1-score   support

           0       0.99      0.99      0.99    533239
           1       1.00      1.00      1.00    270630
           2       0.99      0.99      0.99    155051
           3       0.99      0.99      0.99     85349
           4       0.99      0.99      0.99    183265

    accuracy                           0.99   1227534
   macro avg       0.99      0.99      0.99   1227534
weighted avg       0.99      0.99      0.99   1227534

15




0.9935704668529096
              precision    recall  f1-score   support

           0       0.99      0.99      0.99    481153
           1       1.00      1.00      1.00    271736
           2       1.00      1.00      1.00    158288
           3       0.99      0.99      0.99     85801
           4       1.00      1.00      1.00    183201

    accuracy                           0.99   1180179
   macro avg       0.99      0.99      0.99   1180179
weighted avg       0.99      0.99      0.99   1180179

9




0.9942205516496332
              precision    recall  f1-score   support

           0       0.99      0.99      0.99    473496
           1       1.00      1.00      1.00    272821
           2       1.00      1.00      1.00    148833
           3       0.98      0.99      0.99     86077
           4       0.99      0.99      0.99    183244

    accuracy                           0.99   1164471
   macro avg       0.99      0.99      0.99   1164471
weighted avg       0.99      0.99      0.99   1164471

10




0.9887095872497332
              precision    recall  f1-score   support

           0       0.99      0.98      0.99    524557
           1       1.00      1.00      1.00    272919
           2       0.99      0.99      0.99    167514
           3       0.97      0.98      0.98     85925
           4       0.98      0.99      0.99    183318

    accuracy                           0.99   1234233
   macro avg       0.99      0.99      0.99   1234233
weighted avg       0.99      0.99      0.99   1234233

2




0.9942551566080978
              precision    recall  f1-score   support

           0       0.99      0.99      0.99    707930
           1       1.00      1.00      1.00    264291
           2       0.99      0.99      0.99    141472
           3       0.99      0.99      0.99     83882
           4       0.99      0.99      0.99    176875

    accuracy                           0.99   1374450
   macro avg       0.99      0.99      0.99   1374450
weighted avg       0.99      0.99      0.99   1374450

16




0.9909420375940683
              precision    recall  f1-score   support

           0       0.99      0.99      0.99    566286
           1       1.00      1.00      1.00    273173
           2       1.00      0.99      1.00    155403
           3       0.97      0.97      0.97     84900
           4       0.99      0.99      0.99    182884

    accuracy                           0.99   1262646
   macro avg       0.99      0.99      0.99   1262646
weighted avg       0.99      0.99      0.99   1262646

4




0.9948501678903209
              precision    recall  f1-score   support

           0       1.00      0.99      1.00    763459
           1       1.00      1.00      1.00    267545
           2       1.00      1.00      1.00    147129
           3       0.97      0.99      0.98     86069
           4       1.00      1.00      1.00    185554

    accuracy                           0.99   1449756
   macro avg       0.99      0.99      0.99   1449756
weighted avg       0.99      0.99      0.99   1449756

13




0.9953618951773934
              precision    recall  f1-score   support

           0       0.99      1.00      0.99    554003
           1       1.00      1.00      1.00    273008
           2       1.00      1.00      1.00    153832
           3       0.99      0.99      0.99     88025
           4       1.00      0.99      1.00    183152

    accuracy                           1.00   1252020
   macro avg       1.00      0.99      0.99   1252020
weighted avg       1.00      1.00      1.00   1252020

3




0.9984215593506081
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    774757
           1       1.00      1.00      1.00    263230
           2       1.00      1.00      1.00    147381
           3       1.00      1.00      1.00     86832
           4       1.00      1.00      1.00    179866

    accuracy                           1.00   1452066
   macro avg       1.00      1.00      1.00   1452066
weighted avg       1.00      1.00      1.00   1452066

17




0.9960514628271707
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    632614
           1       1.00      1.00      1.00    273250
           2       1.00      1.00      1.00    167191
           3       0.99      0.99      0.99     85889
           4       0.99      0.99      0.99    168383

    accuracy                           1.00   1327327
   macro avg       0.99      1.00      1.00   1327327
weighted avg       1.00      1.00      1.00   1327327

5




0.9962292319011686
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    706523
           1       1.00      1.00      1.00    276704
           2       0.99      0.99      0.99    149255
           3       1.00      1.00      1.00     86753
           4       1.00      1.00      1.00    183397

    accuracy                           1.00   1402632
   macro avg       1.00      1.00      1.00   1402632
weighted avg       1.00      1.00      1.00   1402632

7




0.9961871429166657
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    486045
           1       1.00      1.00      1.00    274312
           2       1.00      1.00      1.00    147644
           3       0.99      0.99      0.99     85832
           4       1.00      1.00      1.00    182188

    accuracy                           1.00   1176021
   macro avg       1.00      1.00      1.00   1176021
weighted avg       1.00      1.00      1.00   1176021

CPU times: user 1h 2min 21s, sys: 35.1 s, total: 1h 2min 56s
Wall time: 1h 2min 56s


In [8]:
# Inspect the raw collected structure: a list of {subject: {feature: importance}} dicts.
feature_importances_list

[{'6': {'subject': 0.0,
   'chest_ACC_x': 0.1300669764504841,
   'chest_ACC_y': 0.1968860170043269,
   'chest_ACC_z': 0.24581151008517166,
   'chest_ECG': 0.0032265901153278563,
   'chest_EMG': 0.0036803586156519685,
   'chest_EDA': 0.24669042461389065,
   'chest_Temp': 0.15944455526109153,
   'chest_Resp': 0.014193567854055431}},
 {'11': {'subject': 0.0,
   'chest_ACC_x': 0.04024757292461694,
   'chest_ACC_y': 0.1565318618203392,
   'chest_ACC_z': 0.11830625071942938,
   'chest_ECG': 0.007922838269133713,
   'chest_EMG': 0.006348978002092723,
   'chest_EDA': 0.36328293249601246,
   'chest_Temp': 0.2618496659465971,
   'chest_Resp': 0.045509899821778596}},
 {'14': {'subject': 0.0,
   'chest_ACC_x': 0.10375227203200195,
   'chest_ACC_y': 0.1853095383931324,
   'chest_ACC_z': 0.215453524103329,
   'chest_ECG': 0.008595033425546683,
   'chest_EMG': 0.0051670030825727055,
   'chest_EDA': 0.25382475947777666,
   'chest_Temp': 0.2077869105964587,
   'chest_Resp': 0.020110958889181855}},
 {'8

In [9]:
# Sanity check: one entry per distinct subject is expected, provided the
# training cell has appended to this list exactly once per subject.
len(feature_importances_list)

15

In [15]:
# Convert every {subject: {feature: importance}} mapping into a one-row frame:
# from_dict puts the subject in the columns, so transpose to get the subject as
# the row label and one column per feature.
list_df = [
    pd.DataFrame.from_dict(importances_by_subject).T
    for importances_by_subject in feature_importances_list
]
    

In [16]:
# Inspect the list of one-row frames (one per subject) before they are combined.
list_df

[   chest_ACC_x  chest_ACC_y  chest_ACC_z  chest_ECG  chest_EDA  chest_EMG  \
 6     0.130067     0.196886     0.245812   0.003227    0.24669    0.00368   
 
    chest_Resp  chest_Temp  subject  
 6    0.014194    0.159445      0.0  ,
     chest_ACC_x  chest_ACC_y  chest_ACC_z  chest_ECG  chest_EDA  chest_EMG  \
 11     0.040248     0.156532     0.118306   0.007923   0.363283   0.006349   
 
     chest_Resp  chest_Temp  subject  
 11     0.04551     0.26185      0.0  ,
     chest_ACC_x  chest_ACC_y  chest_ACC_z  chest_ECG  chest_EDA  chest_EMG  \
 14     0.103752      0.18531     0.215454   0.008595   0.253825   0.005167   
 
     chest_Resp  chest_Temp  subject  
 14    0.020111    0.207787      0.0  ,
    chest_ACC_x  chest_ACC_y  chest_ACC_z  chest_ECG  chest_EDA  chest_EMG  \
 8     0.143474     0.240658     0.267763   0.005712   0.238707   0.003918   
 
    chest_Resp  chest_Temp  subject  
 8    0.015633    0.084135      0.0  ,
     chest_ACC_x  chest_ACC_y  chest_ACC_z  chest_EC

In [18]:
# Stack the one-row frames vertically into a single table:
# one row per subject, one column per feature.
feature_importance_all_subjects = pd.concat(list_df)

In [19]:
# Display the combined subject-by-feature importance table.
feature_importance_all_subjects

Unnamed: 0,chest_ACC_x,chest_ACC_y,chest_ACC_z,chest_ECG,chest_EDA,chest_EMG,chest_Resp,chest_Temp,subject
6,0.130067,0.196886,0.245812,0.003227,0.24669,0.00368,0.014194,0.159445,0.0
11,0.040248,0.156532,0.118306,0.007923,0.363283,0.006349,0.04551,0.26185,0.0
14,0.103752,0.18531,0.215454,0.008595,0.253825,0.005167,0.020111,0.207787,0.0
8,0.143474,0.240658,0.267763,0.005712,0.238707,0.003918,0.015633,0.084135,0.0
15,0.112318,0.082203,0.208225,0.003087,0.413194,0.002598,0.009943,0.168433,0.0
9,0.125616,0.097026,0.243847,0.003995,0.368804,0.005418,0.012257,0.143038,0.0
10,0.228821,0.047327,0.232215,0.004801,0.311581,0.004512,0.012627,0.158116,0.0
2,0.116685,0.135971,0.246757,0.004993,0.225879,0.002487,0.015667,0.251563,0.0
16,0.079223,0.061554,0.2439,0.012126,0.410867,0.003252,0.015071,0.174006,0.0
4,0.188891,0.151533,0.237827,0.003605,0.252502,0.002869,0.012963,0.149809,0.0


In [21]:
# Sum the importances across features for each subject (axis=1 -> one total per row).
# Each total is expected to be 1.0, since a fitted forest reports normalised importances.
feature_importance_all_subjects.sum(axis = 1, skipna = True) 

6     1.0
11    1.0
14    1.0
8     1.0
15    1.0
9     1.0
10    1.0
2     1.0
16    1.0
4     1.0
13    1.0
3     1.0
17    1.0
5     1.0
7     1.0
dtype: float64

In [23]:
import seaborn as sns

# Light-to-dark green colormap used to shade the importance table.
cm = sns.light_palette("green", as_cmap=True)

# Shade each cell by its value. The gradient is computed along the columns axis,
# i.e. separately within every row, so the darkest cell in a row marks the
# feature that matters most for that subject.
s = feature_importance_all_subjects.style.background_gradient(
    cmap=cm,
    axis="columns",
)
s

Unnamed: 0,chest_ACC_x,chest_ACC_y,chest_ACC_z,chest_ECG,chest_EDA,chest_EMG,chest_Resp,chest_Temp,subject
6,0.130067,0.196886,0.245812,0.00322659,0.24669,0.00368036,0.0141936,0.159445,0
11,0.0402476,0.156532,0.118306,0.00792284,0.363283,0.00634898,0.0455099,0.26185,0
14,0.103752,0.18531,0.215454,0.00859503,0.253825,0.005167,0.020111,0.207787,0
8,0.143474,0.240658,0.267763,0.00571155,0.238707,0.00391849,0.0156329,0.0841353,0
15,0.112318,0.0822026,0.208225,0.00308661,0.413194,0.00259768,0.00994311,0.168433,0
9,0.125616,0.0970257,0.243847,0.00399517,0.368804,0.00541761,0.012257,0.143038,0
10,0.228821,0.0473266,0.232215,0.00480146,0.311581,0.00451235,0.0126273,0.158116,0
2,0.116685,0.135971,0.246757,0.00499274,0.225879,0.00248697,0.0156665,0.251563,0
16,0.079223,0.0615543,0.2439,0.0121258,0.410867,0.003252,0.0150713,0.174006,0
4,0.188891,0.151533,0.237827,0.0036051,0.252502,0.00286907,0.0129631,0.149809,0


In [25]:
# Persist the combined table to a CSV in the working directory.
# The index (subject labels) is written as the first column by default.
feature_importance_all_subjects.to_csv('feature_importance_all_subjects.csv' )