# Scores for Random Forest classifier

## Importing necessary libraries

In [1]:
import pandas as pd 
import numpy as np 
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier

## The train and test set

The train and test set have already been provided seperately. Hence, we'll simply read them using __fordTrain.csv__ and __fordTest.csv__.

Also, we'll drop the __TrialID__ and __ObsNum__, because they are of no significance to our classification model.

We'll also drop __P8__,__V7__,__V9__ because they are zero for every data point.

In [2]:
df=pd.read_csv('data/fordTrain.csv')
df1=pd.read_csv('data/fordTest.csv')

df=df.drop(['TrialID','ObsNum','P8','V7','V9'],axis=1) 
df1=df1.drop(['TrialID','ObsNum','P8','V7','V9'],axis=1)

In [3]:
Y_train=df['IsAlert']  # The labels for the train set
X_train=df.drop(['IsAlert'],axis=1) # The features of the train set

Y_test=df1['IsAlert'] # The labels for the test set
X_test=df1.drop(['IsAlert'],axis=1) # The features of the test set

## Feature set for different modalities

We will now construct four types of feature sets:
* Physiological
* Environmental
* Vehicular
* Physiological, Environmental and Vehicular

In [4]:
F1_train=X_train.iloc[:,0:7].copy()

In [5]:
F2_train=X_train.iloc[:,7:18].copy()

In [6]:
F3_train=X_train.iloc[:,18:].copy()

In [7]:
F1_test=X_test.iloc[:,0:7].copy()
F2_test=X_test.iloc[:,7:18].copy()
F3_test=X_test.iloc[:,18:].copy()

In [8]:
F123_train=pd.concat([F1_train,F2_train,F3_train],axis=1)
F123_test=pd.concat([F1_test,F2_test,F3_test],axis=1)

## Training and evaluating our Random Forest model

In [9]:
model=RandomForestClassifier()

### Utility function
This is a utility function to find out the specificity and sensitivity given the confusion matrix.

In [10]:
def specandsens(a):
    spec=a[1][1]/(a[1][1]+a[1][0])
    sens=a[0][0]/(a[0][0]+a[0][1])
    return spec,sens

### Fitting and evaluating our model

__Fitting the model on Physiological (P) feature set__

In [11]:
model.fit(F1_train,Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

__Evaluating the model__

In [12]:
print("train accuracy for P: ",accuracy_score(model.predict(F1_train),Y_train))
print("test accuracy for P: ",accuracy_score(model.predict(F1_test),Y_test))

train accuracy for P:  0.997115809435
test accuracy for P:  0.585468387951


In [13]:
confmatrix=confusion_matrix(Y_test, model.predict(F1_test))
spec,sens=specandsens(confmatrix)
Y_pred_prob=model.predict_proba(F1_test)[:,1]

In [14]:
print(roc_auc_score(Y_test,Y_pred_prob))
print(spec,sens)

0.477790540885
0.687174185601 0.276325466337


__Fitting the model on Environmental (E) feature set__

In [15]:
model.fit(F2_train,Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

__Evaluating the model__

In [16]:
print("train accuracy for E: ",accuracy_score(model.predict(F2_train),Y_train))
print("test accuracy for E: ",accuracy_score(model.predict(F2_test),Y_test))

train accuracy for E:  0.997244878204
test accuracy for E:  0.653028798411


In [17]:
confmatrix=confusion_matrix(Y_test, model.predict(F2_test))
spec,sens=specandsens(confmatrix)
Y_pred_prob=model.predict_proba(F2_test)[:,1]

In [18]:
print(spec,sens)
print(roc_auc_score(Y_test,Y_pred_prob))

0.645063018279 0.67724142542
0.743158198279


__Fitting the model on Vehicular (V) feature set__

In [19]:
model.fit(F3_train,Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

__Evaluating the model__

In [20]:
print("train accuracy for V: ",accuracy_score(model.predict(F3_train),Y_train))
print("test accuracy for V: ",accuracy_score(model.predict(F3_test),Y_test))

train accuracy for V:  0.997645322333
test accuracy for V:  0.658722277392


In [21]:
confmatrix=confusion_matrix(Y_test, model.predict(F3_test))
spec,sens=specandsens(confmatrix)
Y_pred_prob=model.predict_proba(F3_test)[:,1]

In [22]:
print(spec,sens)# specificity and sensitivity
print(roc_auc_score(Y_test,Y_pred_prob))

0.787332556145 0.267801029618
0.554907363716


__Fitting the model on the combined (P, E, V) feature set__

In [23]:
model.fit(F123_train,Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

__Evaluating the model__

In [24]:
print("train accuracy for P, E, V : ",accuracy_score(model.predict(F123_train),Y_train))
print("test accuracy for P, E, V : ",accuracy_score(model.predict(F123_test),Y_test))

train accuracy for P, E, V :  0.999937120343
test accuracy for P, E, V :  0.800736511089


In [25]:
confmatrix=confusion_matrix(Y_test, model.predict(F123_test))
spec,sens=specandsens(confmatrix)
Y_pred_prob=model.predict_proba(F123_test)[:,1]

In [26]:
print(spec,sens)# specificity and sensitivity
print(roc_auc_score(Y_test,Y_pred_prob))

0.871246948068 0.586414387912
0.780446115721
