In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

### Loading Data

In [40]:
#Loading data
train_df = pd.read_csv('/Users/saurabh/Documents/PythonData/train.csv')
test_df = pd.read_csv('/Users/saurabh/Documents/PythonData/test.csv')

In [41]:
# concatingating both test and train database
df = pd.concat([train_df, test_df], axis=0).reset_index(drop=True)

In [42]:
## Create datasets
model_data = df.copy()
activity_data = model_data.pop('Activity')
subject_data = model_data.pop('subject')
y = df

# Split training testing data
X_train, X_test, y_train, y_test = train_test_split(model_data, activity_data, random_state=3)

### MLPClassifier

In [33]:
# Import the model.
from sklearn.neural_network import MLPClassifier

# Establish and fit the model, with a single, 1000 perceptron layer.
mlp = MLPClassifier(hidden_layer_sizes=(1000,))
mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(1000,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [34]:
mlp.score(X_train, y_train)

0.9835577421025375

### Tunning MLP

In [51]:
# Import the model.
from sklearn.neural_network import MLPClassifier

# Establish and fit the model, with a single, 1000 perceptron layer.
mlp = MLPClassifier(hidden_layer_sizes=(1000,1000,), max_iter=300,learning_rate_init=.0001)
mlp.fit(X_train, y_train)
mlp.score(X_train, y_train)

0.9954686690833765

In [37]:
y_train.value_counts()/len(y_train)

STANDING              0.187597
LAYING                0.186691
SITTING               0.172708
WALKING               0.165199
WALKING_UPSTAIRS      0.150181
WALKING_DOWNSTAIRS    0.137623
Name: Activity, dtype: float64

### Other models (DecisionTreeClassifier, RandomForestClassifier, GradientBoostingClassifier)

In [38]:
classifiers = [
    DecisionTreeClassifier(random_state=3),
    RandomForestClassifier(random_state=3),
    GradientBoostingClassifier(random_state=3)
]


names = []
scores = []

for clf in classifiers:
    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    
    names.append(clf.__class__.__name__)
    scores.append(accuracy_score(y_pred, y_test))

score_df = pd.DataFrame({'Model': names, 'Score': scores}).set_index('Model')
score_df.sort_values(by='Score', ascending=False)

Unnamed: 0_level_0,Score
Model,Unnamed: 1_level_1
GradientBoostingClassifier,0.984466
RandomForestClassifier,0.966602
DecisionTreeClassifier,0.929709


### Conclusion

- This shows that MLPClassifier is performing better after tunnning compared to GradientBoostingClassifier, RandomForestClassifier
- Also MLPClassifier is faster as well as complexity and accuracy is better compared to boosted tree or a random forest model