## Import Libraries

In [51]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split , RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import m2cgen
import os

## Load Data

In [22]:
dataset_path = os.getcwd().replace('/notebook' , '') + "/dataset/"

# Read training data
train_df = pd.read_csv(dataset_path + "train.csv")
x = train_df.iloc[:,:-1]
y = train_df.iloc[:,-1]
scaler = MinMaxScaler() # Load Scaler module
x_scaled = scaler.fit_transform(x)
x_train , x_val , y_train , y_val = train_test_split(x_scaled ,y ,test_size= 0.2, stratify = y)

# Read testing data
test_df = pd.read_csv(dataset_path + "test.csv")
x_test = test_df.iloc[:,:-1]
y_test = test_df.iloc[:,-1]
x_test_scaled = scaler.transform(x_test)

## Modeling

### Random Forest

In [None]:
rf = RandomForestClassifier()

param_dist = {
    'n_estimators' : [1 , 2 , 3 , 5 , 7 , 10],
    'max_depth' : [1 , 2 , 3  , 5 , 7]
    }

random_search = RandomizedSearchCV(estimator = rf,
                                    param_distributions = param_dist,
                                    cv = 5,
                                    random_state=42,
                                    n_jobs=-1
                                    )
random_search.fit(x_train, y_train)

cv_results = pd.DataFrame(random_search.cv_results_)
cv_results = cv_results[["mean_test_score", "params"]].sort_values(by="mean_test_score", ascending=False)

cv_results 

Unnamed: 0,mean_test_score,params
6,0.948817,"{'n_estimators': 7, 'max_depth': 7}"
0,0.938955,"{'n_estimators': 5, 'max_depth': 7}"
2,0.922803,"{'n_estimators': 10, 'max_depth': 5}"
7,0.873659,"{'n_estimators': 1, 'max_depth': 7}"
1,0.869581,"{'n_estimators': 5, 'max_depth': 3}"
3,0.861422,"{'n_estimators': 10, 'max_depth': 3}"
5,0.750394,"{'n_estimators': 5, 'max_depth': 2}"
8,0.737637,"{'n_estimators': 1, 'max_depth': 3}"
4,0.641373,"{'n_estimators': 3, 'max_depth': 2}"
9,0.360484,"{'n_estimators': 1, 'max_depth': 1}"


In [47]:
rf = RandomForestClassifier(n_estimators = 5 , max_depth = 3)
rf.fit(x_train , y_train)
rf.score(x_val , y_val)
rf_report = classification_report(rf.predict(x_val) , y_val)
print(rf_report)

                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       282
           SITTING       0.87      0.87      0.87       255
          STANDING       0.88      0.88      0.88       277
           WALKING       0.95      0.73      0.83       316
WALKING_DOWNSTAIRS       0.86      0.86      0.86       198
  WALKING_UPSTAIRS       0.59      0.88      0.70       143

          accuracy                           0.87      1471
         macro avg       0.86      0.87      0.86      1471
      weighted avg       0.89      0.87      0.87      1471



In [48]:
rf_train_report = classification_report(rf.predict(x_train) , y_train)
print(rf_train_report)

                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00      1123
           SITTING       0.85      0.88      0.86       988
          STANDING       0.89      0.86      0.88      1139
           WALKING       0.95      0.74      0.83      1251
WALKING_DOWNSTAIRS       0.82      0.90      0.86       723
  WALKING_UPSTAIRS       0.66      0.87      0.75       657

          accuracy                           0.87      5881
         macro avg       0.86      0.88      0.86      5881
      weighted avg       0.88      0.87      0.87      5881



In [49]:
rf_test_report = classification_report(rf.predict(x_test_scaled) , y_test)
print(rf_test_report)

                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       536
           SITTING       0.82      0.88      0.85       459
          STANDING       0.89      0.84      0.87       564
           WALKING       0.93      0.67      0.78       688
WALKING_DOWNSTAIRS       0.67      0.93      0.78       303
  WALKING_UPSTAIRS       0.65      0.77      0.71       397

          accuracy                           0.84      2947
         macro avg       0.83      0.85      0.83      2947
      weighted avg       0.85      0.84      0.84      2947



In [50]:
with open(os.getcwd().replace('/notebook' , '') + "/model/rf_model.py" , "w") as f:
  f.write(m2cgen.export_to_python(rf))
print(os.path.getsize(os.getcwd().replace('/notebook' , '') + "/model/rf_model.py" ))

5016


### Decison Tree

In [58]:
dt = DecisionTreeClassifier()

param_dist = {
    'max_depth' : [1 , 2 , 3  , 5 , 7 , 10 , 15 , 20],
    'criterion' : ['gini' , 'entropy']
    }

random_search = RandomizedSearchCV(estimator = dt,
                                    param_distributions = param_dist,
                                    cv = 5,
                                    random_state=42,
                                    n_jobs=-1
                                    )
random_search.fit(x_train, y_train)

cv_results = pd.DataFrame(random_search.cv_results_)
cv_results = cv_results[["mean_test_score", "params"]].sort_values(by="mean_test_score", ascending=False)

cv_results 

Unnamed: 0,mean_test_score,params
2,0.938616,"{'max_depth': 10, 'criterion': 'gini'}"
3,0.937765,"{'max_depth': 15, 'criterion': 'entropy'}"
9,0.936405,"{'max_depth': 20, 'criterion': 'entropy'}"
4,0.936234,"{'max_depth': 10, 'criterion': 'entropy'}"
5,0.911577,"{'max_depth': 5, 'criterion': 'entropy'}"
8,0.786091,"{'max_depth': 3, 'criterion': 'gini'}"
7,0.65261,"{'max_depth': 2, 'criterion': 'entropy'}"
1,0.544465,"{'max_depth': 2, 'criterion': 'gini'}"
0,0.377827,"{'max_depth': 1, 'criterion': 'gini'}"
6,0.357762,"{'max_depth': 1, 'criterion': 'entropy'}"


In [59]:
dt = DecisionTreeClassifier(criterion = 'gini' , max_depth = 10)
dt.fit(x_train , y_train)
dt.score(x_val , y_val)
dt_report = classification_report(dt.predict(x_val) , y_val)
print(dt_report)

                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       282
           SITTING       0.93      0.95      0.94       253
          STANDING       0.95      0.94      0.95       279
           WALKING       0.94      0.91      0.92       253
WALKING_DOWNSTAIRS       0.92      0.95      0.93       191
  WALKING_UPSTAIRS       0.92      0.92      0.92       213

          accuracy                           0.95      1471
         macro avg       0.94      0.94      0.94      1471
      weighted avg       0.95      0.95      0.95      1471



In [60]:
dt_train_report = classification_report(dt.predict(x_train) , y_train)
print(dt_train_report)

                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00      1125
           SITTING       0.99      0.99      0.99      1025
          STANDING       0.99      0.99      0.99      1103
           WALKING       0.99      0.99      0.99       983
WALKING_DOWNSTAIRS       0.99      0.99      0.99       787
  WALKING_UPSTAIRS       0.99      0.99      0.99       858

          accuracy                           0.99      5881
         macro avg       0.99      0.99      0.99      5881
      weighted avg       0.99      0.99      0.99      5881



In [61]:
dt_test_report = classification_report(dt.predict(x_test_scaled) , y_test)
print(dt_test_report)

                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       537
           SITTING       0.79      0.83      0.81       469
          STANDING       0.85      0.82      0.83       554
           WALKING       0.93      0.81      0.86       568
WALKING_DOWNSTAIRS       0.77      0.86      0.81       376
  WALKING_UPSTAIRS       0.76      0.81      0.78       443

          accuracy                           0.85      2947
         macro avg       0.85      0.85      0.85      2947
      weighted avg       0.86      0.85      0.85      2947



In [62]:
with open(os.getcwd().replace('/notebook' , '') + "/model/dt_model.py" , "w") as f:
  f.write(m2cgen.export_to_python(dt))
print(os.path.getsize(os.getcwd().replace('/notebook' , '') + "/model/dt_model.py" ))

19063
