## Notebook Content

This notebook contains:<br>
    1. Defining important reusable methods.<br>
    2. Reading data from sql.<br>
    3. Dropping some columns.<br>
    4. Merging the train & validation sets for refitting.<br>
    5. Refitting the model.<br>
    6. Reporting on test data.<br>
    7. Confusion matrix for the final result.<br>

### Importing Needed Library 

In [1]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import sqlite3 as sql
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from mlxtend.classifier import StackingClassifier
from ipywidgets import interactive, FloatSlider
import matplotlib.pyplot as plt
import seaborn as sns

### Used Methods

In [2]:
def getScores(model_,X_train_val_, y_train_val_, X_test_, y_test_, y_pred_):
     """
    This method gets all the needed scores (accuracy, precision, recall and f1) 
    from the training/validation sets after getting the model, x train and
    validation, y train and validation, and the y predicted
    """
    print("All Data Score ",model_.score(X_train_val_, y_train_val_))
    print("Testing Score ",model_.score(X_test_, y_test_))
    print("Precision: {:6.4f},   Recall: {:6.4f}, f1: {:6.4f}".format(precision_score(y_test_, y_pred_), 
                                                         recall_score(y_test_, y_pred_),f1_score(y_test_,y_pred_)))

In [3]:
def make_confusion_matrix(model, threshold=0.5):
      """
    This method draws the confusion matrixs for each given model
    """
    y_predict = (model.predict_proba(X_test)[:, 1] >= threshold)
    avtivity_confusion = confusion_matrix(y_test, y_predict)
    plt.figure(dpi=80)
    sns.heatmap(avtivity_confusion, cmap=plt.cm.Blues, annot=True, square=True, fmt='d',
           xticklabels=['Stationary', 'Moving'],
           yticklabels=['Stationary', 'Moving']);
    plt.xlabel('prediction')
    plt.ylabel('actual')

### Read data from SQL

In [4]:
X_train_conn = sql.connect('Data/X_train_after_FE.db')
X_train = pd.read_sql('SELECT * FROM X_train_after_FE', X_train_conn)

X_val_conn = sql.connect('Data/X_val_after_FE.db')
X_val = pd.read_sql('SELECT * FROM X_val_after_FE', X_val_conn)


X_test_conn = sql.connect('Data/X_test_after_FE.db')
X_test = pd.read_sql('SELECT * FROM X_test_after_FE', X_test_conn)


y_train_conn = sql.connect('Data/y_train_after_FE.db')
y_train = pd.read_sql('SELECT * FROM y_train_after_FE', y_train_conn)


y_val_conn = sql.connect('Data/y_val_after_FE.db')
y_val = pd.read_sql('SELECT * FROM y_val_after_FE', y_val_conn)


y_test_conn = sql.connect('Data/y_test_after_FE.db')
y_test = pd.read_sql('SELECT * FROM y_test_after_FE', y_test_conn)

### Drop index column

In [5]:
#drop index column
X_train.drop('index', axis='columns', inplace = True)
X_val.drop('index', axis='columns', inplace = True)
X_test.drop('index', axis='columns', inplace = True)
y_train.drop('index', axis='columns', inplace = True)
y_val.drop('index', axis='columns', inplace = True)
y_test.drop('index', axis='columns', inplace = True)

### Merge train and validation data

Merging both the training and validation sets for the purpose of refitting the whole data in the model.

In [6]:
X_train_val = pd.concat([X_train, X_val])
y_train_val = pd.concat([y_train, y_val])

### Refit The final model 

In [7]:
et_model = ExtraTreesClassifier(n_estimators=100, random_state=77)
et_model.fit(X_train_val, y_train_val)

randomforest = RandomForestClassifier(n_estimators=100, random_state = 77)
randomforest.fit(X_train_val, y_train_val)

decisiontree = DecisionTreeClassifier(max_depth=17, random_state = 77)
decisiontree.fit(X_train_val, y_train_val)

model_names = ["randomforest", "decisiontree" ,"et_model"]
model_vars = [eval(n) for n in model_names]

  et_model.fit(X_train_val, y_train_val)
  randomforest.fit(X_train_val, y_train_val)


In [8]:
stacked = StackingClassifier(
    classifiers=model_vars, meta_classifier=RandomForestClassifier(n_estimators=100), use_probas=False)
stacked.fit(X_train_val, y_train_val)

  clf.fit(X, y)
  clf.fit(X, y)
  self.meta_clf_.fit(meta_features, y)


StackingClassifier(classifiers=[RandomForestClassifier(random_state=77),
                                DecisionTreeClassifier(max_depth=17,
                                                       random_state=77),
                                ExtraTreesClassifier(random_state=77)],
                   meta_classifier=RandomForestClassifier())

### Reporting the final scores

In [9]:
y_pred_ss = stacked.predict(X_test)

In [10]:
getScores(stacked, X_train_val, y_train_val, X_test, y_test, y_pred_ss)

All Data Score  0.9991787187291753
Testing Score  0.9636855862584018
Precision: 0.9680,   Recall: 0.9772, f1: 0.9726


|        Model          | Whole Data Accuracy  | Tesing Accuracy | F1      | Recall  | Precision  |
|-----------------------|--------------------|-------------------|---------|---------|------------|
| Stacking              | 0.9991         | 0.9636                | 0.9726  | 0.9772  | 0.9680     |

In [12]:
interactive(lambda threshold: make_confusion_matrix(stacked, threshold), threshold=(0.0,1.0,0.02))

interactive(children=(FloatSlider(value=0.5, description='threshold', max=1.0, step=0.02), Output()), _dom_claâ€¦