## Predication with Different Classification Method to The Dataset

In [30]:
import pandas as pd
import numpy as np

from sklearn.exceptions import ConvergenceWarning
import warnings

from sklearn.ensemble import IsolationForest

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

from keras.models import Sequential
from keras.layers import Dense

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from scikeras.wrappers import KerasClassifier

# Ensemble Methods
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

from sklearn.decomposition import PCA

from sklearn import metrics
from sklearn.metrics import classification_report

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

import os

In [31]:
# Suppress specific warning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

warnings.filterwarnings("ignore")

os.environ["PYTHONWARNINGS"] = "ignore"

In [32]:
# Set max output lines before scrolling
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.instance().display_formatter.formatters['text/plain'].for_type(
    type, lambda obj, p, cycle: p.text(repr(obj)[:10000])
)

<function __main__.<lambda>(obj, p, cycle)>

### Metrics Generation

In [33]:
#Metric Calculations

def calculate_metrics(classifier, y_val, y_pred):
    print(f"{classifier} metrics: ")

    print(classification_report(y_val, y_pred))

In [34]:
def train_and_accuracy_gen(model, label, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    calculate_metrics(label, y_test, model.predict(X_test))

In [None]:
class ModelEvaluationPipeline:

    param_grid_logistic_regression = {
        'C': [0.01, 1, 10, 100],
        'solver': ['lbfgs', 'liblinear', 'saga'],
        'penalty': ['l2'],
        'max_iter': [100, 500, 1000]
    }

    param_grid_decission_tree_classifier = {
        'max_depth': [None, 5, 20, 50],
        'min_samples_split': [2, 5, 10, 20],
        'criterion': ['gini', 'entropy'],
    }

    param_grid_random_forest_classifier = {
        'n_estimators': [100, 200, 500],
        'max_depth': [None, 10, 20, 50],
        'bootstrap': [True, False],
        'criterion': ['gini', 'entropy']
    }

    param_grid_gaussian_naive_bias = {
        'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4]
    }

    param_grid_svc = {
        'C': [0.1, 1, 10, 100, 1000],
        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
        'kernel': ['rbf', 'poly']
    }

    param_grid_knn = {
        'n_neighbors': [100, 500, 700, 900, 1100, 1500],
        'weights': ['uniform', 'distance'],
        'metric': ['minkowski'],
        'p': [1, 2]
    }

    param_grid_ada_boost = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.5, 1.0],
        'estimator': [
            DecisionTreeClassifier(max_depth=1),
            DecisionTreeClassifier(max_depth=3)
        ],
    }

    param_grid_xgb = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 0.2], 
        'subsample': [0.6, 0.8, 1.0],     
        'gamma': [0, 0.1, 0.3, 0.5],           
    }

    param_grid_ann = {
        'model__n_neurons': [32, 64, 128],
        'model__activation': ['relu', 'tanh'],
        'epochs': [10, 20],
        'batch_size': [16, 32, 64]
    }

    def __init__(self, file_path):
        self.feature_path = file_path
        self.feature_df = self.get_feture()
        self.X, self.y = self.split_feture_and_target()
        self.y = self.map_zero_to_n() # mapping y zero to number of class to make it usable for some modles i.e. xgaboost
        self.X_train, self.X_test, self.y_train, self.y_test = self.get_scale_and_test_train_split()

    # data read and processing section
    def remove_outliear(self, feature_df):
        iso = IsolationForest(contamination=0.01, random_state=42)
        outliers = iso.fit_predict(feature_df)
        data_cleaned = feature_df[outliers == 1]

        return data_cleaned

    def get_feture(self):
        feature_df = pd.read_csv(self.feature_path)
        feature_df = feature_df.iloc[:, 1:] # remove index
        
        return self.remove_outliear(feature_df)

    def split_feture_and_target(self):
        X = self.feature_df.iloc[:, :-1]
        y = self.feature_df.iloc[:, -1]
        
        return X, y

    def get_scale_and_test_train_split(self):
        #Scaling
        scaler = StandardScaler()
        scaled_fature = scaler.fit_transform(self.X)
        
        #test train split
        return train_test_split(scaled_fature, self.y, train_size=.20, random_state=42, stratify=self.y)
    
    def map_zero_to_n(self):
        unique_values = {val: idx for idx, val in enumerate(self.y.unique())}
        y_mapped = self.y.map(unique_values)

        return y_mapped
    
    # Cross validation section 
    def kfold_cross_validation(self, model, n_splits):
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        kfold_score = cross_val_score(model, self.X, self.y, cv=kf)

        print("K-fold cross validaiton scores:", kfold_score)

    def stratified_cross_validation(self, model, n_splits):
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        skfold_score = cross_val_score(model, self.X, self.y, cv=skf)

        print("Straified cross validation scores:", skfold_score)

    def cross_validation(self, model, n_splits):
        self.kfold_cross_validation(model, n_splits)
        self.stratified_cross_validation(model, n_splits)

    # Hyper parameter tuning

    def gridSerach(self, estimator, param_grid):
        print("==== Grid Search: =====")

        grid_search = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=3, verbose=0)
        grid_search.fit(self.X_train, self.y_train)

        print("Best parameters found: ", grid_search.best_params_)
        print("Best score found: ", grid_search.best_score_)

        return grid_search
    
    def randomSearch(self, estimator, param_grid):
        print("\n==== Random Search: =====")

        random_search = RandomizedSearchCV(estimator=estimator, param_distributions=param_grid, n_iter=500, cv=3, random_state=42)
        random_search.fit(self.X_train, self.y_train)

        print("Best parameters found: ", random_search.best_params_)
        print("Best score found: ", random_search.best_score_)

        return random_search
    
    def hyper_parameter_tuning(self, model, param_grid):
        grid_search = self.gridSerach(model, param_grid)
        random_search = self.randomSearch(model, param_grid)

        return grid_search if grid_search.best_score_ > random_search.best_score_ else random_search
    
    # Models section
    def run_logistic_regression_model(self):
        print("=============== 1. Logistic Regression Section: ==================")

        tuned_model = self.hyper_parameter_tuning(LogisticRegression(), self.param_grid_logistic_regression)
        lrm = tuned_model.best_estimator_

        train_and_accuracy_gen(lrm, "1. Logistic regression", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(lrm, 10)
    
    def run_decission_tree_classifier_model(self):
        print("=================2. Decission Tree Classifier Section: ================")

        tuned_model = self.hyper_parameter_tuning(DecisionTreeClassifier(), self.param_grid_decission_tree_classifier)
        dt = tuned_model.best_estimator_

        train_and_accuracy_gen(dt, "2. Decission Tree Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(dt, 10)

    def run_random_forest_classifier_model(self):
        print("=================== 3. Random Forest Classifier Section: ==================")

        tuned_model = self.hyper_parameter_tuning(RandomForestClassifier(), self.param_grid_random_forest_classifier)
        rfc = tuned_model.best_estimator_

        train_and_accuracy_gen(rfc, "3.  Random Forest Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(rfc, 10)

    def run_gaussian_naive_bias_classifier_model(self):
        print("=================== 4. Gaussian Naive Bias Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(GaussianNB(), self.param_grid_gaussian_naive_bias)
        gnb = tuned_model.best_estimator_

        train_and_accuracy_gen(gnb, "4. Gaussian Naive Bias Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(gnb, 10)


    def run_support_vector_classifier_model(self):
        print("=================== 5. Support Vector Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(SVC(), self.param_grid_svc)
        svc = tuned_model.best_estimator_

        train_and_accuracy_gen(svc, "5. Support Vector Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(svc, 10)


    def run_knn_classifier_model(self):
        print("=================== 6. K-Nearest Neighbors Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(KNeighborsClassifier(), self.param_grid_knn)
        knn = tuned_model.best_estimator_

        train_and_accuracy_gen(knn, "6. K-Nearest Neighbors", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(knn, 10)

    def run_ada_boost_classifier_model(self):
        print("=================== 7. Ada Boost Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(AdaBoostClassifier(), self.param_grid_ada_boost)
        abc = tuned_model.best_estimator_

        train_and_accuracy_gen(abc, "7. Ada Boost Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(abc, 10)

    def run_xg_boost_classifier_model(self):
        print("=================== 8. XG Boost Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(XGBClassifier(), self.param_grid_xgb)
        xgb = tuned_model.best_estimator_

        train_and_accuracy_gen(xgb, "8. XG Boost Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(xgb, 10)

    @staticmethod
    def build_ann(n_neurons=64, activation='relu'):
        model = Sequential()
        # Input layer
        model.add(Dense(n_neurons, activation=activation, input_shape=(24,)))
        
        model.add(Dense(n_neurons, activation=activation))
        model.add(Dense(n_neurons, activation=activation))
            
        # Output layer (example for binary classification)
        model.add(Dense(units=1, activation='sigmoid'))
        
        model.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])
        
        return model
    
    def run_ann_model(self):
        print("=================== 9. Artificial Neural Net Section: ===================")

        model = KerasClassifier(build_fn=self.build_ann, verbose=0)
        
        tuned_model = self.hyper_parameter_tuning(model, self.param_grid_ann)
        ann = tuned_model.best_estimator_

        train_and_accuracy_gen(ann, "9. Artificial Neuralnet", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(ann, 10)

    def driver(self):
        self.run_logistic_regression_model()
        self.run_decission_tree_classifier_model()
        self.run_random_forest_classifier_model()
        self.run_gaussian_naive_bias_classifier_model()
        self.run_support_vector_classifier_model()
        self.run_knn_classifier_model()
        self.run_ada_boost_classifier_model()
        self.run_xg_boost_classifier_model()
        # self.run_ann_model()


# == Model and scores for Window 100 & 25% Overlap ==
   

In [38]:
w100_o25_pipeline = ModelEvaluationPipeline("features/w100_o25_features.csv")
w100_o25_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
Best score found:  0.30028598976074977

==== Random Search: =====
Best parameters found:  {'solver': 'liblinear', 'penalty': 'l2', 'max_iter': 100, 'C': 100}
Best score found:  0.30028598976074977
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.34      0.41      0.37      2113
           1       0.21      0.21      0.21      2267
           2       0.14      0.16      0.15      2174
           3       0.23      0.35      0.27      2224
           4       0.91      0.74      0.81      1721
           5       0.16      0.13      0.14      2222
           6       0.12      0.01      0.02      2133
           7       0.36      0.03      0.05      1649
           8       0.60      0.71      0.65      1982
           9       0.20      0.25      0.22      2113
          10       0.15      0.06      0.09      1788
     

In [39]:
w100_o25_pipeline.run_decission_tree_classifier_model()



==== Grid Search: =====
Best parameters found:  {'criterion': 'entropy', 'max_depth': 50, 'min_samples_split': 5}
Best score found:  0.5052100121775562

==== Random Search: =====
Best parameters found:  {'min_samples_split': 10, 'max_depth': None, 'criterion': 'entropy'}
Best score found:  0.504296924609449
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.31      0.34      0.32      2113
           1       0.46      0.47      0.46      2267
           2       0.41      0.43      0.42      2174
           3       0.51      0.59      0.54      2224
           4       0.79      0.82      0.80      1721
           5       0.37      0.40      0.39      2222
           6       0.39      0.38      0.38      2133
           7       0.34      0.33      0.34      1649
           8       0.65      0.66      0.66      1982
           9       0.81      0.79      0.80      2113
          10       0.40      0.40      0.40      1788
 

In [41]:
w100_o25_pipeline.run_random_forest_classifier_model()



==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 50, 'n_estimators': 500}
Best score found:  0.6263648828486906

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.625974105201172
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.41      0.60      0.49      2113
           1       0.61      0.66      0.63      2267
           2       0.68      0.47      0.56      2174
           3       0.65      0.74      0.69      2224
           4       0.91      0.85      0.88      1721
           5       0.50      0.56      0.53      2222
           6       0.66      0.42      0.51      2133
           7       0.56      0.43      0.48      1649
           8       0.67      0.83      0.74      1982
           9       0.85      0.90      0.87      2113
          10       0.57 

In [42]:
w100_o25_pipeline.run_gaussian_naive_bias_classifier_model()



==== Grid Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.22420539077611987

==== Random Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.22420539077611987
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.29      0.13      0.18      2113
           1       0.23      0.14      0.17      2267
           2       0.10      0.01      0.01      2174
           3       0.17      0.01      0.03      2224
           4       0.83      0.72      0.77      1721
           5       0.13      0.08      0.10      2222
           6       0.13      0.01      0.03      2133
           7       0.12      0.03      0.04      1649
           8       0.47      0.58      0.52      1982
           9       0.13      0.03      0.04      2113
          10       0.12      0.05      0.07      1788
          11       0.17      0.07      0.10      2081
          12       0.1

In [40]:
w100_o25_pipeline.run_support_vector_classifier_model()



==== Grid Search: =====
Best parameters found:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5385602924487439

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score found:  0.5385602924487439
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.35      0.47      0.40      2113
           1       0.56      0.57      0.56      2267
           2       0.40      0.45      0.43      2174
           3       0.56      0.68      0.61      2224
           4       0.84      0.83      0.84      1721
           5       0.45      0.43      0.44      2222
           6       0.37      0.28      0.32      2133
           7       0.43      0.34      0.38      1649
           8       0.57      0.79      0.66      1982
           9       0.80      0.83      0.81      2113
          10       0.53      0.41      0.46      1788
          11       0.35      0.24      0.29      

In [44]:
w100_o25_pipeline.run_knn_classifier_model()



==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.5031243370554471

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.5031243370554471
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.39      0.28      0.32      2113
           1       0.58      0.49      0.53      2267
           2       0.60      0.43      0.50      2174
           3       0.47      0.74      0.57      2224
           4       0.91      0.74      0.81      1721
           5       0.34      0.45      0.39      2222
           6       0.72      0.34      0.46      2133
           7       0.48      0.25      0.33      1649
           8       0.55      0.65      0.59      1982
           9       0.66      0.84      0.74      2113
          10       0.52      0.30      0.38  

In [45]:
w100_o25_pipeline.run_ada_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 100}
Best score found:  0.3444527244086726

==== Random Search: =====
Best parameters found:  {'n_estimators': 100, 'learning_rate': 0.01, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.3444527244086726
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.27      0.62      0.37      2113
           1       0.30      0.44      0.35      2267
           2       0.57      0.03      0.06      2174
           3       0.23      0.29      0.26      2224
           4       0.00      0.00      0.00      1721
           5       0.18      0.20      0.19      2222
           6       0.23      0.00      0.00      2133
           7       0.35      0.10      0.15      1649
           8       0.38      0.76      0.51      1982
           9       0.62      0.37      0.46      2113
    

In [46]:
w100_o25_pipeline.run_xg_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 200, 'subsample': 0.8}
Best score found:  0.6232385598239637

==== Random Search: =====
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'learning_rate': 0.2, 'gamma': 0}
Best score found:  0.6232385598239637
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.46      0.55      0.50      2113
           1       0.63      0.64      0.63      2267
           2       0.65      0.52      0.58      2174
           3       0.65      0.72      0.68      2224
           4       0.91      0.85      0.88      1721
           5       0.47      0.53      0.50      2222
           6       0.60      0.46      0.52      2133
           7       0.51      0.41      0.45      1649
           8       0.74      0.82      0.78      1982
           9       0.85      0.89      0.87      2113
          10       0.55      0.52      0.54    

# == Model and scores for Window 100 & 50% Overlap ==

In [48]:
w100_o50_pipeline = ModelEvaluationPipeline("features/w100_o50_features.csv")
w100_o50_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
Best score found:  0.2812825860271116

==== Random Search: =====
Best parameters found:  {'solver': 'liblinear', 'penalty': 'l2', 'max_iter': 100, 'C': 100}
Best score found:  0.2812825860271116
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.32      0.40      0.36      3172
           1       0.22      0.22      0.22      3402
           2       0.11      0.09      0.10      3254
           3       0.20      0.38      0.26      3333
           4       0.93      0.75      0.83      2557
           5       0.16      0.12      0.14      3332
           6       0.17      0.01      0.02      3208
           7       0.32      0.01      0.02      2466
           8       0.62      0.72      0.66      2984
           9       0.16      0.13      0.15      3168
          10       0.15      0.05      0.07      2681
       

In [49]:
w100_o50_pipeline.run_decission_tree_classifier_model()



==== Grid Search: =====
Best parameters found:  {'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 20}
Best score found:  0.5251129648939868

==== Random Search: =====
Best parameters found:  {'min_samples_split': 20, 'max_depth': 20, 'criterion': 'gini'}
Best score found:  0.5264164059784499
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.37      0.42      0.40      3172
           1       0.46      0.55      0.50      3402
           2       0.47      0.42      0.45      3254
           3       0.60      0.62      0.61      3333
           4       0.80      0.80      0.80      2557
           5       0.38      0.48      0.42      3332
           6       0.48      0.41      0.44      3208
           7       0.39      0.35      0.37      2466
           8       0.61      0.65      0.63      2984
           9       0.78      0.79      0.79      3168
          10       0.44      0.42      0.43      2681
       

In [50]:
w100_o50_pipeline.run_random_forest_classifier_model()



==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 50, 'n_estimators': 500}
Best score found:  0.6624087591240876

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': 50, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.6632777198470629
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.44      0.62      0.51      3172
           1       0.63      0.73      0.67      3402
           2       0.76      0.54      0.63      3254
           3       0.73      0.76      0.75      3333
           4       0.92      0.88      0.90      2557
           5       0.56      0.61      0.59      3332
           6       0.73      0.47      0.57      3208
           7       0.60      0.49      0.54      2466
           8       0.70      0.84      0.76      2984
           9       0.87      0.91      0.89      3168
          10       0.65

In [51]:
w100_o50_pipeline.run_gaussian_naive_bias_classifier_model()



==== Grid Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.24991310392770247

==== Random Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.24991310392770247
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.30      0.16      0.21      3172
           1       0.23      0.13      0.16      3402
           2       0.22      0.02      0.03      3254
           3       0.19      0.55      0.28      3333
           4       0.85      0.73      0.78      2557
           5       0.13      0.09      0.10      3332
           6       0.21      0.02      0.03      3208
           7       0.09      0.03      0.04      2466
           8       0.49      0.59      0.53      2984
           9       0.18      0.02      0.04      3168
          10       0.11      0.06      0.07      2681
          11       0.19      0.05      0.08      3122
          12       0.1

In [52]:
w100_o50_pipeline.run_support_vector_classifier_model()



==== Grid Search: =====
Best parameters found:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5652589502954467

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score found:  0.5652589502954467
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.37      0.48      0.42      3172
           1       0.59      0.60      0.60      3402
           2       0.41      0.39      0.40      3254
           3       0.59      0.70      0.64      3333
           4       0.84      0.83      0.84      2557
           5       0.48      0.49      0.49      3332
           6       0.39      0.32      0.35      3208
           7       0.47      0.40      0.44      2466
           8       0.58      0.80      0.67      2984
           9       0.79      0.87      0.83      3168
          10       0.60      0.44      0.51      2681
          11       0.44      0.28      0.34      

In [53]:
w100_o50_pipeline.run_knn_classifier_model()



==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.5222453945081682

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.5222453945081682
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.39      0.30      0.34      3172
           1       0.53      0.52      0.53      3402
           2       0.64      0.46      0.53      3254
           3       0.49      0.73      0.58      3333
           4       0.92      0.77      0.84      2557
           5       0.37      0.47      0.41      3332
           6       0.73      0.35      0.47      3208
           7       0.52      0.27      0.36      2466
           8       0.58      0.68      0.63      2984
           9       0.68      0.88      0.77      3168
          10       0.61      0.37      0.46  

In [None]:
w100_o50_pipeline.run_ada_boost_classifier_model()

In [None]:
w100_o50_pipeline.run_xg_boost_classifier_model()

# == Model and scores for Window 200 & 25% Overlap ==

In [None]:
w200_o25_pipeline = ModelEvaluationPipeline("features/w200_o25_features.csv")

In [None]:
w200_o25_pipeline.run_logistic_regression_model()

In [None]:
w200_o25_pipeline.run_random_forest_classifier_model()

In [None]:
w200_o25_pipeline.run_decission_tree_classifier_model()

In [None]:
w200_o25_pipeline.run_gaussian_naive_bias_classifier_model()

In [None]:
w200_o25_pipeline.run_knn_classifier_model()

In [None]:
w200_o25_pipeline.run_ada_boost_classifier_model()

In [None]:
w200_o25_pipeline.run_xg_boost_classifier_model()

# == Model and scores for Window 200 & 50% Overlap ==

In [None]:
model_evaluation_pipeline = ModelEvaluationPipeline("features/w200_o50_features.csv", "Window 200 & 50% Overlap")
model_evaluation_pipeline.driver()

In [None]:
model_evaluation_pipeline = ModelEvaluationPipeline("features/w300_o25_features.csv", "Window 300 & 25% Overlap")
model_evaluation_pipeline.driver()

In [None]:
model_evaluation_pipeline = ModelEvaluationPipeline("features/w300_o50_features.csv", "Window 300 & 50% Overlap")
model_evaluation_pipeline.driver()

In [None]:
model_evaluation_pipeline = ModelEvaluationPipeline("features/w400_o25_features.csv", "Window 400 & 25% Overlap")
model_evaluation_pipeline.driver()

In [None]:
model_evaluation_pipeline = ModelEvaluationPipeline("features/w400_o50_features.csv", "Window 400 & 50% Overlap")
model_evaluation_pipeline.driver()

In [None]:
model_evaluation_pipeline = ModelEvaluationPipeline("features/w500_o25_features.csv", "Window 500 & 25% Overlap")
model_evaluation_pipeline.driver()

In [None]:
model_evaluation_pipeline = ModelEvaluationPipeline("features/w500_o50_features.csv", "Window 500 & 50% Overlap")
model_evaluation_pipeline.driver()