## Class Project Final Progress & Bonus section at the last

In [111]:
import pandas as pd
import numpy as np

from sklearn.exceptions import ConvergenceWarning
import warnings

from sklearn.ensemble import IsolationForest

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.utils import to_categorical

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from scikeras.wrappers import KerasClassifier

# Ensemble Methods
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier

from sklearn.decomposition import PCA

from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

import os
import math

from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction import MinimalFCParameters, EfficientFCParameters, ComprehensiveFCParameters
from tsfresh.utilities.dataframe_functions import impute

from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, LSTM, SimpleRNN
from sklearn.preprocessing import StandardScaler, LabelEncoder


In [81]:
# Suppress specific warning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

warnings.filterwarnings("ignore")

os.environ["PYTHONWARNINGS"] = "ignore"

In [82]:
# Set max output lines before scrolling
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.instance().display_formatter.formatters['text/plain'].for_type(
    type, lambda obj, p, cycle: p.text(repr(obj)[:10000])
)

<function __main__.<lambda>(obj, p, cycle)>

# Different models on different winodows and different overlaps

### Metrics Generation

In [122]:
#Metric Calculations

def calculate_metrics(classifier, y_val, y_pred):
    print(f"{classifier} metrics: ")

    print(classification_report(y_val, y_pred))

In [84]:
def train_and_accuracy_gen(model, label, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    calculate_metrics(label, y_test, model.predict(X_test))

In [132]:
class ModelEvaluationPipeline:

    param_grid_logistic_regression = {
        'C': [0.01, 1, 10, 100],
        'solver': ['lbfgs', 'liblinear', 'saga'],
        'penalty': ['l2'],
        'max_iter': [100, 500, 1000]
    }

    param_grid_decission_tree_classifier = {
        'max_depth': [None, 5, 20, 50],
        'min_samples_split': [2, 5, 10, 20],
        'criterion': ['gini', 'entropy'],
    }

    param_grid_random_forest_classifier = {
        'n_estimators': [100, 200, 500],
        'max_depth': [None, 10, 20, 50],
        'bootstrap': [True, False],
        'criterion': ['gini', 'entropy']
    }

    param_grid_gaussian_naive_bias = {
        'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4]
    }

    param_grid_svc = {
        'C': [0.1, 1, 10, 100, 1000],
        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
        'kernel': ['rbf', 'poly']
    }

    param_grid_knn = {
        'n_neighbors': [100, 500, 700, 900, 1100, 1500],
        'weights': ['uniform', 'distance'],
        'metric': ['minkowski'],
        'p': [1, 2]
    }

    param_grid_ada_boost = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.5, 1.0],
        'estimator': [
            DecisionTreeClassifier(max_depth=1),
            DecisionTreeClassifier(max_depth=3)
        ],
    }

    param_grid_xgb = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 0.2], 
        'subsample': [0.6, 0.8, 1.0],     
        'gamma': [0, 0.1, 0.3, 0.5],           
    }

    param_grid_grad_boost = {
      'n_estimators': [50],
      'learning_rate': [0.01, 0.1],
      'max_depth': [3, 5],
      'random_state': [42]
    }

    param_grid_ann = {
        'model__n_neurons': [64],
        'model__activation': ['relu', 'tanh'],
        'epochs': [100, 150],
        'batch_size': [50, 100]
    }

    def __init__(self, file_path):
        self.feature_path = file_path
        self.feature_df = self.get_feture()
        self.X, self.y = self.split_feture_and_target()
        self.y = self.map_zero_to_n() # mapping y zero to number of class to make it usable for some modles i.e. xgaboost
        self.number_of_categories = self.get_number_of_categories()
        self.X_train, self.X_test, self.y_train, self.y_test = self.get_scale_and_test_train_split()
        self.X_train_st_user, self.X_test_st_user, self.y_train_st_user, self.y_test_st_user = self.get_scale_and_test_train_split_based_on_user()

    # data read and processing section
    def remove_outliear(self, feature_df):
        iso = IsolationForest(contamination=0.01, random_state=42)
        outliers = iso.fit_predict(feature_df)
        data_cleaned = feature_df[outliers == 1]

        return data_cleaned

    def get_feture(self):
        feature_df = pd.read_csv(self.feature_path)
        feature_df = feature_df.iloc[:, 1:] # remove index
        
        return self.remove_outliear(feature_df)

    def split_feture_and_target(self):
        X = self.feature_df.iloc[:, :-2]
        y = self.feature_df.iloc[:, -1]
        
        return X, y

    def get_scale_and_test_train_split(self):
        #Scaling
        scaler = StandardScaler()
        scaled_fature = scaler.fit_transform(self.X)
        
        #test train split
        
        return train_test_split(scaled_fature, self.y, train_size=.20, random_state=42, stratify=self.y)
    
    def get_scale_and_test_train_split_based_on_user(self):
        #Scaling
        scaler = StandardScaler()
        scaled_fature = scaler.fit_transform(self.X)
        
        #test train split
        
        return train_test_split(scaled_fature, self.y, train_size=.30, random_state=42, stratify=self.feature_df.iloc[:, -2])
    
    def map_zero_to_n(self):
        unique_values = {val: idx for idx, val in enumerate(self.y.unique())}
        y_mapped = self.y.map(unique_values)

        return y_mapped
    
    def get_number_of_categories(self):
        return len(self.y.unique())

    def onehot_encode(self):
        self.y_train = to_categorical(self.y_train, num_classes = self.number_of_categories)
    
    # Cross validation section 
    def kfold_cross_validation(self, model, n_splits):
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        kfold_score = cross_val_score(model, self.X, self.y, cv=kf)

        print("K-fold cross validaiton scores:", kfold_score)
        print("Average score:", np.mean(kfold_score))


    def stratified_cross_validation(self, model, n_splits):
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        skfold_score = cross_val_score(model, self.X, self.y, cv=skf)

        print("Straified cross validation scores:", skfold_score)
        print("Average score:", np.mean(skfold_score))


    def cross_validation(self, model, n_splits):
        self.kfold_cross_validation(model, n_splits)
        self.stratified_cross_validation(model, n_splits)

    # Hyper parameter tuning

    def gridSerach(self, estimator, param_grid):
        print("==== Grid Search: =====")

        grid_search = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=3, verbose=0)
        grid_search.fit(self.X_train, self.y_train)

        print("Best parameters found: ", grid_search.best_params_)
        print("Best score found: ", grid_search.best_score_)

        return grid_search
    
    def randomSearch(self, estimator, param_grid):
        print("\n==== Random Search: =====")

        random_search = RandomizedSearchCV(estimator=estimator, param_distributions=param_grid, n_iter=500, cv=3, random_state=42)
        random_search.fit(self.X_train, self.y_train)

        print("Best parameters found: ", random_search.best_params_)
        print("Best score found: ", random_search.best_score_)

        return random_search
    
    def hyper_parameter_tuning(self, model, param_grid):
        grid_search = self.gridSerach(model, param_grid)
        random_search = self.randomSearch(model, param_grid)

        return grid_search if grid_search.best_score_ > random_search.best_score_ else random_search
    
    # Models section
    def run_logistic_regression_model(self):
        print("=============== 1. Logistic Regression Section: ==================")

        tuned_model = self.hyper_parameter_tuning(LogisticRegression(), self.param_grid_logistic_regression)
        lrm = tuned_model.best_estimator_

        train_and_accuracy_gen(lrm, "1. Logistic regression", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(lrm, 10)
    
    def run_decission_tree_classifier_model(self):
        print("=================2. Decission Tree Classifier Section: ================")

        tuned_model = self.hyper_parameter_tuning(DecisionTreeClassifier(), self.param_grid_decission_tree_classifier)
        dt = tuned_model.best_estimator_

        train_and_accuracy_gen(dt, "2. Decission Tree Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(dt, 10)

    def run_random_forest_classifier_model(self):
        print("=================== 3. Random Forest Classifier Section: ==================")

        tuned_model = self.hyper_parameter_tuning(RandomForestClassifier(), self.param_grid_random_forest_classifier)
        rfc = tuned_model.best_estimator_

        train_and_accuracy_gen(rfc, "3.  Random Forest Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(rfc, 10)

    def run_gaussian_naive_bias_classifier_model(self):
        print("=================== 4. Gaussian Naive Bias Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(GaussianNB(), self.param_grid_gaussian_naive_bias)
        gnb = tuned_model.best_estimator_

        train_and_accuracy_gen(gnb, "4. Gaussian Naive Bias Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(gnb, 10)


    def run_support_vector_classifier_model(self):
        print("=================== 5. Support Vector Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(SVC(), self.param_grid_svc)
        svc = tuned_model.best_estimator_

        train_and_accuracy_gen(svc, "5. Support Vector Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(svc, 10)


    def run_knn_classifier_model(self):
        print("=================== 6. K-Nearest Neighbors Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(KNeighborsClassifier(), self.param_grid_knn)
        knn = tuned_model.best_estimator_

        train_and_accuracy_gen(knn, "6. K-Nearest Neighbors", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(knn, 10)

    def run_ada_boost_classifier_model(self):
        print("=================== 7. Ada Boost Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(AdaBoostClassifier(), self.param_grid_ada_boost)
        abc = tuned_model.best_estimator_

        train_and_accuracy_gen(abc, "7. Ada Boost Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(abc, 10)

    def run_xg_boost_classifier_model(self):
        print("=================== 8. XG Boost Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(XGBClassifier(), self.param_grid_xgb)
        xgb = tuned_model.best_estimator_

        train_and_accuracy_gen(xgb, "8. XG Boost Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(xgb, 10)

    def run_gradient_boost_classifier_model(self):
        print("=================== 9. Gradient Boost Classifier Section: ===================")

        tuned_model = self.hyper_parameter_tuning(GradientBoostingClassifier(), self.param_grid_grad_boost)
        gb = tuned_model.best_estimator_

        train_and_accuracy_gen(gb, "9. Gradient Boost Classifier", self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(gb, 5)

    # ANN section

    def ann_kfold_cross_validation(self, model, n_splits=2, epochs=50, batch_size=100):
      kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
      scores = []

      X = self.X
      y = self.y
      for train_index, val_index in kf.split(X):
          X_train, X_val = X.iloc[train_index], X.iloc[val_index]
          y_train, y_val = y.iloc[train_index], y.iloc[val_index]

          y_train = to_categorical(y_train, num_classes=self.number_of_categories)

          model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

          # Evaluate the model
          y_val_pred = np.argmax(model.predict(X_val), axis=1)  # Convert probabilities to class labels
          accuracy = accuracy_score(y_val, y_val_pred)
          scores.append(accuracy)

      print("K-fold cross-validation scores:", scores)
      print("Average score:", np.mean(scores))

    def ann_stratified_cross_validation(self, model, n_splits=2, epochs=50, batch_size=100):
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        scores = []

        X = self.X
        y = self.y

        for train_index, val_index in skf.split(self.X, self.y):
            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
            y_train, y_val = y.iloc[train_index], y.iloc[val_index]

            y_train = to_categorical(y_train, num_classes=self.number_of_categories)

            model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

            # Evaluate the model
            y_val_pred = np.argmax(model.predict(X_val), axis=1)  # Convert probabilities to class labels
            accuracy = accuracy_score(y_val, y_val_pred)
            scores.append(accuracy)

        print("Stratified cross-validation scores:", scores)
        print("Average score:", np.mean(scores))

    def ann_cross_validation(self, model, n_splits=2, epochs=50, batch_size=100):
        self.ann_kfold_cross_validation(model, n_splits, epochs, batch_size)
        self.ann_stratified_cross_validation(model, n_splits, epochs, batch_size)

    @staticmethod
    def build_ann(n_neurons=64, activation='relu'):
        model = Sequential()
        # Input layer
        model.add(Dense(n_neurons, activation=activation, input_shape=(24,)))

        model.add(Dense(n_neurons, activation=activation))
        model.add(Dense(n_neurons, activation=activation))
        model.add(Dense(n_neurons, activation=activation))

        # Output layer (example for binary classification)
        model.add(Dense(units=15, activation='softmax'))

        model.compile(optimizer='adam',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

        return model

    def run_ann_model(self):
        print("=================== 10. Artificial Neural Net Section: ===================")

        y_train_tmp = self.y_train

        self.onehot_encode()

        model = KerasClassifier(build_fn=self.build_ann, verbose=0, epochs = 50, batch_size = 100)

        tuned_model = self.hyper_parameter_tuning(model, self.param_grid_ann)
        ann = tuned_model.best_estimator_

        ann.fit(self.X_train, self.y_train)
        y_pred = ann.predict(self.X_test)
        y_pred_classes = np.argmax(y_pred, axis=1)

        calculate_metrics("10. Artificial Neuralnet", self.y_test, y_pred_classes)
        self.ann_cross_validation(ann)

        self.y_train = y_train_tmp


    def run_cnn_model(self):
        print("=================== Convolutional Neural Net Section: ===================")

        n_features = self.X.shape[1]
        n_classes = self.number_of_categories

        X = self.X
        y = self.y

        y_categorical = to_categorical(y, num_classes=n_classes)

        
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        # Reshape the data for Conv1D (samples, time steps, channels)
        X_scaled = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_categorical, test_size=0.2, random_state=42)

        
        model = Sequential([
            Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_features, 1)),
            MaxPooling1D(pool_size=2),
            Conv1D(filters=64, kernel_size=2, activation='relu'),
            MaxPooling1D(pool_size=2),
            Flatten(),               
            Dense(128, activation='relu'),
            Dropout(0.5),
            Dense(n_classes, activation='softmax')
        ])

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        print("Model Summary:")
        model.summary()

       
        history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

        test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)

        print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")


    def run_lstm_model(self):
        print("=================== Long Short-Term Memory (LSTM) Section: ===================")

        n_features = self.X.shape[1] 
        n_classes = self.number_of_categories 

        X = self.X
        y = self.y

        y_categorical = to_categorical(y, num_classes=n_classes)

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, n_features))

        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_categorical, test_size=0.2, random_state=42)

        model = Sequential([
            LSTM(64, activation='tanh', input_shape=(1, n_features), return_sequences=True),
            Dropout(0.3),
            LSTM(64, activation='tanh', return_sequences=False),
            Dropout(0.5),
            Dense(128, activation='relu'),
            Dropout(0.5),
            Dense(n_classes, activation='softmax')
        ])

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        print("Model Summary:")
        model.summary()

        history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

        test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)

        print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")

        
    def run_rnn_model(self):
        print("=================== Recurrent Neural Network (RNN) Section: ===================")

        n_features = self.X.shape[1]  
        n_classes = self.number_of_categories  

        X = self.X
        y = self.y

        y_categorical = to_categorical(y, num_classes=n_classes)

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, n_features))

        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_categorical, test_size=0.2, random_state=42)

        model = Sequential([
            SimpleRNN(64, activation='tanh', input_shape=(1, n_features), return_sequences=True),
            Dropout(0.3),
            SimpleRNN(64, activation='tanh', return_sequences=False),
            Dropout(0.5),
            Dense(128, activation='relu'),
            Dropout(0.5),
            Dense(n_classes, activation='softmax')
        ])

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        print("Model Summary:")
        model.summary()

        history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

        test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)

        print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")

    def driver(self):
        self.run_logistic_regression_model()
        self.run_decission_tree_classifier_model()
        self.run_random_forest_classifier_model()
        self.run_gaussian_naive_bias_classifier_model()
        self.run_support_vector_classifier_model()
        self.run_knn_classifier_model()
        self.run_ada_boost_classifier_model()
        self.run_xg_boost_classifier_model()
        self.run_ann_model()


# == Model and scores for Window 100 & 25% Overlap ==
   

In [150]:
w100_o25_pipeline = ModelEvaluationPipeline("features/w100_o25_features.csv")

In [None]:
w100_o25_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
Best score found:  0.30028598976074977

==== Random Search: =====
Best parameters found:  {'solver': 'liblinear', 'penalty': 'l2', 'max_iter': 100, 'C': 100}
Best score found:  0.30028598976074977
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.34      0.41      0.37      2113
           1       0.21      0.21      0.21      2267
           2       0.14      0.16      0.15      2174
           3       0.23      0.35      0.27      2224
           4       0.91      0.74      0.81      1721
           5       0.16      0.13      0.14      2222
           6       0.12      0.01      0.02      2133
           7       0.36      0.03      0.05      1649
           8       0.60      0.71      0.65      1982
           9       0.20      0.25      0.22      2113
          10       0.15      0.06      0.09      1788
     

In [39]:
w100_o25_pipeline.run_decission_tree_classifier_model()



==== Grid Search: =====
Best parameters found:  {'criterion': 'entropy', 'max_depth': 50, 'min_samples_split': 5}
Best score found:  0.5052100121775562

==== Random Search: =====
Best parameters found:  {'min_samples_split': 10, 'max_depth': None, 'criterion': 'entropy'}
Best score found:  0.504296924609449
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.31      0.34      0.32      2113
           1       0.46      0.47      0.46      2267
           2       0.41      0.43      0.42      2174
           3       0.51      0.59      0.54      2224
           4       0.79      0.82      0.80      1721
           5       0.37      0.40      0.39      2222
           6       0.39      0.38      0.38      2133
           7       0.34      0.33      0.34      1649
           8       0.65      0.66      0.66      1982
           9       0.81      0.79      0.80      2113
          10       0.40      0.40      0.40      1788
 

In [41]:
w100_o25_pipeline.run_random_forest_classifier_model()



==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 50, 'n_estimators': 500}
Best score found:  0.6263648828486906

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.625974105201172
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.41      0.60      0.49      2113
           1       0.61      0.66      0.63      2267
           2       0.68      0.47      0.56      2174
           3       0.65      0.74      0.69      2224
           4       0.91      0.85      0.88      1721
           5       0.50      0.56      0.53      2222
           6       0.66      0.42      0.51      2133
           7       0.56      0.43      0.48      1649
           8       0.67      0.83      0.74      1982
           9       0.85      0.90      0.87      2113
          10       0.57 

In [42]:
w100_o25_pipeline.run_gaussian_naive_bias_classifier_model()



==== Grid Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.22420539077611987

==== Random Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.22420539077611987
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.29      0.13      0.18      2113
           1       0.23      0.14      0.17      2267
           2       0.10      0.01      0.01      2174
           3       0.17      0.01      0.03      2224
           4       0.83      0.72      0.77      1721
           5       0.13      0.08      0.10      2222
           6       0.13      0.01      0.03      2133
           7       0.12      0.03      0.04      1649
           8       0.47      0.58      0.52      1982
           9       0.13      0.03      0.04      2113
          10       0.12      0.05      0.07      1788
          11       0.17      0.07      0.10      2081
          12       0.1

In [40]:
w100_o25_pipeline.run_support_vector_classifier_model()



==== Grid Search: =====
Best parameters found:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5385602924487439

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score found:  0.5385602924487439
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.35      0.47      0.40      2113
           1       0.56      0.57      0.56      2267
           2       0.40      0.45      0.43      2174
           3       0.56      0.68      0.61      2224
           4       0.84      0.83      0.84      1721
           5       0.45      0.43      0.44      2222
           6       0.37      0.28      0.32      2133
           7       0.43      0.34      0.38      1649
           8       0.57      0.79      0.66      1982
           9       0.80      0.83      0.81      2113
          10       0.53      0.41      0.46      1788
          11       0.35      0.24      0.29      

In [44]:
w100_o25_pipeline.run_knn_classifier_model()



==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.5031243370554471

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.5031243370554471
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.39      0.28      0.32      2113
           1       0.58      0.49      0.53      2267
           2       0.60      0.43      0.50      2174
           3       0.47      0.74      0.57      2224
           4       0.91      0.74      0.81      1721
           5       0.34      0.45      0.39      2222
           6       0.72      0.34      0.46      2133
           7       0.48      0.25      0.33      1649
           8       0.55      0.65      0.59      1982
           9       0.66      0.84      0.74      2113
          10       0.52      0.30      0.38  

In [45]:
w100_o25_pipeline.run_ada_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 100}
Best score found:  0.3444527244086726

==== Random Search: =====
Best parameters found:  {'n_estimators': 100, 'learning_rate': 0.01, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.3444527244086726
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.27      0.62      0.37      2113
           1       0.30      0.44      0.35      2267
           2       0.57      0.03      0.06      2174
           3       0.23      0.29      0.26      2224
           4       0.00      0.00      0.00      1721
           5       0.18      0.20      0.19      2222
           6       0.23      0.00      0.00      2133
           7       0.35      0.10      0.15      1649
           8       0.38      0.76      0.51      1982
           9       0.62      0.37      0.46      2113
    

In [46]:
w100_o25_pipeline.run_xg_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 200, 'subsample': 0.8}
Best score found:  0.6232385598239637

==== Random Search: =====
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'learning_rate': 0.2, 'gamma': 0}
Best score found:  0.6232385598239637
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.46      0.55      0.50      2113
           1       0.63      0.64      0.63      2267
           2       0.65      0.52      0.58      2174
           3       0.65      0.72      0.68      2224
           4       0.91      0.85      0.88      1721
           5       0.47      0.53      0.50      2222
           6       0.60      0.46      0.52      2133
           7       0.51      0.41      0.45      1649
           8       0.74      0.82      0.78      1982
           9       0.85      0.89      0.87      2113
          10       0.55      0.52      0.54    

In [153]:
w100_o25_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.583505017831458

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.583505017831458
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.39      0.56      0.46      2113
           1       0.60      0.64      0.62      2267
           2       0.62      0.40      0.49      2174
           3       0.60      0.70      0.65      2224
           4       0.89      0.83      0.86      1721
           5       0.44      0.51      0.47      2222
           6       0.63      0.38      0.48      2133
           7       0.50      0.39      0.44      1649
           8       0.68      0.78      0.73      1982
           9       0.83      0.89      0.86      2113
          10       0.52      0.47  

In [151]:
w100_o25_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 100, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.5562777863836447

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'tanh', 'epochs': 100, 'batch_size': 50}
Best score found:  0.5574518506840341
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.41      0.46      0.43      2113
           1       0.55      0.62      0.58      2267
           2       0.47      0.35      0.40      2174
           3       0.60      0.67      0.64      2224
           4       0.83      0.83      0.83      1721
           5       0.50      0.43      0.46      2222
           6       0.41      0.41      0.41      2133
           7       0.39      0.41      0.40      1649
           8       0.73      0.71      0.72      1982
           9       0.77      0.86      0.81      2113
          10       0

# == Model and scores for Window 100 & 50% Overlap ==

In [154]:
w100_o50_pipeline = ModelEvaluationPipeline("features/w100_o50_features.csv")

In [None]:
w100_o50_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
Best score found:  0.2812825860271116

==== Random Search: =====
Best parameters found:  {'solver': 'liblinear', 'penalty': 'l2', 'max_iter': 100, 'C': 100}
Best score found:  0.2812825860271116
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.32      0.40      0.36      3172
           1       0.22      0.22      0.22      3402
           2       0.11      0.09      0.10      3254
           3       0.20      0.38      0.26      3333
           4       0.93      0.75      0.83      2557
           5       0.16      0.12      0.14      3332
           6       0.17      0.01      0.02      3208
           7       0.32      0.01      0.02      2466
           8       0.62      0.72      0.66      2984
           9       0.16      0.13      0.15      3168
          10       0.15      0.05      0.07      2681
       

In [49]:
w100_o50_pipeline.run_decission_tree_classifier_model()



==== Grid Search: =====
Best parameters found:  {'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 20}
Best score found:  0.5251129648939868

==== Random Search: =====
Best parameters found:  {'min_samples_split': 20, 'max_depth': 20, 'criterion': 'gini'}
Best score found:  0.5264164059784499
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.37      0.42      0.40      3172
           1       0.46      0.55      0.50      3402
           2       0.47      0.42      0.45      3254
           3       0.60      0.62      0.61      3333
           4       0.80      0.80      0.80      2557
           5       0.38      0.48      0.42      3332
           6       0.48      0.41      0.44      3208
           7       0.39      0.35      0.37      2466
           8       0.61      0.65      0.63      2984
           9       0.78      0.79      0.79      3168
          10       0.44      0.42      0.43      2681
       

In [50]:
w100_o50_pipeline.run_random_forest_classifier_model()



==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 50, 'n_estimators': 500}
Best score found:  0.6624087591240876

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': 50, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.6632777198470629
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.44      0.62      0.51      3172
           1       0.63      0.73      0.67      3402
           2       0.76      0.54      0.63      3254
           3       0.73      0.76      0.75      3333
           4       0.92      0.88      0.90      2557
           5       0.56      0.61      0.59      3332
           6       0.73      0.47      0.57      3208
           7       0.60      0.49      0.54      2466
           8       0.70      0.84      0.76      2984
           9       0.87      0.91      0.89      3168
          10       0.65

In [51]:
w100_o50_pipeline.run_gaussian_naive_bias_classifier_model()



==== Grid Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.24991310392770247

==== Random Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.24991310392770247
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.30      0.16      0.21      3172
           1       0.23      0.13      0.16      3402
           2       0.22      0.02      0.03      3254
           3       0.19      0.55      0.28      3333
           4       0.85      0.73      0.78      2557
           5       0.13      0.09      0.10      3332
           6       0.21      0.02      0.03      3208
           7       0.09      0.03      0.04      2466
           8       0.49      0.59      0.53      2984
           9       0.18      0.02      0.04      3168
          10       0.11      0.06      0.07      2681
          11       0.19      0.05      0.08      3122
          12       0.1

In [52]:
w100_o50_pipeline.run_support_vector_classifier_model()



==== Grid Search: =====
Best parameters found:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5652589502954467

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score found:  0.5652589502954467
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.37      0.48      0.42      3172
           1       0.59      0.60      0.60      3402
           2       0.41      0.39      0.40      3254
           3       0.59      0.70      0.64      3333
           4       0.84      0.83      0.84      2557
           5       0.48      0.49      0.49      3332
           6       0.39      0.32      0.35      3208
           7       0.47      0.40      0.44      2466
           8       0.58      0.80      0.67      2984
           9       0.79      0.87      0.83      3168
          10       0.60      0.44      0.51      2681
          11       0.44      0.28      0.34      

In [53]:
w100_o50_pipeline.run_knn_classifier_model()



==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.5222453945081682

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.5222453945081682
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.39      0.30      0.34      3172
           1       0.53      0.52      0.53      3402
           2       0.64      0.46      0.53      3254
           3       0.49      0.73      0.58      3333
           4       0.92      0.77      0.84      2557
           5       0.37      0.47      0.41      3332
           6       0.73      0.35      0.47      3208
           7       0.52      0.27      0.36      2466
           8       0.58      0.68      0.63      2984
           9       0.68      0.88      0.77      3168
          10       0.61      0.37      0.46  

In [54]:
w100_o50_pipeline.run_ada_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 200}
Best score found:  0.3435001737921446

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'learning_rate': 0.01, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.34358706986444215
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.28      0.61      0.38      3172
           1       0.33      0.51      0.40      3402
           2       0.50      0.33      0.40      3254
           3       0.36      0.27      0.30      3333
           4       0.87      0.02      0.04      2557
           5       0.17      0.24      0.20      3332
           6       0.32      0.01      0.03      3208
           7       0.39      0.12      0.18      2466
           8       0.38      0.76      0.51      2984
           9       0.74      0.13      0.22      3168
   

In [55]:
w100_o50_pipeline.run_xg_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.1, 'n_estimators': 200, 'subsample': 0.8}
Best score found:  0.651372957942301

==== Random Search: =====
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'learning_rate': 0.1, 'gamma': 0}
Best score found:  0.651372957942301
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.46      0.60      0.52      3172
           1       0.65      0.70      0.67      3402
           2       0.69      0.55      0.61      3254
           3       0.69      0.76      0.72      3333
           4       0.92      0.89      0.90      2557
           5       0.52      0.58      0.55      3332
           6       0.68      0.46      0.55      3208
           7       0.57      0.49      0.53      2466
           8       0.77      0.84      0.80      2984
           9       0.85      0.92      0.88      3168
          10       0.61      0.57      0.59      

In [155]:
w100_o50_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.6052311435523113

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.6052311435523113
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.40      0.56      0.47      3172
           1       0.60      0.67      0.64      3402
           2       0.70      0.42      0.53      3254
           3       0.61      0.71      0.66      3333
           4       0.89      0.86      0.87      2557
           5       0.46      0.55      0.50      3332
           6       0.72      0.39      0.50      3208
           7       0.52      0.42      0.46      2466
           8       0.70      0.81      0.75      2984
           9       0.83      0.90      0.86      3168
          10       0.55      0.50

In [156]:
w100_o50_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 100, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.5843760862009036

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 100}
Best score found:  0.5809002433090025
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.45      0.44      0.45      3172
           1       0.60      0.59      0.60      3402
           2       0.47      0.45      0.46      3254
           3       0.69      0.66      0.67      3333
           4       0.83      0.85      0.84      2557
           5       0.43      0.56      0.49      3332
           6       0.43      0.32      0.37      3208
           7       0.45      0.41      0.43      2466
           8       0.71      0.76      0.73      2984
           9       0.84      0.86      0.85      3168
          10       

# == Model and scores for Window 200 & 25% Overlap ==

In [157]:
w200_o25_pipeline = ModelEvaluationPipeline("features/w200_o25_features.csv")

In [57]:
w200_o25_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score found:  0.3118171573749667

==== Random Search: =====
Best parameters found:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 100}
Best score found:  0.3118171573749667
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.39      0.44      0.42      1054
           1       0.27      0.20      0.23      1134
           2       0.12      0.09      0.10      1074
           3       0.18      0.42      0.25      1108
           4       0.91      0.73      0.81       867
           5       0.18      0.13      0.15      1109
           6       0.16      0.02      0.04      1050
           7       0.34      0.05      0.09       822
           8       0.66      0.68      0.67       996
           9       0.14      0.14      0.14      1053
          10       0.19      0.08      0.11       892
          11   

In [58]:
w200_o25_pipeline.run_random_forest_classifier_model()



==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': None, 'n_estimators': 200}
Best score found:  0.6040225787284611

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'max_depth': 50, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.6053301780482307
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.45      0.64      0.53      1054
           1       0.57      0.67      0.62      1134
           2       0.57      0.40      0.47      1074
           3       0.68      0.69      0.68      1108
           4       0.96      0.87      0.91       867
           5       0.51      0.51      0.51      1109
           6       0.57      0.35      0.43      1050
           7       0.52      0.40      0.45       822
           8       0.71      0.85      0.77       996
           9       0.84      0.90      0.87      1053
          10       0.

In [59]:
w200_o25_pipeline.run_decission_tree_classifier_model()



==== Grid Search: =====
Best parameters found:  {'criterion': 'entropy', 'max_depth': 20, 'min_samples_split': 10}
Best score found:  0.4712446985063618

==== Random Search: =====
Best parameters found:  {'min_samples_split': 10, 'max_depth': 20, 'criterion': 'entropy'}
Best score found:  0.4670625115249862
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.32      0.34      0.33      1054
           1       0.41      0.46      0.43      1134
           2       0.27      0.34      0.30      1074
           3       0.49      0.54      0.51      1108
           4       0.81      0.80      0.81       867
           5       0.38      0.36      0.37      1109
           6       0.29      0.32      0.30      1050
           7       0.27      0.27      0.27       822
           8       0.69      0.63      0.66       996
           9       0.73      0.72      0.72      1053
          10       0.29      0.28      0.28       892
 

In [60]:
w200_o25_pipeline.run_gaussian_naive_bias_classifier_model()



==== Grid Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.25326510541520686

==== Random Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.25326510541520686
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.32      0.18      0.23      1054
           1       0.25      0.13      0.17      1134
           2       0.24      0.04      0.07      1074
           3       0.14      0.80      0.23      1108
           4       0.85      0.71      0.77       867
           5       0.14      0.12      0.13      1109
           6       0.07      0.00      0.01      1050
           7       0.13      0.04      0.07       822
           8       0.48      0.58      0.53       996
           9       0.19      0.07      0.10      1053
          10       0.14      0.05      0.07       892
          11       0.25      0.18      0.21      1038
          12       0.15 

In [64]:
w200_o25_pipeline.run_support_vector_classifier_model()



==== Grid Search: =====
Best parameters found:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5151541787038745

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score found:  0.5151541787038745
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.37      0.45      0.40      1054
           1       0.52      0.59      0.55      1134
           2       0.41      0.39      0.40      1074
           3       0.60      0.68      0.63      1108
           4       0.86      0.78      0.82       867
           5       0.46      0.38      0.42      1109
           6       0.34      0.29      0.31      1050
           7       0.40      0.35      0.37       822
           8       0.52      0.83      0.64       996
           9       0.78      0.82      0.80      1053
          10       0.50      0.40      0.44       892
          11       0.39      0.30      0.34      

In [61]:
w200_o25_pipeline.run_knn_classifier_model()



==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.4576536152601061

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.4576536152601061
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.43      0.30      0.35      1054
           1       0.56      0.46      0.51      1134
           2       0.65      0.32      0.43      1074
           3       0.41      0.73      0.53      1108
           4       0.95      0.68      0.79       867
           5       0.33      0.44      0.37      1109
           6       0.62      0.29      0.40      1050
           7       0.52      0.16      0.24       822
           8       0.51      0.58      0.55       996
           9       0.69      0.69      0.69      1053
          10       0.61      0.25      0.35  

In [62]:
w200_o25_pipeline.run_ada_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 100}
Best score found:  0.3481455528920032

==== Random Search: =====
Best parameters found:  {'n_estimators': 100, 'learning_rate': 0.01, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.3478843198721495
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.29      0.63      0.40      1054
           1       0.26      0.47      0.34      1134
           2       0.49      0.33      0.40      1074
           3       0.45      0.23      0.31      1108
           4       1.00      0.08      0.14       867
           5       0.20      0.14      0.17      1109
           6       0.11      0.00      0.01      1050
           7       0.17      0.01      0.02       822
           8       0.37      0.71      0.49       996
           9       0.86      0.27      0.41      1053
    

In [63]:
w200_o25_pipeline.run_xg_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 200, 'subsample': 1.0}
Best score found:  0.5993203843710943

==== Random Search: =====
Best parameters found:  {'subsample': 1.0, 'n_estimators': 200, 'learning_rate': 0.2, 'gamma': 0}
Best score found:  0.5993203843710943
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.49      0.56      0.52      1054
           1       0.62      0.70      0.66      1134
           2       0.55      0.45      0.50      1074
           3       0.65      0.67      0.66      1108
           4       0.94      0.87      0.90       867
           5       0.48      0.52      0.50      1109
           6       0.53      0.39      0.45      1050
           7       0.49      0.46      0.47       822
           8       0.78      0.84      0.81       996
           9       0.84      0.88      0.86      1053
          10       0.56      0.49      0.52    

In [158]:
w200_o25_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.5616815210932858

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.5616815210932858
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.42      0.58      0.49      1054
           1       0.59      0.67      0.63      1134
           2       0.53      0.38      0.44      1074
           3       0.62      0.66      0.64      1108
           4       0.95      0.82      0.88       867
           5       0.45      0.50      0.47      1109
           6       0.49      0.32      0.39      1050
           7       0.46      0.41      0.44       822
           8       0.73      0.83      0.78       996
           9       0.83      0.87      0.85      1053
          10       0.55      0.43

In [159]:
w200_o25_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.541556743909685

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'relu', 'epochs': 150, 'batch_size': 50}
Best score found:  0.532930112483865
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.41      0.42      0.42      1054
           1       0.56      0.58      0.57      1134
           2       0.40      0.31      0.35      1074
           3       0.55      0.68      0.61      1108
           4       0.87      0.78      0.82       867
           5       0.44      0.42      0.43      1109
           6       0.39      0.39      0.39      1050
           7       0.40      0.32      0.35       822
           8       0.68      0.72      0.70       996
           9       0.77      0.85      0.81      1053
          10       0.40

# == Model and scores for Window 200 & 50% Overlap ==

In [160]:
w200_o50_pipeline = ModelEvaluationPipeline("features/w200_o50_features.csv")

In [66]:
w200_o50_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score found:  0.32971904654213874

==== Random Search: =====
Best parameters found:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 100}
Best score found:  0.32971904654213874
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.39      0.44      0.41      1579
           1       0.24      0.24      0.24      1692
           2       0.12      0.10      0.11      1598
           3       0.20      0.35      0.26      1656
           4       0.91      0.75      0.82      1306
           5       0.19      0.10      0.13      1656
           6       0.23      0.04      0.07      1570
           7       0.31      0.03      0.06      1228
           8       0.68      0.70      0.69      1488
           9       0.21      0.34      0.26      1574
          10       0.23      0.10      0.14      1332
          11 

In [67]:
w200_o50_pipeline.run_decission_tree_classifier_model()



==== Grid Search: =====
Best parameters found:  {'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2}
Best score found:  0.5124095923910601

==== Random Search: =====
Best parameters found:  {'min_samples_split': 10, 'max_depth': None, 'criterion': 'entropy'}
Best score found:  0.5101414809267526
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.38      0.35      0.37      1579
           1       0.52      0.48      0.50      1692
           2       0.42      0.44      0.43      1598
           3       0.54      0.57      0.56      1656
           4       0.82      0.84      0.83      1306
           5       0.42      0.44      0.43      1656
           6       0.39      0.37      0.38      1570
           7       0.33      0.31      0.32      1228
           8       0.69      0.66      0.67      1488
           9       0.78      0.85      0.81      1574
          10       0.40      0.42      0.41      1332
 

In [68]:
w200_o50_pipeline.run_random_forest_classifier_model()



==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 500}
Best score found:  0.6561179124497236

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': None, 'criterion': 'gini', 'bootstrap': False}
Best score found:  0.6529718844785499
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.48      0.64      0.55      1579
           1       0.64      0.70      0.67      1692
           2       0.65      0.49      0.56      1598
           3       0.70      0.75      0.72      1656
           4       0.93      0.91      0.92      1306
           5       0.55      0.60      0.57      1656
           6       0.65      0.41      0.51      1570
           7       0.61      0.41      0.49      1228
           8       0.74      0.87      0.80      1488
           9       0.87      0.90      0.89      1574
          10       0.59 

In [69]:
w200_o50_pipeline.run_gaussian_naive_bias_classifier_model()



==== Grid Search: =====
Best parameters found:  {'var_smoothing': 1e-05}
Best score found:  0.24353328700267218

==== Random Search: =====
Best parameters found:  {'var_smoothing': 1e-05}
Best score found:  0.24353328700267218
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.35      0.17      0.23      1579
           1       0.23      0.12      0.16      1692
           2       0.16      0.02      0.03      1598
           3       0.12      0.05      0.08      1656
           4       0.78      0.75      0.76      1306
           5       0.14      0.12      0.13      1656
           6       0.24      0.03      0.06      1570
           7       0.18      0.04      0.07      1228
           8       0.50      0.58      0.53      1488
           9       0.19      0.06      0.09      1574
          10       0.15      0.05      0.07      1332
          11       0.19      0.13      0.15      1552
          12       0.21 

In [70]:
w200_o50_pipeline.run_support_vector_classifier_model()



==== Grid Search: =====
Best parameters found:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.558217520436572

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score found:  0.558217520436572
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.41      0.54      0.46      1579
           1       0.60      0.58      0.59      1692
           2       0.43      0.45      0.44      1598
           3       0.59      0.73      0.65      1656
           4       0.86      0.85      0.85      1306
           5       0.48      0.47      0.47      1656
           6       0.39      0.31      0.34      1570
           7       0.42      0.37      0.39      1228
           8       0.61      0.84      0.71      1488
           9       0.80      0.85      0.83      1574
          10       0.55      0.45      0.49      1332
          11       0.47      0.31      0.38      15

In [71]:
w200_o50_pipeline.run_knn_classifier_model()



==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.49126155308960034

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.49126155308960034
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.45      0.34      0.38      1579
           1       0.55      0.50      0.52      1692
           2       0.54      0.41      0.47      1598
           3       0.45      0.71      0.55      1656
           4       0.93      0.72      0.81      1306
           5       0.35      0.44      0.39      1656
           6       0.74      0.27      0.40      1570
           7       0.57      0.19      0.28      1228
           8       0.56      0.66      0.60      1488
           9       0.72      0.84      0.77      1574
          10       0.54      0.30      0.39

In [72]:
w200_o50_pipeline.run_ada_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 200}
Best score found:  0.35699333451085846

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'learning_rate': 0.01, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.35699333451085846
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.33      0.66      0.44      1579
           1       0.38      0.46      0.41      1692
           2       0.46      0.28      0.35      1598
           3       0.36      0.53      0.43      1656
           4       0.77      0.01      0.02      1306
           5       0.21      0.23      0.22      1656
           6       0.23      0.08      0.12      1570
           7       0.26      0.06      0.10      1228
           8       0.38      0.63      0.48      1488
           9       0.75      0.20      0.31      1574
  

In [74]:
w200_o50_pipeline.run_xg_boost_classifier_model()



==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.1, 'n_estimators': 200, 'subsample': 0.6}
Best score found:  0.6396845589957508

==== Random Search: =====
Best parameters found:  {'subsample': 0.6, 'n_estimators': 200, 'learning_rate': 0.1, 'gamma': 0}
Best score found:  0.6396845589957508
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.51      0.62      0.56      1579
           1       0.66      0.66      0.66      1692
           2       0.62      0.51      0.56      1598
           3       0.66      0.74      0.70      1656
           4       0.93      0.90      0.91      1306
           5       0.52      0.59      0.55      1656
           6       0.59      0.43      0.50      1570
           7       0.53      0.43      0.48      1228
           8       0.79      0.86      0.82      1488
           9       0.85      0.89      0.87      1574
          10       0.59      0.56      0.57    

In [161]:
w200_o50_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.597902409579552

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.597902409579552
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.46      0.58      0.51      1579
           1       0.62      0.65      0.63      1692
           2       0.60      0.44      0.51      1598
           3       0.61      0.69      0.65      1656
           4       0.89      0.87      0.88      1306
           5       0.49      0.56      0.52      1656
           6       0.62      0.37      0.46      1570
           7       0.51      0.39      0.44      1228
           8       0.73      0.82      0.77      1488
           9       0.84      0.86      0.85      1574
          10       0.54      0.51  

In [162]:
w200_o50_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.5727273994504516

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score found:  0.5737760754408429
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.46      0.44      0.45      1579
           1       0.59      0.59      0.59      1692
           2       0.47      0.45      0.46      1598
           3       0.64      0.73      0.68      1656
           4       0.85      0.85      0.85      1306
           5       0.51      0.49      0.50      1656
           6       0.43      0.39      0.41      1570
           7       0.39      0.39      0.39      1228
           8       0.71      0.74      0.73      1488
           9       0.79      0.90      0.84      1574
          10       0.

# == Model and scores for Window 300 & 25% Overlap ==

In [8]:
w300_o25_features_pipeline = ModelEvaluationPipeline("features/w300_o25_features.csv")

In [10]:
w300_o25_features_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score found:  0.3070206679940548

==== Random Search: =====
Best parameters found:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 100, 'C': 1}
Best score found:  0.3070206679940548
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.40      0.44      0.42       701
           1       0.25      0.26      0.25       750
           2       0.16      0.09      0.11       706
           3       0.18      0.20      0.19       734
           4       0.86      0.76      0.81       578
           5       0.19      0.13      0.15       734
           6       0.27      0.04      0.07       691
           7       0.31      0.07      0.12       545
           8       0.62      0.72      0.67       661
           9       0.16      0.34      0.22       698
          10       0.21      0.08      0.11       590
          11       

In [82]:
w300_o25_features_pipeline.run_decission_tree_classifier_model()

==== Grid Search: =====
Best parameters found:  {'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2}
Best score found:  0.4621035362741524

==== Random Search: =====
Best parameters found:  {'min_samples_split': 2, 'max_depth': None, 'criterion': 'gini'}
Best score found:  0.45618871346177214
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.37      0.39      0.38       701
           1       0.43      0.41      0.42       750
           2       0.33      0.31      0.32       706
           3       0.40      0.48      0.44       734
           4       0.84      0.85      0.85       578
           5       0.37      0.38      0.37       734
           6       0.30      0.28      0.29       691
           7       0.32      0.31      0.31       545
           8       0.66      0.62      0.64       661
           9       0.75      0.77      0.76       698
          10       0.29      0.25      0.27       590
        

In [83]:
w300_o25_features_pipeline.run_random_forest_classifier_model()

==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 500}
Best score found:  0.5801027323630313

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': 50, 'criterion': 'gini', 'bootstrap': False}
Best score found:  0.5769478486029707
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.48      0.64      0.54       701
           1       0.57      0.64      0.60       750
           2       0.54      0.36      0.43       706
           3       0.64      0.68      0.66       734
           4       0.91      0.90      0.91       578
           5       0.47      0.53      0.50       734
           6       0.52      0.30      0.38       691
           7       0.53      0.35      0.42       545
           8       0.71      0.87      0.79       661
           9       0.83      0.88      0.85       698
          10       0.58     

In [84]:
w300_o25_features_pipeline.run_gaussian_naive_bias_classifier_model()

==== Grid Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.2632103162362003

==== Random Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.2632103162362003
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.36      0.20      0.26       701
           1       0.26      0.17      0.20       750
           2       0.15      0.01      0.02       706
           3       0.17      0.82      0.28       734
           4       0.75      0.74      0.74       578
           5       0.17      0.16      0.17       734
           6       0.27      0.03      0.06       691
           7       0.19      0.09      0.12       545
           8       0.48      0.62      0.54       661
           9       0.38      0.50      0.43       698
          10       0.12      0.04      0.06       590
          11       0.30      0.22      0.25       687
          12       0.21   

In [85]:
w300_o25_features_pipeline.run_support_vector_classifier_model()

==== Grid Search: =====
Best parameters found:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.49447264365237387

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score found:  0.49447264365237387
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.34      0.47      0.40       701
           1       0.55      0.51      0.53       750
           2       0.32      0.31      0.32       706
           3       0.60      0.66      0.63       734
           4       0.85      0.80      0.82       578
           5       0.49      0.40      0.44       734
           6       0.30      0.25      0.27       691
           7       0.34      0.32      0.33       545
           8       0.49      0.86      0.63       661
           9       0.77      0.83      0.80       698
          10       0.56      0.35      0.43       590
          11       0.44      0.33      0.38      

In [86]:
w300_o25_features_pipeline.run_knn_classifier_model()

==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.423836432130271

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.423836432130271
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.47      0.28      0.35       701
           1       0.60      0.41      0.49       750
           2       0.59      0.27      0.37       706
           3       0.37      0.72      0.49       734
           4       0.85      0.70      0.77       578
           5       0.33      0.42      0.37       734
           6       0.63      0.25      0.36       691
           7       0.41      0.14      0.21       545
           8       0.59      0.57      0.58       661
           9       0.77      0.70      0.73       698
          10       0.59      0.19      0.29      

In [87]:
w300_o25_features_pipeline.run_ada_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 100}
Best score found:  0.3630649578881442

==== Random Search: =====
Best parameters found:  {'n_estimators': 100, 'learning_rate': 0.01, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.3630649578881442
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.32      0.64      0.42       701
           1       0.33      0.35      0.34       750
           2       0.49      0.33      0.39       706
           3       0.41      0.31      0.35       734
           4       0.98      0.65      0.78       578
           5       0.20      0.15      0.17       734
           6       0.04      0.00      0.00       691
           7       0.12      0.02      0.04       545
           8       0.52      0.74      0.61       661
           9       0.78      0.30      0.44       698
      

In [90]:
w300_o25_features_pipeline.run_xg_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.1, 'n_estimators': 200, 'subsample': 0.8}
Best score found:  0.5737887583312301

==== Random Search: =====
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'learning_rate': 0.1, 'gamma': 0}
Best score found:  0.5737887583312301
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.50      0.59      0.54       701
           1       0.61      0.63      0.62       750
           2       0.55      0.42      0.48       706
           3       0.64      0.68      0.66       734
           4       0.91      0.90      0.90       578
           5       0.44      0.55      0.49       734
           6       0.54      0.33      0.41       691
           7       0.48      0.37      0.42       545
           8       0.74      0.85      0.79       661
           9       0.83      0.86      0.84       698
          10       0.58      0.49      0.53      

In [164]:
w300_o25_features_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.5374837581910131

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.5374837581910131
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.42      0.57      0.48       701
           1       0.55      0.60      0.57       750
           2       0.48      0.38      0.42       706
           3       0.62      0.60      0.61       734
           4       0.90      0.87      0.88       578
           5       0.41      0.51      0.45       734
           6       0.47      0.29      0.36       691
           7       0.42      0.35      0.38       545
           8       0.69      0.83      0.75       661
           9       0.81      0.83      0.82       698
          10       0.49      0.38

In [165]:
w300_o25_features_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.5149952793591145

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'tanh', 'epochs': 100, 'batch_size': 50}
Best score found:  0.5051239986165251
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.41      0.42      0.42       701
           1       0.54      0.52      0.53       750
           2       0.37      0.41      0.39       706
           3       0.61      0.60      0.61       734
           4       0.84      0.86      0.85       578
           5       0.44      0.49      0.46       734
           6       0.37      0.29      0.32       691
           7       0.34      0.34      0.34       545
           8       0.69      0.73      0.71       661
           9       0.77      0.88      0.82       698
          10       0.

# == Model and scores for Window 300 & 50% Overlap ==

In [166]:
w300_o50_features_pipeline = ModelEvaluationPipeline("features/w300_o50_features.csv")

In [167]:
w300_o50_features_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
Best score found:  0.34185491402745116

==== Random Search: =====
Best parameters found:  {'solver': 'liblinear', 'penalty': 'l2', 'max_iter': 100, 'C': 100}
Best score found:  0.34185491402745116
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.39      0.51      0.44      1050
           1       0.27      0.24      0.25      1123
           2       0.13      0.10      0.12      1063
           3       0.21      0.37      0.27      1099
           4       0.85      0.73      0.79       861
           5       0.19      0.14      0.16      1099
           6       0.31      0.05      0.09      1034
           7       0.41      0.09      0.14       816
           8       0.64      0.72      0.67       989
           9       0.16      0.16      0.16      1045
          10       0.31      0.11      0.16       884
     

In [93]:
w300_o50_features_pipeline.run_decission_tree_classifier_model()

==== Grid Search: =====
Best parameters found:  {'criterion': 'entropy', 'max_depth': None, 'min_samples_split': 2}
Best score found:  0.49921094544787453

==== Random Search: =====
Best parameters found:  {'min_samples_split': 2, 'max_depth': 50, 'criterion': 'entropy'}
Best score found:  0.49947465852804324
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.38      0.38      0.38      1050
           1       0.47      0.49      0.48      1123
           2       0.35      0.37      0.36      1063
           3       0.49      0.49      0.49      1099
           4       0.85      0.86      0.85       861
           5       0.39      0.39      0.39      1099
           6       0.34      0.33      0.34      1034
           7       0.34      0.35      0.35       816
           8       0.69      0.67      0.68       989
           9       0.72      0.75      0.73      1045
          10       0.35      0.33      0.34       884
 

In [94]:
w300_o50_features_pipeline.run_random_forest_classifier_model()

==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 50, 'n_estimators': 200}
Best score found:  0.649973315988726

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.6512902136388653
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.50      0.65      0.57      1050
           1       0.60      0.72      0.65      1123
           2       0.59      0.43      0.50      1063
           3       0.72      0.72      0.72      1099
           4       0.94      0.91      0.92       861
           5       0.54      0.57      0.55      1099
           6       0.59      0.39      0.47      1034
           7       0.57      0.43      0.49       816
           8       0.77      0.85      0.81       989
           9       0.89      0.88      0.88      1045
          10       0.62   

In [95]:
w300_o50_features_pipeline.run_gaussian_naive_bias_classifier_model()

==== Grid Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.2712235869982155

==== Random Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.2712235869982155
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.38      0.25      0.30      1050
           1       0.28      0.19      0.23      1123
           2       0.21      0.03      0.05      1063
           3       0.19      0.37      0.26      1099
           4       0.76      0.71      0.73       861
           5       0.14      0.09      0.11      1099
           6       0.35      0.07      0.11      1034
           7       0.19      0.11      0.14       816
           8       0.56      0.59      0.57       989
           9       0.27      0.04      0.08      1045
          10       0.15      0.05      0.08       884
          11       0.30      0.20      0.24      1030
          12       0.16     

In [96]:
w300_o50_features_pipeline.run_support_vector_classifier_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5685253748269709

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 100}
Best score found:  0.5685253748269709
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.45      0.53      0.49      1050
           1       0.63      0.59      0.61      1123
           2       0.40      0.38      0.39      1063
           3       0.48      0.63      0.55      1099
           4       0.90      0.84      0.87       861
           5       0.44      0.46      0.45      1099
           6       0.28      0.17      0.21      1034
           7       0.41      0.38      0.39       816
           8       0.64      0.83      0.72       989
           9       0.85      0.78      0.81      1045
          10       0.60      0.43      0.50       884
          11       0.51      0.37      0.43      1030

In [97]:
w300_o50_features_pipeline.run_knn_classifier_model()

==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.48155321792498457

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.48155321792498457
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.51      0.32      0.39      1050
           1       0.56      0.51      0.53      1123
           2       0.57      0.32      0.41      1063
           3       0.50      0.68      0.57      1099
           4       0.87      0.71      0.78       861
           5       0.36      0.44      0.40      1099
           6       0.68      0.25      0.37      1034
           7       0.46      0.10      0.17       816
           8       0.61      0.60      0.60       989
           9       0.79      0.72      0.76      1045
          10       0.58      0.29      0.39  

In [98]:
w300_o50_features_pipeline.run_ada_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 100}
Best score found:  0.3510775754240256

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.3576695685528928
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.35      0.63      0.45      1050
           1       0.35      0.39      0.37      1123
           2       0.23      0.17      0.20      1063
           3       0.27      0.23      0.25      1099
           4       0.88      0.35      0.51       861
           5       0.17      0.50      0.26      1099
           6       0.30      0.20      0.24      1034
           7       0.18      0.10      0.13       816
           8       0.48      0.60      0.54       989
           9       0.81      0.37      0.51      1045
       

In [100]:
w300_o50_features_pipeline.run_xg_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.1, 'n_estimators': 200, 'subsample': 0.8}
Best score found:  0.6507638298227181

==== Random Search: =====
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'learning_rate': 0.1, 'gamma': 0}
Best score found:  0.6507638298227181
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.54      0.64      0.58      1050
           1       0.65      0.70      0.67      1123
           2       0.58      0.49      0.53      1063
           3       0.67      0.72      0.70      1099
           4       0.95      0.90      0.92       861
           5       0.52      0.57      0.54      1099
           6       0.56      0.37      0.45      1034
           7       0.52      0.46      0.49       816
           8       0.80      0.86      0.83       989
           9       0.88      0.86      0.87      1045
          10       0.59      0.50      0.54      

In [168]:
w300_o50_features_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.594622963259452

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.594622963259452
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.47      0.58      0.52      1050
           1       0.62      0.70      0.65      1123
           2       0.50      0.41      0.45      1063
           3       0.67      0.67      0.67      1099
           4       0.93      0.84      0.88       861
           5       0.48      0.55      0.51      1099
           6       0.49      0.32      0.39      1034
           7       0.51      0.42      0.46       816
           8       0.74      0.81      0.77       989
           9       0.85      0.86      0.86      1045
          10       0.51      0.45  

In [169]:
w300_o50_features_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.5798548223011625

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'relu', 'epochs': 150, 'batch_size': 50}
Best score found:  0.5809123847167325
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.50      0.44      0.47      1050
           1       0.62      0.62      0.62      1123
           2       0.39      0.43      0.41      1063
           3       0.51      0.79      0.62      1099
           4       0.87      0.86      0.86       861
           5       0.45      0.48      0.47      1099
           6       0.35      0.24      0.29      1034
           7       0.48      0.36      0.41       816
           8       0.75      0.78      0.77       989
           9       0.86      0.87      0.87      1045
          10       0.

# == Model and scores for Window 400 & 25% Overlap ==

In [19]:
w400_o25_features_pipeline = ModelEvaluationPipeline("features/w400_o25_features.csv")

In [20]:
w400_o25_features_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
Best score found:  0.33156938451056095

==== Random Search: =====
Best parameters found:  {'solver': 'liblinear', 'penalty': 'l2', 'max_iter': 100, 'C': 100}
Best score found:  0.33156938451056095
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.41      0.44      0.43       523
           1       0.25      0.25      0.25       557
           2       0.22      0.09      0.12       529
           3       0.23      0.38      0.28       546
           4       0.86      0.78      0.82       434
           5       0.20      0.21      0.20       547
           6       0.43      0.09      0.15       514
           7       0.42      0.10      0.16       406
           8       0.66      0.75      0.70       491
           9       0.22      0.24      0.23       520
          10       0.28      0.14      0.19       439
     

In [103]:
w400_o25_features_pipeline.run_decission_tree_classifier_model()

==== Grid Search: =====
Best parameters found:  {'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2}
Best score found:  0.4555050849168496

==== Random Search: =====
Best parameters found:  {'min_samples_split': 2, 'max_depth': 20, 'criterion': 'gini'}
Best score found:  0.4533920475096946
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.33      0.33      0.33       523
           1       0.37      0.41      0.39       557
           2       0.30      0.28      0.29       529
           3       0.44      0.49      0.46       546
           4       0.80      0.80      0.80       434
           5       0.42      0.34      0.38       547
           6       0.29      0.28      0.28       514
           7       0.28      0.26      0.27       406
           8       0.60      0.58      0.59       491
           9       0.79      0.78      0.79       520
          10       0.35      0.31      0.33       439
          1

In [104]:
w400_o25_features_pipeline.run_random_forest_classifier_model()

==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': None, 'n_estimators': 500}
Best score found:  0.5958605664488018

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': 20, 'criterion': 'gini', 'bootstrap': False}
Best score found:  0.597441979794921
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.51      0.58      0.54       523
           1       0.53      0.67      0.59       557
           2       0.52      0.31      0.39       529
           3       0.55      0.71      0.62       546
           4       0.89      0.88      0.88       434
           5       0.45      0.47      0.46       547
           6       0.53      0.35      0.42       514
           7       0.50      0.33      0.39       406
           8       0.72      0.84      0.78       491
           9       0.83      0.88      0.85       520
          10       0.53    

In [105]:
w400_o25_features_pipeline.run_gaussian_naive_bias_classifier_model()

==== Grid Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.2733010321245615

==== Random Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.2733010321245615
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.36      0.18      0.24       523
           1       0.29      0.21      0.24       557
           2       0.39      0.08      0.14       529
           3       0.16      0.79      0.27       546
           4       0.67      0.73      0.70       434
           5       0.17      0.18      0.17       547
           6       0.32      0.05      0.08       514
           7       0.30      0.12      0.17       406
           8       0.58      0.63      0.60       491
           9       0.34      0.28      0.30       520
          10       0.21      0.13      0.16       439
          11       0.27      0.21      0.24       514
          12       0.20     

In [106]:
w400_o25_features_pipeline.run_support_vector_classifier_model()

==== Grid Search: =====
Best parameters found:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5211774800010094

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score found:  0.5211774800010094
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.36      0.43      0.39       523
           1       0.55      0.50      0.52       557
           2       0.31      0.31      0.31       529
           3       0.60      0.65      0.63       546
           4       0.83      0.85      0.84       434
           5       0.39      0.39      0.39       547
           6       0.36      0.30      0.32       514
           7       0.35      0.36      0.35       406
           8       0.53      0.79      0.64       491
           9       0.76      0.79      0.77       520
          10       0.46      0.41      0.43       439
          11       0.46      0.39      0.42       5

In [107]:
w400_o25_features_pipeline.run_knn_classifier_model()

==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.41260420083949495

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.41260420083949495
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.49      0.28      0.35       523
           1       0.55      0.41      0.47       557
           2       0.65      0.19      0.29       529
           3       0.37      0.75      0.50       546
           4       0.81      0.70      0.75       434
           5       0.33      0.35      0.34       547
           6       0.58      0.27      0.37       514
           7       0.47      0.09      0.15       406
           8       0.65      0.58      0.61       491
           9       0.70      0.62      0.66       520
          10       0.44      0.25      0.32  

In [108]:
w400_o25_features_pipeline.run_ada_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 200}
Best score found:  0.37287119051824935

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.3781655605185017
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.40      0.45      0.42       523
           1       0.41      0.45      0.43       557
           2       0.36      0.37      0.37       529
           3       0.28      0.51      0.37       546
           4       0.83      0.40      0.54       434
           5       0.23      0.40      0.29       547
           6       0.15      0.08      0.11       514
           7       0.21      0.21      0.21       406
           8       0.51      0.50      0.50       491
           9       0.87      0.41      0.56       520
      

In [110]:
w400_o25_features_pipeline.run_xg_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 200, 'subsample': 0.8}
Best score found:  0.6011532540944305

==== Random Search: =====
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'learning_rate': 0.2, 'gamma': 0}
Best score found:  0.6011532540944305
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.52      0.55      0.53       523
           1       0.61      0.67      0.63       557
           2       0.50      0.42      0.46       529
           3       0.57      0.72      0.63       546
           4       0.91      0.89      0.90       434
           5       0.48      0.51      0.49       547
           6       0.47      0.35      0.40       514
           7       0.43      0.36      0.40       406
           8       0.76      0.87      0.81       491
           9       0.80      0.86      0.83       520
          10       0.51      0.47      0.49      

In [171]:
w400_o25_features_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.5439658145540499

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.5439658145540499
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.46      0.50      0.48       523
           1       0.52      0.61      0.56       557
           2       0.46      0.37      0.41       529
           3       0.59      0.65      0.62       546
           4       0.91      0.82      0.87       434
           5       0.41      0.50      0.45       547
           6       0.46      0.39      0.43       514
           7       0.40      0.36      0.38       406
           8       0.71      0.82      0.76       491
           9       0.84      0.83      0.84       520
          10       0.47      0.38

In [172]:
w400_o25_features_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 50, 'epochs': 100, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.5354951590245708

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'relu', 'epochs': 100, 'batch_size': 50}
Best score found:  0.5302016302016302
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.43      0.37      0.40       523
           1       0.57      0.57      0.57       557
           2       0.35      0.25      0.29       529
           3       0.51      0.69      0.59       546
           4       0.81      0.85      0.83       434
           5       0.46      0.39      0.42       547
           6       0.36      0.40      0.38       514
           7       0.37      0.37      0.37       406
           8       0.71      0.80      0.75       491
           9       0.81      0.80      0.81       520
          10       0.

# == Model and scores for Window 400 & 50% Overlap ==

In [21]:
w400_o50_features_pipeline = ModelEvaluationPipeline("features/w400_o50_features.csv")

In [112]:
w400_o50_features_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
Best score found:  0.3453644727530078

==== Random Search: =====
Best parameters found:  {'solver': 'liblinear', 'penalty': 'l2', 'max_iter': 100, 'C': 100}
Best score found:  0.3453644727530078
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.42      0.51      0.46       783
           1       0.31      0.30      0.31       835
           2       0.18      0.11      0.14       782
           3       0.18      0.25      0.21       818
           4       0.85      0.78      0.81       650
           5       0.25      0.11      0.15       818
           6       0.32      0.08      0.13       774
           7       0.45      0.14      0.21       606
           8       0.66      0.76      0.71       736
           9       0.18      0.28      0.22       779
          10       0.27      0.09      0.13       658
       

In [113]:
w400_o50_features_pipeline.run_decission_tree_classifier_model()

==== Grid Search: =====
Best parameters found:  {'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2}
Best score found:  0.4893842887473461

==== Random Search: =====
Best parameters found:  {'min_samples_split': 2, 'max_depth': None, 'criterion': 'gini'}
Best score found:  0.49115357395612175
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.40      0.41      0.40       783
           1       0.47      0.42      0.44       835
           2       0.38      0.37      0.37       782
           3       0.57      0.51      0.54       818
           4       0.79      0.86      0.82       650
           5       0.39      0.42      0.40       818
           6       0.34      0.31      0.33       774
           7       0.36      0.33      0.34       606
           8       0.68      0.68      0.68       736
           9       0.74      0.72      0.73       779
          10       0.37      0.38      0.38       658
      

In [114]:
w400_o50_features_pipeline.run_random_forest_classifier_model()

==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 500}
Best score found:  0.621726822363765

==== Random Search: =====
Best parameters found:  {'n_estimators': 500, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.6224345364472753
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.54      0.65      0.59       783
           1       0.60      0.71      0.66       835
           2       0.54      0.41      0.46       782
           3       0.63      0.70      0.67       818
           4       0.92      0.91      0.92       650
           5       0.50      0.55      0.53       818
           6       0.56      0.34      0.42       774
           7       0.59      0.42      0.49       606
           8       0.76      0.89      0.82       736
           9       0.87      0.90      0.88       779
          10       0.60   

In [115]:
w400_o50_features_pipeline.run_gaussian_naive_bias_classifier_model()

==== Grid Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.31457891012031136

==== Random Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.31457891012031136
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.34      0.23      0.27       783
           1       0.34      0.22      0.27       835
           2       0.38      0.04      0.08       782
           3       0.19      0.73      0.30       818
           4       0.68      0.74      0.71       650
           5       0.17      0.13      0.14       818
           6       0.41      0.10      0.16       774
           7       0.29      0.15      0.20       606
           8       0.57      0.64      0.60       736
           9       0.28      0.04      0.07       779
          10       0.23      0.08      0.12       658
          11       0.30      0.28      0.29       770
          12       0.18   

In [22]:
w400_o50_features_pipeline.run_support_vector_classifier_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5477707006369427

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 100}
Best score found:  0.5477707006369427
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.47      0.52      0.50       783
           1       0.65      0.62      0.64       835
           2       0.39      0.36      0.37       782
           3       0.50      0.72      0.59       818
           4       0.84      0.88      0.86       650
           5       0.48      0.45      0.47       818
           6       0.30      0.22      0.26       774
           7       0.53      0.42      0.47       606
           8       0.64      0.86      0.73       736
           9       0.73      0.90      0.81       779
          10       0.59      0.52      0.55       658
          11       0.55      0.42      0.48       770

In [116]:
w400_o50_features_pipeline.run_knn_classifier_model()

==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.45364472753007784

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.45364472753007784
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.52      0.29      0.37       783
           1       0.57      0.47      0.52       835
           2       0.55      0.29      0.38       782
           3       0.42      0.73      0.54       818
           4       0.82      0.72      0.76       650
           5       0.34      0.43      0.38       818
           6       0.77      0.21      0.33       774
           7       0.52      0.16      0.25       606
           8       0.72      0.60      0.66       736
           9       0.65      0.78      0.71       779
          10       0.58      0.29      0.38  

In [117]:
w400_o50_features_pipeline.run_ada_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.5, 'n_estimators': 200}
Best score found:  0.394550601556971

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.39171974522292996
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.39      0.58      0.47       783
           1       0.40      0.44      0.42       835
           2       0.27      0.11      0.16       782
           3       0.25      0.41      0.31       818
           4       0.82      0.53      0.64       650
           5       0.24      0.20      0.21       818
           6       0.33      0.23      0.27       774
           7       0.23      0.10      0.14       606
           8       0.52      0.63      0.57       736
           9       0.72      0.53      0.61       779
        

In [119]:
w400_o50_features_pipeline.run_xg_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.1, 'n_estimators': 200, 'subsample': 0.8}
Best score found:  0.618895966029724

==== Random Search: =====
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'learning_rate': 0.1, 'gamma': 0}
Best score found:  0.618895966029724
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.61      0.64      0.63       783
           1       0.64      0.72      0.68       835
           2       0.57      0.45      0.50       782
           3       0.62      0.71      0.66       818
           4       0.93      0.90      0.92       650
           5       0.50      0.58      0.54       818
           6       0.55      0.39      0.46       774
           7       0.53      0.46      0.49       606
           8       0.80      0.88      0.84       736
           9       0.86      0.90      0.88       779
          10       0.60      0.53      0.57       6

In [174]:
w400_o50_features_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.5789101203113942

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.5789101203113942
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.53      0.61      0.57       783
           1       0.63      0.68      0.66       835
           2       0.52      0.38      0.44       782
           3       0.60      0.68      0.64       818
           4       0.90      0.88      0.89       650
           5       0.45      0.55      0.49       818
           6       0.47      0.35      0.40       774
           7       0.52      0.41      0.46       606
           8       0.78      0.83      0.81       736
           9       0.85      0.86      0.86       779
          10       0.53      0.48

In [175]:
w400_o50_features_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 100, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.556617126680821

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'relu', 'epochs': 150, 'batch_size': 100}
Best score found:  0.5559094125973106
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.52      0.46      0.49       783
           1       0.60      0.64      0.62       835
           2       0.39      0.25      0.31       782
           3       0.63      0.59      0.61       818
           4       0.83      0.85      0.84       650
           5       0.48      0.46      0.47       818
           6       0.37      0.41      0.39       774
           7       0.47      0.41      0.44       606
           8       0.77      0.76      0.77       736
           9       0.81      0.82      0.82       779
          10       0

# == Model and scores for Window 500 & 25% Overlap ==

In [11]:
w500_o25_features_pipeline = ModelEvaluationPipeline("features/w500_o25_features.csv")

In [122]:
w500_o25_features_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score found:  0.3534424264505926

==== Random Search: =====
Best parameters found:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 100}
Best score found:  0.3534424264505926
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.43      0.43      0.43       416
           1       0.25      0.27      0.26       448
           2       0.27      0.12      0.17       418
           3       0.17      0.24      0.20       437
           4       0.82      0.78      0.80       348
           5       0.20      0.15      0.17       438
           6       0.36      0.12      0.17       408
           7       0.34      0.16      0.22       325
           8       0.74      0.71      0.73       394
           9       0.22      0.38      0.28       414
          10       0.26      0.16      0.20       351
          11   

In [123]:
w500_o25_features_pipeline.run_decission_tree_classifier_model()

==== Grid Search: =====
Best parameters found:  {'criterion': 'entropy', 'max_depth': 20, 'min_samples_split': 2}
Best score found:  0.4436317024809972

==== Random Search: =====
Best parameters found:  {'min_samples_split': 2, 'max_depth': None, 'criterion': 'entropy'}
Best score found:  0.4515905892665257
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.42      0.38      0.40       416
           1       0.38      0.39      0.39       448
           2       0.28      0.30      0.29       418
           3       0.53      0.52      0.52       437
           4       0.74      0.75      0.75       348
           5       0.32      0.32      0.32       438
           6       0.32      0.30      0.31       408
           7       0.27      0.29      0.28       325
           8       0.63      0.59      0.61       394
           9       0.68      0.73      0.70       414
          10       0.33      0.32      0.32       351
   

In [124]:
w500_o25_features_pipeline.run_random_forest_classifier_model()

==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': None, 'n_estimators': 500}
Best score found:  0.5842158206141637

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'max_depth': None, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.5835478496880602
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.52      0.58      0.55       416
           1       0.47      0.67      0.55       448
           2       0.48      0.32      0.38       418
           3       0.57      0.63      0.60       437
           4       0.87      0.89      0.88       348
           5       0.48      0.47      0.47       438
           6       0.50      0.34      0.41       408
           7       0.49      0.37      0.42       325
           8       0.74      0.85      0.79       394
           9       0.81      0.81      0.81       414
          10       0.

In [125]:
w500_o25_features_pipeline.run_gaussian_naive_bias_classifier_model()

==== Grid Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.29640747810613083

==== Random Search: =====
Best parameters found:  {'var_smoothing': 0.0001}
Best score found:  0.29640747810613083
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.48      0.30      0.37       416
           1       0.32      0.23      0.27       448
           2       0.34      0.06      0.10       418
           3       0.15      0.03      0.05       437
           4       0.73      0.73      0.73       348
           5       0.15      0.14      0.15       438
           6       0.30      0.10      0.15       408
           7       0.20      0.10      0.13       325
           8       0.56      0.71      0.62       394
           9       0.19      0.04      0.07       414
          10       0.17      0.10      0.13       351
          11       0.32      0.31      0.31       410
          12       0.21 

In [12]:
w500_o25_features_pipeline.run_support_vector_classifier_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.49337309476474484

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 100}
Best score found:  0.49337309476474484
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.39      0.42      0.40       416
           1       0.57      0.56      0.57       448
           2       0.37      0.32      0.34       418
           3       0.50      0.68      0.58       437
           4       0.82      0.82      0.82       348
           5       0.41      0.41      0.41       438
           6       0.35      0.25      0.29       408
           7       0.42      0.35      0.38       325
           8       0.50      0.77      0.60       394
           9       0.76      0.75      0.76       414
          10       0.53      0.46      0.49       351
          11       0.53      0.45      0.48       4

In [126]:
w500_o25_features_pipeline.run_knn_classifier_model()

==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.41047341449312097

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.41047341449312097
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.43      0.31      0.36       416
           1       0.46      0.40      0.43       448
           2       0.62      0.15      0.25       418
           3       0.36      0.77      0.49       437
           4       0.76      0.69      0.73       348
           5       0.31      0.34      0.32       438
           6       0.64      0.23      0.34       408
           7       0.66      0.08      0.14       325
           8       0.79      0.48      0.60       394
           9       0.70      0.73      0.71       414
          10       0.44      0.30      0.36  

In [127]:
w500_o25_features_pipeline.run_ada_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 50}
Best score found:  0.418433621379294

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.41710560013095405
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.33      0.58      0.42       416
           1       0.28      0.45      0.34       448
           2       0.49      0.30      0.37       418
           3       0.33      0.33      0.33       437
           4       0.82      0.18      0.30       348
           5       0.21      0.12      0.15       438
           6       0.31      0.06      0.10       408
           7       0.10      0.01      0.01       325
           8       0.41      0.77      0.53       394
           9       0.29      0.03      0.05       414
        

In [129]:
w500_o25_features_pipeline.run_xg_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.1, 'n_estimators': 200, 'subsample': 1.0}
Best score found:  0.5709435287346306

==== Random Search: =====
Best parameters found:  {'subsample': 1.0, 'n_estimators': 200, 'learning_rate': 0.1, 'gamma': 0}
Best score found:  0.5709435287346306
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.54      0.57      0.56       416
           1       0.57      0.65      0.61       448
           2       0.53      0.41      0.46       418
           3       0.57      0.62      0.59       437
           4       0.90      0.85      0.87       348
           5       0.42      0.49      0.45       438
           6       0.49      0.38      0.43       408
           7       0.46      0.38      0.41       325
           8       0.76      0.86      0.81       394
           9       0.78      0.78      0.78       414
          10       0.54      0.44      0.49      

In [177]:
w500_o25_features_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.5411184948740492

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.5411184948740492
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.51      0.56      0.53       416
           1       0.47      0.60      0.53       448
           2       0.42      0.37      0.39       418
           3       0.56      0.56      0.56       437
           4       0.87      0.84      0.86       348
           5       0.37      0.47      0.41       438
           6       0.38      0.32      0.35       408
           7       0.44      0.36      0.40       325
           8       0.75      0.82      0.78       394
           9       0.78      0.74      0.76       414
          10       0.55      0.41

In [178]:
w500_o25_features_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score found:  0.5066414263423443

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score found:  0.5172537154232638
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.50      0.44      0.47       416
           1       0.58      0.63      0.61       448
           2       0.35      0.41      0.38       418
           3       0.62      0.61      0.62       437
           4       0.81      0.81      0.81       348
           5       0.43      0.37      0.40       438
           6       0.34      0.27      0.30       408
           7       0.44      0.33      0.37       325
           8       0.72      0.71      0.71       394
           9       0.78      0.83      0.80       414
          10       0.

# == Model and scores for Window 500 & 50% Overlap ==

In [179]:
w500_o50_features_pipeline = ModelEvaluationPipeline("features/w500_o50_features.csv")

In [131]:
w500_o50_features_pipeline.run_logistic_regression_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score found:  0.35364663996439694

==== Random Search: =====
Best parameters found:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 100}
Best score found:  0.35364663996439694
1. Logistic regression metrics: 
              precision    recall  f1-score   support

           0       0.43      0.51      0.46       621
           1       0.32      0.30      0.31       667
           2       0.31      0.14      0.19       633
           3       0.19      0.23      0.21       651
           4       0.84      0.75      0.79       500
           5       0.19      0.14      0.16       652
           6       0.30      0.11      0.16       621
           7       0.27      0.13      0.17       484
           8       0.74      0.72      0.73       586
           9       0.22      0.39      0.28       618
          10       0.30      0.19      0.23       523
          11 

In [132]:
w500_o50_features_pipeline.run_decission_tree_classifier_model()

==== Grid Search: =====
Best parameters found:  {'criterion': 'gini', 'max_depth': 50, 'min_samples_split': 2}
Best score found:  0.4768687138406764

==== Random Search: =====
Best parameters found:  {'min_samples_split': 2, 'max_depth': 20, 'criterion': 'gini'}
Best score found:  0.47954131434505265
2. Decission Tree Classifier metrics: 
              precision    recall  f1-score   support

           0       0.41      0.39      0.40       621
           1       0.49      0.46      0.48       667
           2       0.33      0.35      0.34       633
           3       0.55      0.57      0.56       651
           4       0.83      0.79      0.81       500
           5       0.43      0.51      0.47       652
           6       0.32      0.29      0.31       621
           7       0.28      0.25      0.26       484
           8       0.65      0.67      0.66       586
           9       0.73      0.71      0.72       618
          10       0.33      0.38      0.36       523
          

In [133]:
w500_o50_features_pipeline.run_random_forest_classifier_model()

==== Grid Search: =====
Best parameters found:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': 50, 'n_estimators': 500}
Best score found:  0.6303367452900163

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'max_depth': None, 'criterion': 'entropy', 'bootstrap': False}
Best score found:  0.6298917074618009
3.  Random Forest Classifier metrics: 
              precision    recall  f1-score   support

           0       0.55      0.71      0.62       621
           1       0.64      0.67      0.66       667
           2       0.56      0.40      0.47       633
           3       0.63      0.77      0.69       651
           4       0.88      0.92      0.90       500
           5       0.51      0.61      0.55       652
           6       0.61      0.40      0.48       621
           7       0.52      0.40      0.45       484
           8       0.80      0.88      0.84       586
           9       0.89      0.83      0.86       618
          10       0.57

In [134]:
w500_o50_features_pipeline.run_gaussian_naive_bias_classifier_model()

==== Grid Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.29491885476932206

==== Random Search: =====
Best parameters found:  {'var_smoothing': 1e-09}
Best score found:  0.29491885476932206
4. Gaussian Naive Bias Classifier metrics: 
              precision    recall  f1-score   support

           0       0.40      0.27      0.32       621
           1       0.30      0.17      0.22       667
           2       0.38      0.07      0.12       633
           3       0.17      0.79      0.28       651
           4       0.66      0.75      0.70       500
           5       0.15      0.12      0.13       652
           6       0.35      0.09      0.14       621
           7       0.26      0.14      0.19       484
           8       0.60      0.72      0.66       586
           9       0.32      0.23      0.27       618
          10       0.31      0.10      0.16       523
          11       0.30      0.38      0.33       610
          12       0.20   

In [135]:
w500_o50_features_pipeline.run_knn_classifier_model()

==== Grid Search: =====
Best parameters found:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score found:  0.43282955051179345

==== Random Search: =====
Best parameters found:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score found:  0.43282955051179345
6. K-Nearest Neighbors metrics: 
              precision    recall  f1-score   support

           0       0.54      0.40      0.46       621
           1       0.61      0.41      0.49       667
           2       0.52      0.22      0.31       633
           3       0.40      0.76      0.52       651
           4       0.73      0.71      0.72       500
           5       0.32      0.40      0.35       652
           6       0.64      0.22      0.33       621
           7       0.45      0.15      0.23       484
           8       0.82      0.55      0.66       586
           9       0.75      0.72      0.73       618
          10       0.46      0.34      0.39  

In [136]:
w500_o50_features_pipeline.run_support_vector_classifier_model()

==== Grid Search: =====
Best parameters found:  {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
Best score found:  0.5542661326212729

==== Random Search: =====
Best parameters found:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 100}
Best score found:  0.5542661326212729
5. Support Vector Classifier metrics: 
              precision    recall  f1-score   support

           0       0.46      0.53      0.49       621
           1       0.69      0.59      0.64       667
           2       0.37      0.28      0.32       633
           3       0.51      0.71      0.59       651
           4       0.84      0.86      0.85       500
           5       0.49      0.45      0.47       652
           6       0.35      0.32      0.34       621
           7       0.43      0.40      0.42       484
           8       0.56      0.85      0.68       586
           9       0.80      0.75      0.78       618
          10       0.54      0.49      0.51       523
          11       0.56      0.44      0.49       610

In [137]:
w500_o50_features_pipeline.run_ada_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.5, 'n_estimators': 200}
Best score found:  0.39500667556742325

==== Random Search: =====
Best parameters found:  {'n_estimators': 200, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score found:  0.37721940364931017
7. Ada Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.37      0.62      0.47       621
           1       0.43      0.38      0.41       667
           2       0.37      0.21      0.27       633
           3       0.27      0.39      0.32       651
           4       0.91      0.47      0.62       500
           5       0.24      0.18      0.21       652
           6       0.23      0.19      0.21       621
           7       0.24      0.17      0.19       484
           8       0.56      0.43      0.48       586
           9       0.55      0.59      0.57       618
      

In [138]:
w500_o50_features_pipeline.run_xg_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'gamma': 0, 'learning_rate': 0.1, 'n_estimators': 200, 'subsample': 0.8}
Best score found:  0.6352226672600504

==== Random Search: =====
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'learning_rate': 0.1, 'gamma': 0}
Best score found:  0.6352226672600504
8. XG Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.58      0.69      0.63       621
           1       0.71      0.70      0.71       667
           2       0.56      0.44      0.49       633
           3       0.64      0.74      0.69       651
           4       0.91      0.92      0.91       500
           5       0.51      0.60      0.55       652
           6       0.54      0.40      0.46       621
           7       0.49      0.44      0.46       484
           8       0.80      0.88      0.84       586
           9       0.87      0.86      0.87       618
          10       0.57      0.55      0.56      

In [180]:
w500_o50_features_pipeline.run_gradient_boost_classifier_model()

==== Grid Search: =====
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
Best score found:  0.592526924788607

==== Random Search: =====
Best parameters found:  {'random_state': 42, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1}
Best score found:  0.592526924788607
9. Gradient Boost Classifier metrics: 
              precision    recall  f1-score   support

           0       0.53      0.61      0.57       621
           1       0.62      0.63      0.62       667
           2       0.48      0.39      0.43       633
           3       0.61      0.71      0.65       651
           4       0.90      0.87      0.88       500
           5       0.43      0.56      0.49       652
           6       0.45      0.36      0.40       621
           7       0.46      0.36      0.40       484
           8       0.74      0.84      0.79       586
           9       0.87      0.81      0.84       618
          10       0.52      0.51  

In [181]:
w500_o25_features_pipeline.run_ann_model()

==== Grid Search: =====
Best parameters found:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'relu', 'model__n_neurons': 64}
Best score found:  0.5112723394031561

==== Random Search: =====
Best parameters found:  {'model__n_neurons': 64, 'model__activation': 'relu', 'epochs': 150, 'batch_size': 50}
Best score found:  0.5092948286377353
10. Artificial Neuralnet metrics: 
              precision    recall  f1-score   support

           0       0.44      0.42      0.43       416
           1       0.55      0.61      0.58       448
           2       0.38      0.27      0.31       418
           3       0.65      0.56      0.60       437
           4       0.83      0.82      0.83       348
           5       0.33      0.46      0.38       438
           6       0.33      0.35      0.34       408
           7       0.37      0.38      0.37       325
           8       0.74      0.75      0.74       394
           9       0.78      0.80      0.79       414
          10       0.

# Bonus Points

# Feature extraction using TS Fresh with 100 features

In [15]:
sensore_meta = pd.read_csv("meta_data_with_path.csv")

sensore_meta.head()

Unnamed: 0,path,file_name,exp_id,sensor,frequency,activity_id,activity,activity_details
0,DataSet2/User25/316_MetaWear_2021-08-01T14.52....,316_MetaWear_2021-08-01T14.52.00.189_EB942CED9...,316,Accelerometer,100.000Hz,5,While sitting,"Moving head, body"
1,DataSet2/User25/321_MetaWear_2021-08-01T15.09....,321_MetaWear_2021-08-01T15.09.34.379_EB942CED9...,321,Accelerometer,100.000Hz,11,Standing,Taking stairs
2,DataSet2/User25/318_MetaWear_2021-08-01T14.59....,318_MetaWear_2021-08-01T14.59.48.670_EB942CED9...,318,Gyroscope,100.000Hz,7,Sitting,Stand up from sitting
3,DataSet2/User25/316_MetaWear_2021-08-01T14.52....,316_MetaWear_2021-08-01T14.52.00.189_EB942CED9...,316,Gyroscope,100.000Hz,5,While sitting,"Moving head, body"
4,DataSet2/User25/315_MetaWear_2021-08-01T14.48....,315_MetaWear_2021-08-01T14.48.37.034_EB942CED9...,315,Gyroscope,100.000Hz,4,Using computer,Browsing


In [16]:
def extract_feature(window_size, overlap):
    step_size = int(math.ceil(window_size * (1 - overlap)))

    window_list = []
    activity_list = []
    id = 0

    for exp_id in sensore_meta['exp_id'].value_counts().index:
        temp_df = sensore_meta[sensore_meta['exp_id'] == exp_id]
        
        acc_df = pd.read_csv(temp_df[temp_df['sensor'] == 'Accelerometer'].iloc[0 , 0])
        acc_df.columns = ['epoch', 'time', 'elapsed', 'x', 'y', 'z']
        acc_df = acc_df[['x', 'y', 'z']]

        gyro_df = pd.read_csv(temp_df[temp_df['sensor'] == 'Gyroscope'].iloc[0 , 0])
        gyro_df.columns = ['epoch', 'time', 'elapsed', 'x', 'y', 'z']
        gyro_df = gyro_df[['x', 'y', 'z']]

        if gyro_df.isna().any().any():
            print(temp_df[temp_df['sensor'] == 'Gyroscope'].iloc[0 , 0])

        for i in range(0, len(acc_df) - window_size, step_size):
            activity_id = temp_df[temp_df['sensor'] == 'Accelerometer'].iloc[0, 5]

            window_acc_data = acc_df.iloc[i : i + window_size].copy()
            window_acc_data['id'] = id
            window_list.append(window_acc_data)
            activity_list.append({'id': id, 'activity_id' : activity_id})
            id += 1

            window_gyro_data = gyro_df.iloc[i : i + window_size].copy()
            window_gyro_data['id'] = id
            window_list.append(window_gyro_data)
            activity_list.append({'id': id, 'activity_id' : activity_id})
            id += 1

    window_df = pd.concat(window_list)
    activiteis =  pd.DataFrame(activity_list)

    # TS Fresh part
    extraction_df = window_df.melt(id_vars=['id'], 
                                   value_vars=['x', 'y', 'z'], 
                                   var_name='axis', 
                                   value_name='value')
    
    comprehensive_settings = ComprehensiveFCParameters()
    first_100_features = dict(list(comprehensive_settings.items())[:100])

    features = extract_features(extraction_df, 
                                column_id='id', 
                                column_kind='axis', 
                                column_value='value', 
                                default_fc_parameters=ComprehensiveFCParameters())
    
    features = impute(features)
    
    features.index.name = 'id'  
    features = features.reset_index()
    features = features.merge(activiteis, on='id')
    features.drop(columns = ['id'])

    print(f"Window size: {window_size}, Overlap: {overlap*100} Step size: {step_size}, Shape: {features.shape}")
    
    features.to_csv(f"tsfresh_features/w{window_size}_o{int(overlap*100)}_ts_features.csv")


In [4]:
df = extract_feature(400, 0.25)

Feature Extraction: 100%|██████████| 20/20 [26:42<00:00, 80.15s/it]  
 'x__query_similarity_count__query_None__threshold_0.0'
 'y__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.


Window size: 400, Overlap: 25.0 Step size: 300, Shape: (19146, 2351)


In [5]:
df = extract_feature(400, 0.50)

Feature Extraction: 100%|██████████| 20/20 [46:04<00:00, 138.23s/it] 
 'y__query_similarity_count__query_None__threshold_0.0'
 'z__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.


Window size: 400, Overlap: 50.0 Step size: 200, Shape: (28654, 2351)


In [5]:
df = extract_feature(500, 0.25)

Feature Extraction: 100%|██████████| 20/20 [2:10:04<00:00, 390.24s/it]  
 'x__query_similarity_count__query_None__threshold_0.0'
 'y__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.


Window size: 500, Overlap: 25.0 Step size: 375, Shape: (15287, 2351)


In [17]:
df = extract_feature(500, 0.50)

Feature Extraction: 100%|██████████| 20/20 [2:31:44<00:00, 455.23s/it]  
 'z__query_similarity_count__query_None__threshold_0.0'
 'x__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.


Window size: 500, Overlap: 50.0 Step size: 250, Shape: (22789, 2351)


# Utils

In [43]:
def get_features_and_target(features):
    X = features.iloc[:, :-1]
    y = features.iloc[:, -1]

    return X, y

# PCA

In [2]:
def pca(features):

    pca = PCA()
    pca.fit(features)

    cumulative_variance = np.cumsum(pca.explained_variance_ratio_)

    n_components_95 = np.argmax(cumulative_variance >= 0.95) + 1

    print(f"Number of components to retain 95% variance: {n_components_95}")

    pca = PCA(n_components=n_components_95)
    reduced_data = pca.fit_transform(features)

    reduced_feaure = pd.DataFrame(reduced_data, columns=[f'PC{i+1}' for i in range(n_components_95)])
    print("\nReduced DataFrame (95% Variance):", reduced_feaure.head())

    return reduced_feaure

In [None]:
def pca_runner(window, overlap):
    feature_df = pd.read_csv(f"tsfresh_features/w{window}_o{int(overlap*100)}_ts_features.csv")
    X, y = get_features_and_target(feature_df)

    print("No of features before PCA: ", X.shape[1])

    reduced_X = pca(X)

    print("No of features after PCA: ", reduced_X.shape[1]) 

    pca_features = pd.concat([X, y], axis=1)
    pca_features.to_csv(f"pca_features/w{window}_o{int(overlap*100)}_ts_features.csv")

    return reduced_X, y

In [12]:
pca_runner(400, .25)

No of features before PCA:  2351
Number of components to retain 95% variance: 5

Reduced DataFrame (95% Variance):              PC1            PC2            PC3            PC4           PC5
0 -182636.840740    1847.579209   -6154.450598   -4681.275295 -10097.112419
1  346160.716696  -49250.942623 -200718.783607  183717.890786  88629.495596
2 -182668.412868    1843.101291   -6148.620943   -4674.769134  -9994.862707
3  641459.048327 -236067.619165 -217834.381500 -453299.899325   7916.460309
4 -182663.679495    1848.544904   -6136.220128   -4672.474060 -10004.588755
No of features after PCA:  5


(                 PC1            PC2            PC3            PC4  \
 0     -182636.840740    1847.579209   -6154.450598   -4681.275295   
 1      346160.716696  -49250.942623 -200718.783607  183717.890786   
 2     -182668.412868    1843.101291   -6148.620943   -4674.769134   
 3      641459.048327 -236067.619165 -217834.381500 -453299.899325   
 4     -182663.679495    1848.544904   -6136.220128   -4672.474060   
 ...              ...            ...            ...            ...   
 19141  117209.134172  -55285.384041  -58169.061766  -70929.136420   
 19142 -182644.208017    1818.496912   -6239.210038   -4675.939465   
 19143  113409.131086  -60077.873600 -104363.052769  -71046.130628   
 19144 -182648.629947    1823.393882   -6227.750808   -4673.719436   
 19145   -3178.661641   -4124.933257   80379.757563   19200.047356   
 
                 PC5  
 0     -10097.112419  
 1      88629.495596  
 2      -9994.862707  
 3       7916.460309  
 4     -10004.588755  
 ...             ...

In [18]:
X, y = pca_runner(400, .50)

No of features before PCA:  2351
Number of components to retain 95% variance: 5

Reduced DataFrame (95% Variance):              PC1            PC2            PC3            PC4           PC5
0 -182204.707995    3018.840568   -6656.307279   -4473.353359  -9798.131622
1  347336.596265  -52595.639940 -210326.608892  167238.916436  94841.938043
2 -182225.187519    3032.163797   -6612.788804   -4458.724429  -9729.639579
3  646652.286346 -227175.469397 -111506.919269 -454162.186049  86400.946347
4 -182246.222419    3023.008354   -6622.466487   -4459.765386  -9663.413603
No of features after PCA:  5


In [19]:
X, y = pca_runner(500, .25)

No of features before PCA:  2351
Number of components to retain 95% variance: 5

Reduced DataFrame (95% Variance):              PC1            PC2            PC3            PC4           PC5
0 -224990.619470  -11329.295683    7682.962310   -3216.781503 -11572.962831
1  940047.750019 -520347.738748  120474.789752 -166855.399910  84881.735137
2 -225034.177711  -11302.317823    7655.314948   -3202.744993 -11431.564745
3  967258.338221 -529341.527070  140516.157031 -206662.245057 -42907.142620
4 -224994.070330  -11357.261687    7683.556676   -3209.778429 -11486.934250
No of features after PCA:  5


In [20]:
X, y = pca_runner(500, .25)

No of features before PCA:  2351
Number of components to retain 95% variance: 5

Reduced DataFrame (95% Variance):              PC1            PC2            PC3            PC4           PC5
0 -224990.619470  -11329.295683    7682.962310   -3216.781503 -11572.962831
1  940047.750019 -520347.738748  120474.789752 -166855.399910  84881.735137
2 -225034.177711  -11302.317823    7655.314948   -3202.744993 -11431.564745
3  967258.338221 -529341.527070  140516.157031 -206662.245057 -42907.142620
4 -224994.070330  -11357.261687    7683.556676   -3209.778429 -11486.934250
No of features after PCA:  5


In [133]:
w500_o25_pipeline = ModelEvaluationPipeline("features/w500_o25_features.csv")

In [134]:
w500_o25_pipeline.run_lstm_model()

Model Summary:


Epoch 1/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1120 - loss: 2.6368 - val_accuracy: 0.2003 - val_loss: 2.2956
Epoch 2/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 976us/step - accuracy: 0.1998 - loss: 2.2556 - val_accuracy: 0.2881 - val_loss: 2.0736
Epoch 3/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 964us/step - accuracy: 0.2451 - loss: 2.1144 - val_accuracy: 0.3510 - val_loss: 1.9378
Epoch 4/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 956us/step - accuracy: 0.3010 - loss: 1.9793 - val_accuracy: 0.3560 - val_loss: 1.8277
Epoch 5/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 969us/step - accuracy: 0.3264 - loss: 1.9222 - val_accuracy: 0.3709 - val_loss: 1.7567
Epoch 6/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 962us/step - accuracy: 0.3398 - loss: 1.8707 - val_accuracy: 0.3775 - val_loss: 1.7217
Epoch 7/50
[1m170

In [135]:
w500_o25_pipeline.run_cnn_model()

Model Summary:


Epoch 1/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1570 - loss: 2.5048 - val_accuracy: 0.3096 - val_loss: 2.0833
Epoch 2/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2945 - loss: 2.0825 - val_accuracy: 0.3725 - val_loss: 1.8437
Epoch 3/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.3779 - loss: 1.8585 - val_accuracy: 0.4321 - val_loss: 1.7116
Epoch 4/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4100 - loss: 1.7110 - val_accuracy: 0.4586 - val_loss: 1.5959
Epoch 5/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4448 - loss: 1.6236 - val_accuracy: 0.4652 - val_loss: 1.5279
Epoch 6/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4693 - loss: 1.5513 - val_accuracy: 0.4950 - val_loss: 1.4629
Epoch 7/50
[1m170/170[0m 

In [136]:
w500_o25_pipeline.run_rnn_model()

Model Summary:


Epoch 1/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.1138 - loss: 2.7187 - val_accuracy: 0.2682 - val_loss: 2.2295
Epoch 2/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 748us/step - accuracy: 0.1960 - loss: 2.3633 - val_accuracy: 0.3228 - val_loss: 2.0672
Epoch 3/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 750us/step - accuracy: 0.2285 - loss: 2.2291 - val_accuracy: 0.3493 - val_loss: 1.9722
Epoch 4/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 743us/step - accuracy: 0.2840 - loss: 2.1180 - val_accuracy: 0.3675 - val_loss: 1.9030
Epoch 5/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 768us/step - accuracy: 0.2830 - loss: 2.0889 - val_accuracy: 0.3957 - val_loss: 1.8514
Epoch 6/50
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 747us/step - accuracy: 0.3066 - loss: 2.0126 - val_accuracy: 0.4073 - val_loss: 1.8124
Epoch 7/50
[1m170