# Hyperparameter search

### Importing our own functions

In [38]:
import importlib

# Reload each project module on every run of this cell so that edits to the
# source files are picked up, then expose the module's same-named attribute
# under the short name `Data` / `Metrics`.
import src.Data as _data_module
Data = importlib.reload(_data_module).Data

import src.Metrics as _metrics_module
Metrics = importlib.reload(_metrics_module).Metrics

### Importing libraries

In [39]:
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import LearningCurveDisplay, learning_curve, cross_validate, train_test_split, cross_val_predict
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
import pandas as pd

### Loading the data

In [40]:
# Build the project dataset object with the settings used for this search.
data: Data = Data(test_size=0.2, include_images=False)

# Count how often each distinct value occurs in y_train and keep the smallest
# count; it is reported below as the cap on the number of CV folds.
train_label_counts = np.unique(data.y_train, return_counts=True)[1]
least_populated_class_count = train_label_counts.min()
print("Least populated class count:", least_populated_class_count)
print("This is the maximum valid number of folds for cross validation.")

Least populated class count: 8
This is the maximum valid number of folds for cross validation.


### Choosing the models

In [41]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier

# Estimators whose hyperparameters are searched in this notebook.
# Enabled entries are instantiated with their default settings; toggle a
# candidate by (un)commenting its line.
classifiers = [
    # DecisionTreeClassifier(),
    # RandomForestClassifier(n_jobs=-1),
    # BaggingClassifier(n_jobs=-1),
    # LogisticRegression(n_jobs=-1),
    # SVC(),
    # GaussianNB(),
    # SGDClassifier(n_jobs=-1),
    # KNeighborsClassifier(n_jobs=-1),
    GradientBoostingClassifier(),
    # MLPClassifier(),
    AdaBoostClassifier(),
]

### Getting the list of hyperparameters


In [42]:
# List the tunable parameter names of every selected estimator.
for classifier in classifiers:
    estimator_name = type(classifier).__name__
    print("Classifier:", estimator_name)
    print("Parameters:")
    # get_params() returns a dict; iterating it yields the parameter names.
    for key in classifier.get_params():
        print("\t", key)
    print("")
    

Classifier: GradientBoostingClassifier
Parameters:
	 ccp_alpha
	 criterion
	 init
	 learning_rate
	 loss
	 max_depth
	 max_features
	 max_leaf_nodes
	 min_impurity_decrease
	 min_samples_leaf
	 min_samples_split
	 min_weight_fraction_leaf
	 n_estimators
	 n_iter_no_change
	 random_state
	 subsample
	 tol
	 validation_fraction
	 verbose
	 warm_start

Classifier: AdaBoostClassifier
Parameters:
	 algorithm
	 base_estimator
	 estimator
	 learning_rate
	 n_estimators
	 random_state



### Choosing the hyperparameters to tune

In [43]:
# Hyperparameter grids, keyed by estimator class name.
#
# Keeping the grids in one mapping (instead of appending them one by one
# behind "is this class selected?" checks) lets `param_grids` be derived
# directly from `classifiers`, so the i-th grid always belongs to the i-th
# classifier regardless of the order in which the classifiers are listed.
grids_by_classifier = {
    "DecisionTreeClassifier": {
        "criterion": ["gini", "entropy"],
        "splitter": ["best", "random"],
        "max_depth": [None, 5, 10, 20, 50, 100],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 5, 10],
        "max_features": ["sqrt", "log2"],
    },
    "RandomForestClassifier": {
        "n_estimators": [10, 50, 100, 200, 500],
        "criterion": ["gini", "entropy"],
        "min_samples_split": [2, 5, 10],
        "max_features": ["sqrt", "log2"],
    },
    "BaggingClassifier": {
        "n_estimators": [10, 20, 50, 100],
        "max_samples": [0.1, 0.5, 1.0],
        "max_features": [0.1, 0.5, 1.0],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    },
    # NOTE(review): not every penalty/solver pair below is supported by
    # LogisticRegression (e.g. "l1" with "lbfgs"; "elasticnet" needs "saga"
    # and an l1_ratio) -- those candidates are expected to fail during the
    # search. Consider splitting this grid per solver; left unchanged here.
    "LogisticRegression": {
        "penalty": ["l1", "l2", "elasticnet"],
        "C": [0.1, 0.5, 2, 5, 10, 20, 50, 100, 200, 500, 1000],
        "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],
        "max_iter": [100, 200, 500],
    },
    "SVC": {
        "kernel": ["linear", "poly", "rbf", "sigmoid"],
        "C": [0.1, 0.5, 2, 5, 10, 20, 50, 100, 200, 500, 1000],
        "gamma": ["scale", "auto"],
    },
    "GaussianNB": {
        "var_smoothing": [1e-07, 1e-06, 1e-05, 1e-04, 1e-03, 0.005, 0.01, 0.02, 0.05, 0.075, 0.1],
    },
    "SGDClassifier": {
        # "log_loss" replaces the old "log" spelling, which scikit-learn
        # deprecated in 1.1 and later removed.
        "loss": ["hinge", "log_loss", "modified_huber", "squared_hinge", "perceptron"],
        "penalty": ["l1", "l2", "elasticnet"],
        "alpha": [0.00001, 0.0001, 0.001, 0.01],
        "max_iter": [1000, 2000, 5000, 10000],
    },
    "KNeighborsClassifier": {
        "n_neighbors": [1, 2, 5, 10],
        "weights": ["uniform", "distance"],
        "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
        "leaf_size": [1, 2, 5, 10, 20, 30, 50],
        "p": [1, 2],
    },
    "GradientBoostingClassifier": {
        "learning_rate": [0.005, 0.01, 0.025, 0.05, 0.1, 0.5],
        "n_estimators": [100, 500],
        "criterion": ["friedman_mse", "squared_error"],
        "max_depth": [1, 2, 3, 5, 10],
        "min_samples_split": [2, 5, 10, 15, 20],
        "max_features": ["sqrt", "log2"],
    },
    "MLPClassifier": {
        "hidden_layer_sizes": [(20, 20,), (100,), (200,), (500,)],
        "activation": ["logistic", "tanh", "relu"],
        "solver": ["lbfgs", "sgd"],
        "alpha": [0.00001, 0.0001, 0.001, 0.01],
        "learning_rate": ["constant", "adaptive"],
        "max_iter": [200, 500],
    },
    "AdaBoostClassifier": {
        "n_estimators": [50, 200, 500],
        "learning_rate": [0.001, 0.01, 0.1, 0.5],
        "algorithm": ["SAMME", "SAMME.R"],
    },
}

# Fail loudly if a selected classifier has no grid: silently skipping it
# would leave `param_grids` shorter than (and misaligned with) `classifiers`.
classifiers_without_grid = [
    classifier.__class__.__name__
    for classifier in classifiers
    if classifier.__class__.__name__ not in grids_by_classifier
]
if classifiers_without_grid:
    raise KeyError(
        "No hyperparameter grid defined for: " + ", ".join(classifiers_without_grid)
    )

# One grid per selected classifier, in the same order as `classifiers`.
param_grids = [
    grids_by_classifier[classifier.__class__.__name__]
    for classifier in classifiers
]
