# Hyperparameter search

### Importing our own functions

In [1]:
# Project-local helpers. Each module is imported, force-reloaded, and then the
# module name is rebound to the class of the same name that it contains.
# NOTE(review): the reload is presumably there so that edits to the files under
# src/ are picked up without restarting the kernel — confirm.
import importlib

import src.Data as Data
importlib.reload(Data)
# From here on `Data` names the class, not the module (the module alias is shadowed).
Data = Data.Data

import src.Metrics as Metrics
importlib.reload(Metrics)
# Same rebinding: `Metrics` now names the class, not the module.
Metrics = Metrics.Metrics

### Importing libraries

In [2]:
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import LearningCurveDisplay, learning_curve, cross_validate, train_test_split, cross_val_predict
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
import pandas as pd

### Loading the data

In [3]:
# Instantiate the project's dataset wrapper with image features enabled.
# NOTE(review): test_size=0.2 presumably reserves 20% of the samples for the
# test split — confirm against src/Data.
data: Data = Data(include_images=True, test_size=0.2)

# Number of training samples in the rarest class: np.unique(..., return_counts=True)
# returns (labels, counts), and we keep the smallest count.
least_populated_class_count = np.min(
    np.unique(data.y_train, return_counts=True)[1]
)
print("Least populated class count:", least_populated_class_count)
print("This is the maximum valid number of folds for cross validation.")

Least populated class count: 8
This is the maximum valid number of folds for cross validation.


### Choosing the models

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

# Single seed shared by every estimator that draws random numbers, so that
# re-running the notebook reproduces the same models and scores and so that
# differences between hyperparameter settings are not confounded by seed noise.
RANDOM_STATE = 42

# Candidate models for the hyperparameter search, all otherwise left at their
# default settings. Order matters: later cells refer to these by position.
classifiers = [
    RandomForestClassifier(random_state=RANDOM_STATE),
    # SVC only uses random_state when probability=True, so the default is kept.
    SVC(),
    # KNeighborsClassifier has no random_state parameter (it is deterministic).
    KNeighborsClassifier(),
    GradientBoostingClassifier(random_state=RANDOM_STATE),
    AdaBoostClassifier(random_state=RANDOM_STATE),
]

### Getting the list of hyperparameters


In [9]:
# List every tunable parameter name of each candidate model, one block per
# model, as a reference for choosing what to put in the search grids.
for estimator in classifiers:
    estimator_name = type(estimator).__name__
    print("Classifier:", estimator_name)
    print("Parameters:")
    for param_name in estimator.get_params().keys():
        print("\t", param_name)
    # Blank line between models.
    print("")
    

Classifier: RandomForestClassifier
Parameters:
	 bootstrap
	 ccp_alpha
	 class_weight
	 criterion
	 max_depth
	 max_features
	 max_leaf_nodes
	 max_samples
	 min_impurity_decrease
	 min_samples_leaf
	 min_samples_split
	 min_weight_fraction_leaf
	 n_estimators
	 n_jobs
	 oob_score
	 random_state
	 verbose
	 warm_start

Classifier: SVC
Parameters:
	 C
	 break_ties
	 cache_size
	 class_weight
	 coef0
	 decision_function_shape
	 degree
	 gamma
	 kernel
	 max_iter
	 probability
	 random_state
	 shrinking
	 tol
	 verbose

Classifier: KNeighborsClassifier
Parameters:
	 algorithm
	 leaf_size
	 metric
	 metric_params
	 n_jobs
	 n_neighbors
	 p
	 weights

Classifier: GradientBoostingClassifier
Parameters:
	 ccp_alpha
	 criterion
	 init
	 learning_rate
	 loss
	 max_depth
	 max_features
	 max_leaf_nodes
	 min_impurity_decrease
	 min_samples_leaf
	 min_samples_split
	 min_weight_fraction_leaf
	 n_estimators
	 n_iter_no_change
	 random_state
	 subsample
	 tol
	 validation_fraction
	 verbose
	 warm_

### Choosing the hyperparameters to tune

In [10]:
# Search space for each supported estimator, keyed by the estimator's class
# name. Every value maps a parameter name to the list of candidate values to try.
grids_by_classifier = {
    "RandomForestClassifier": {
        "n_estimators": [10, 50, 100, 200, 500],
        "criterion": ["gini", "entropy"],
        "min_samples_split": [2, 5, 10],
        "max_features": ["sqrt", "log2"]
    },
    "SVC": {
        "kernel": ["linear", "poly", "rbf", "sigmoid"],
        "C": [0.1, 0.5, 2, 5, 10, 20, 50, 100, 200, 500, 1000],
        "gamma": ["scale", "auto"]
    },
    "KNeighborsClassifier": {
        "n_neighbors": [1, 2, 5, 10],
        "weights": ["uniform", "distance"],
        "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
        "leaf_size": [1, 2, 5, 10, 20, 30, 50],
        "p": [1, 2]
    },
    "GradientBoostingClassifier": {
        "learning_rate": [0.005, 0.01, 0.025, 0.05, 0.1, 0.5],
        "n_estimators": [100, 500],
        "criterion": ["friedman_mse", "squared_error"],
        "max_depth": [1, 2, 3, 5, 10],
        "min_samples_split": [2, 5, 10, 15, 20],
        "max_features": ["sqrt", "log2"]
    },
    "AdaBoostClassifier": {
        "n_estimators": [50, 200, 500],
        "learning_rate": [0.001, 0.01, 0.1, 0.5],
        # NOTE(review): "SAMME.R" is deprecated in recent scikit-learn releases
        # (1.4+) and later removed — confirm against the installed version and
        # drop it from the list if the search raises on this value.
        "algorithm": ["SAMME", "SAMME.R"]
    },
}

# One grid per entry of `classifiers`, in the SAME order as `classifiers`, so
# that position i of one list always corresponds to position i of the other
# even if the model list is reordered or a model type appears more than once.
# As before, a model with no grid defined above contributes nothing.
param_grids = [
    grids_by_classifier[classifier.__class__.__name__]
    for classifier in classifiers
    if classifier.__class__.__name__ in grids_by_classifier
]
