In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC

from sklearn.decomposition import PCA

from utils.data import CLASS_NAMES, read_dataset
from utils.image_preprocessing import preprocess_dataset
from utils.hyperparameter_optimizer import optimize_hyperparameters

%matplotlib inline
plt.rcParams['figure.figsize'] = [10, 5]

# Loading data

In [None]:
# Dataset location and target image shape, named here so they can be changed in
# one place instead of being buried inside the calls below.
DATA_DIR = './data/rockpaperscissors/'
IMAGE_SHAPE = (300, 200)

# Load the raw samples and their labels, then preprocess the samples.
# Both steps live in the same cell so that re-running it always starts from the
# raw data (X is rebound from the read result, never from a previous X).
# NOTE(review): the (300, 200) axis order (width/height vs. height/width) is
# defined by preprocess_dataset — confirm against utils.image_preprocessing.
X, y = read_dataset(DATA_DIR)
X = preprocess_dataset(X, image_shape=IMAGE_SHAPE)

In [3]:
# Sanity check: dimensions of the preprocessed data
# (presumably n_samples x n_features, since X is later passed to fit()).
X.shape

(2188, 60000)

In [4]:
# Positional hold-out split: the first 80% of the samples form the training set,
# the remaining 20% form the test set.
# NOTE(review): there is no shuffling or stratification here — confirm that
# read_dataset does not return samples grouped by class, otherwise the two
# splits will have different class distributions.
TRAIN_SIZE = int(0.8 * y.shape[0])
X_train, X_test = X[:TRAIN_SIZE], X[TRAIN_SIZE:]
y_train, y_test = y[:TRAIN_SIZE], y[TRAIN_SIZE:]

In [5]:
# Sanity check: dimensions of the training portion produced by the split above.
X_train.shape

(1750, 60000)

# Training models

## Hyperparameter tuning

For each model, we select the hyperparameters that maximize the mean accuracy measured by cross-validation on the training set.

### KNN

In [6]:
# Run the hyperparameter search for KNN with a budget of 100 trials.
# Only the training split is passed in, so the test split stays unseen.
knn_result = optimize_hyperparameters(
    'knn',
    X_train,
    y_train,
    n_trials=100,
)
# Print the full record of the returned trial (presumably the best one found).
print(knn_result)

[I 2024-02-08 14:51:50,003] A new study created in memory with name: no-name-1d46a280-12d2-43b1-a833-a5e7ce043fcc
[I 2024-02-08 14:51:57,439] Trial 1 finished with value: 0.8428571428571427 and parameters: {'n_neighbors': 31}. Best is trial 1 with value: 0.8428571428571427.
[I 2024-02-08 14:51:57,472] Trial 3 finished with value: 0.8320000000000001 and parameters: {'n_neighbors': 39}. Best is trial 1 with value: 0.8428571428571427.
[I 2024-02-08 14:52:04,981] Trial 5 finished with value: 0.8537142857142858 and parameters: {'n_neighbors': 25}. Best is trial 5 with value: 0.8537142857142858.
[I 2024-02-08 14:52:06,590] Trial 10 finished with value: 0.8337142857142856 and parameters: {'n_neighbors': 37}. Best is trial 5 with value: 0.8537142857142858.
[I 2024-02-08 14:52:08,419] Trial 9 finished with value: 0.8245714285714285 and parameters: {'n_neighbors': 42}. Best is trial 5 with value: 0.8537142857142858.
[I 2024-02-08 14:52:09,446] Trial 7 finished with value: 0.8862857142857143 and 

FrozenTrial(number=24, state=1, values=[0.9074285714285715], datetime_start=datetime.datetime(2024, 2, 8, 14, 52, 19, 396659), datetime_complete=datetime.datetime(2024, 2, 8, 14, 52, 43, 792595), params={'n_neighbors': 3}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_neighbors': IntDistribution(high=49, log=False, low=3, step=1)}, trial_id=24, value=None)


In [7]:
# Hyperparameter values of the trial returned by the search.
knn_result.params

{'n_neighbors': 3}

In [8]:
# Objective value of the returned trial — per the section introduction, the mean
# cross-validation accuracy on the training set.
knn_result.value

0.9074285714285715

In [9]:
# Build a KNN classifier from the tuned hyperparameters and fit it on the whole
# training split (n_jobs=-1 lets scikit-learn use all available CPU cores).
# fit() returns the estimator itself, so construction and fitting are chained.
best_knn_model = KNeighborsClassifier(n_jobs=-1, **knn_result.params).fit(X_train, y_train)
# Mean accuracy on the held-out test split; displayed as the cell output.
best_knn_model.score(X_test, y_test)

0.9178082191780822

### Decision Tree

### Random Forest

### SVM

### XGBoost