In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC

from sklearn.decomposition import PCA

from utils.data import CLASS_NAMES, read_dataset
from utils.image_preprocessing import preprocess_dataset

%matplotlib inline
plt.rcParams['figure.figsize'] = [10, 5]

# Clear dataset (no augmentation)

In [None]:
X, y = read_dataset('./data/rockpaperscissors/')

In [47]:
X = preprocess_dataset(X)

In [48]:
TRAIN_SIZE = int(0.8 * y.shape[0])
X_train, y_train = X[:TRAIN_SIZE], y[:TRAIN_SIZE]
X_test, y_test = X[TRAIN_SIZE:], y[TRAIN_SIZE:]

## On full images

### KNN

In [49]:
knn_classifier = KNeighborsClassifier(n_neighbors=7, n_jobs=-1)
knn_classifier.fit(X_train, y_train)
knn_classifier.score(X_test, y_test)

0.91324200913242

### Decision Tree

In [6]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_train, y_train)
decision_tree_classifier.score(X_test, y_test)

0.8401826484018264

### Random Forest

In [7]:
random_forest_classifier = RandomForestClassifier(n_estimators=100, n_jobs=-1)
random_forest_classifier.fit(X_train, y_train)
random_forest_classifier.score(X_test, y_test)

0.9474885844748858

### XGBoost

In [8]:
# We need more RAM for this!
# xgb_classifier = XGBClassifier(n_estimators=10, max_depth=5)
# xgb_classifier.fit(X_train, y_train)
# xgb_classifier.score(X_test, y_test)

### Support Vector Machines

In [9]:
svm_classifier = SVC(kernel='linear', random_state=0)
svm_classifier.fit(X_train, y_train)
svm_classifier.score(X_test, y_test)

0.6872146118721462

## With PCA

In [50]:
pca_model = PCA(n_components=300)
X_pca = pca_model.fit_transform(X)

In [51]:
X_pca_train = X_pca[:TRAIN_SIZE]
X_pca_test = X_pca[TRAIN_SIZE:]

### KNN

In [52]:
knn_classifier_pca = KNeighborsClassifier(n_neighbors=7, n_jobs=-1)
knn_classifier_pca.fit(X_pca_train, y_train)
knn_classifier_pca.score(X_pca_test, y_test)

0.9155251141552512

### Decision Tree

In [13]:
decision_tree_classifier_pca = DecisionTreeClassifier()
decision_tree_classifier_pca.fit(X_pca_train, y_train)
decision_tree_classifier_pca.score(X_pca_test, y_test)

0.7397260273972602

### Random Forest

In [14]:
random_forest_classifier_pca = RandomForestClassifier(n_estimators=100, n_jobs=-1)
random_forest_classifier_pca.fit(X_pca_train, y_train)
random_forest_classifier_pca.score(X_pca_test, y_test)

0.9018264840182648

### XGBoost

In [15]:
xgb_classifier_pca = XGBClassifier(n_estimators=1000, max_depth=15)
xgb_classifier_pca.fit(X_pca_train, y_train)
xgb_classifier_pca.score(X_pca_test, y_test)

0.910958904109589

### Support Vector Machines

In [16]:
svm_classifier_pca = SVC(kernel='linear', random_state=0)
svm_classifier_pca.fit(X_pca_train, y_train)
svm_classifier_pca.score(X_pca_test, y_test)

0.680365296803653

# Results for augmented data

In [None]:
X, y = read_dataset('./data/rockpaperscissors/', n_aug=3)

In [18]:
X = preprocess_dataset(X)

In [19]:
TRAIN_SIZE = int(0.8 * y.shape[0])
X_train, y_train = X[:TRAIN_SIZE], y[:TRAIN_SIZE]
X_test, y_test = X[TRAIN_SIZE:], y[TRAIN_SIZE:]

## On full images

### KNN

In [20]:
knn_classifier = KNeighborsClassifier(n_neighbors=7, n_jobs=-1)
knn_classifier.fit(X_train, y_train)
knn_classifier.score(X_test, y_test)

0.6858937749857225

### Decision Tree

In [21]:
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_train, y_train)
decision_tree_classifier.score(X_test, y_test)

0.6493432324386065

### Random Forest

In [22]:
random_forest_classifier = RandomForestClassifier(n_estimators=100, n_jobs=-1)
random_forest_classifier.fit(X_train, y_train)
random_forest_classifier.score(X_test, y_test)

0.8640776699029126

### XGBoost

In [23]:
# We need more RAM for this!
# xgb_classifier = XGBClassifier(n_estimators=10, max_depth=5)
# xgb_classifier.fit(X_train, y_train)
# xgb_classifier.score(X_test, y_test)

### Support Vector Machines

In [24]:
svm_classifier = SVC(kernel='linear', random_state=0)
svm_classifier.fit(X_train, y_train)
svm_classifier.score(X_test, y_test)

0.4386065105653912

## With PCA

In [25]:
pca_model = PCA(n_components=300)
X_pca = pca_model.fit_transform(X)

In [26]:
X_pca_train = X_pca[:TRAIN_SIZE]
X_pca_test = X_pca[TRAIN_SIZE:]

### KNN

In [27]:
knn_classifier_pca = KNeighborsClassifier(n_neighbors=7, n_jobs=-1)
knn_classifier_pca.fit(X_pca_train, y_train)
knn_classifier_pca.score(X_pca_test, y_test)

0.6978869217589948

### Decision Tree

In [28]:
decision_tree_classifier_pca = DecisionTreeClassifier()
decision_tree_classifier_pca.fit(X_pca_train, y_train)
decision_tree_classifier_pca.score(X_pca_test, y_test)

0.5859508852084523

### Random Forest

In [29]:
random_forest_classifier_pca = RandomForestClassifier(n_estimators=100, n_jobs=-1)
random_forest_classifier_pca.fit(X_pca_train, y_train)
random_forest_classifier_pca.score(X_pca_test, y_test)

0.7692747001713307

### XGBoost

In [30]:
xgb_classifier_pca = XGBClassifier(n_estimators=1000, max_depth=15)
xgb_classifier_pca.fit(X_pca_train, y_train)
xgb_classifier_pca.score(X_pca_test, y_test)

0.8726442033123929

### Support Vector Machines

In [31]:
svm_classifier_pca = SVC(kernel='linear', random_state=0)
svm_classifier_pca.fit(X_pca_train, y_train)
svm_classifier_pca.score(X_pca_test, y_test)

0.45059965733866364