In [None]:
import numpy as np
import pandas as pd
import cv2

from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

import os

In [None]:
# --- Global experiment settings -------------------------------------------
# Seed and shuffling flag shared by every splitter / estimator below.
random_state = 42
shuffle = True

# Number of cross-validation folds.
n_splits = 5

# Passed as `n_jobs` to scikit-learn objects (-1 = use every available core).
n_jobs = -1

In [None]:
def ImagePreprocessing(
    files: str,
    crop: tuple = (35, 70, 420, 480),
    size: tuple = (10, 10),
) -> np.ndarray:
    """Turn every image in a directory into a flat grayscale feature vector.

    Each image is cropped to a fixed region of interest, converted to
    grayscale, divided by 255, downsampled with area interpolation and
    flattened to one row of the returned matrix.

    Parameters
    ----------
    files : str
        Directory containing the images (a trailing separator is optional).
    crop : tuple of int, default (35, 70, 420, 480)
        Region of interest as ``(row_start, row_stop, col_start, col_stop)``.
    size : tuple of int, default (10, 10)
        Target ``(width, height)`` handed to ``cv2.resize``.

    Returns
    -------
    np.ndarray
        Float array with one row per image and ``width * height`` columns.
        An empty directory yields an array of shape ``(0, width * height)``
        so the result can still be concatenated with other feature matrices.

    Raises
    ------
    ValueError
        If a directory entry cannot be read as an image.
    """
    row_start, row_stop, col_start, col_stop = crop
    width, height = size

    images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any seeded split made later) reproducible.
    for name in sorted(os.listdir(files)):
        path = os.path.join(files, name)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise ValueError(f"Could not read image: {path}")

        cropped_image = img[row_start:row_stop, col_start:col_stop]
        # cvtColor returns a new array; the result must be assigned, otherwise
        # the crop silently stays a 3-channel BGR image.
        cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
        cropped_image = np.array(cropped_image, dtype=float)
        cropped_image /= 255
        cropped_image = cv2.resize(cropped_image, (width, height), interpolation=cv2.INTER_AREA)
        images.append(cropped_image.reshape(-1))

    if not images:
        return np.empty((0, width * height), dtype=float)
    return np.array(images)

In [None]:
# One input directory per class.
down_file = "../input/aleksandra-nevskogo/down/"
mov_file = "../input/aleksandra-nevskogo/mov/"
up_file = "../input/aleksandra-nevskogo/up/"

# Feature matrices: one flattened image per row.
down_images = ImagePreprocessing(down_file)
mov_images = ImagePreprocessing(mov_file)
up_images = ImagePreprocessing(up_file)

# Class labels stored as floats: 0 = down, 1 = mov, 2 = up.
down_labels = np.full(len(down_images), 0.0)
mov_labels = np.full(len(mov_images), 1.0)
up_labels = np.full(len(up_images), 2.0)

# Stack the three classes in the fixed order down -> mov -> up.
X = np.concatenate([down_images, mov_images, up_images], axis=0)
y = np.concatenate([down_labels, mov_labels, up_labels], axis=0)

In [None]:
# Shared cross-validation splitter for both grid searches.
skf = StratifiedKFold(n_splits=n_splits, random_state=random_state, shuffle=shuffle)

# LinearSVC keeps its default loss ("squared_hinge") here. With that loss,
# penalty="l1" is only supported in the primal formulation (dual=False);
# combining it with the default dual solver makes every l1 fit raise, which
# is why half of the candidate fits used to fail. The grid is therefore split
# into two sub-grids so that l1 is always paired with dual=False.
param_grid_SVC = [
    {
        "penalty": ["l2"],
        "C": [1, 0.01, 0.001, 10, 3],
        "max_iter": [10**3, 10**4, 10**5],
    },
    {
        "penalty": ["l1"],
        "dual": [False],
        "C": [1, 0.01, 0.001, 10, 3],
        "max_iter": [10**3, 10**4, 10**5],
    },
]
param_grid_KNN = {"n_neighbors" : [3, 4, 5, 10], "weights" : ["uniform", "distance"]}

# Exhaustive searches over the grids above, scored on the shared folds.
clf_SVC = GridSearchCV(LinearSVC(random_state=random_state), param_grid=param_grid_SVC, cv=skf, n_jobs=n_jobs)
clf_KNN = GridSearchCV(KNeighborsClassifier(n_jobs=n_jobs), param_grid=param_grid_KNN, cv=skf, n_jobs=n_jobs)

In [None]:
# Fit both searches on the full data set first, then report each winner
# (LinearSVC first, KNN second).
for _search in (clf_SVC, clf_KNN):
    _search.fit(X, y)

for _search in (clf_SVC, clf_KNN):
    print("best_params_: ", _search.best_params_, " best_score_: ", _search.best_score_)

75 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/svm/_classes.py", line 272, in fit
    sample_weight=sample_weight,
  File "/opt/conda/lib/python3.7/site-packages/sklearn/svm/_base.py", line 1185, in _fit_liblinear
    solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/svm/_base.py", line 1026, in _get_liblinear_solver_type
    % (

best_params_:  {'C': 10, 'max_iter': 10000, 'penalty': 'l2'}  best_score_:  0.9119794876657343
best_params_:  {'n_neighbors': 4, 'weights': 'distance'}  best_score_:  0.945726860411827


In [None]:
# Hold out 33% of the samples for a final comparison of the two models.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    shuffle=shuffle,
    random_state=random_state,
)

# Linear SVM with hard-coded hyper-parameters; fit() returns the estimator
# itself, so construction and training can be chained.
lsvc = LinearSVC(
    C=10,
    penalty="l2",
    max_iter=10000,
    random_state=random_state,
).fit(X_train, y_train)

# k-nearest-neighbours with hard-coded hyper-parameters.
knn = KNeighborsClassifier(
    n_neighbors=4,
    weights="distance",
    n_jobs=n_jobs,
).fit(X_train, y_train)

# Predictions on the held-out part (variable names are used by later cells).
y_predict_lcvs = lsvc.predict(X_test)
y_predict_knn = knn.predict(X_test)



In [None]:
# Confusion matrix of the LinearSVC predictions on the held-out set
# (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_predict_lcvs)

array([[499,   9,   0],
       [ 33, 232,  40],
       [ 12,  14, 335]])

In [None]:
# Confusion matrix of the KNN predictions on the held-out set
# (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_predict_knn)

array([[494,  13,   1],
       [ 12, 277,  16],
       [  5,  20, 336]])