In [42]:
import numpy as np
from os import walk
from sklearn.model_selection import GridSearchCV
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

## Загрузка обработанных изображений

In [3]:
# Folder with the preprocessed black-and-white images; one sub-folder per class.
img_dir = r'C:/Users/Lenovo/Documents/cropped/'


def _class_image_paths(root, label):
    """Return the sorted paths of the files directly inside ``root + label``.

    The names are sorted because the order produced by ``os.walk`` depends on
    the filesystem; a stable order keeps the rows of the dataset (and hence the
    seeded train/test split built from them) the same on every run.

    Raises:
        FileNotFoundError: if the class folder does not exist or cannot be read
            (``walk`` then yields nothing at all).
    """
    folder = root + label
    listing = next(walk(folder), None)
    if listing is None:
        raise FileNotFoundError('Image folder not found: ' + folder)
    # Only the file names of the top level are needed; sub-folders are ignored.
    _, _, filenames = listing
    return [folder + '/' + filename for filename in sorted(filenames)]


up = _class_image_paths(img_dir, 'up')
down = _class_image_paths(img_dir, 'down')
mov = _class_image_paths(img_dir, 'mov')

In [4]:
# Convert every image into a flat pixel vector and stack the vectors into X.
X = []
for img_path in up + down + mov:
    # The context manager guarantees the image file is closed once its pixels
    # have been copied into the numpy array.
    with Image.open(img_path) as img:
        # hstack concatenates the rows of the pixel array into one 1-D vector
        # (assumes single-channel, i.e. 2-D, images of identical size).
        X.append(np.hstack(np.array(img)))
# Labels are generated in the same order in which the path lists were concatenated.
y = ['up']*len(up) + ['down']*len(down) + ['mov']*len(mov)
X = np.array(X)
y = np.array(y)

In [5]:
# One row per loaded image, one column per flattened pixel value.
X.shape

(55162, 2100)

In [6]:
# Label array aligned with the rows of X: all 'up', then all 'down', then all 'mov'.
y

array(['up', 'up', 'up', ..., 'mov', 'mov', 'mov'], dtype='<U4')

In [7]:
# Seeded random subsample: 10% of the rows are used for training and 1% for
# testing; the remaining rows are not used by either split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.1,
    test_size=0.01,
    random_state=23,
)

In [8]:
# Sanity check on the size of the training subsample produced by the split.
X_train.shape

(5516, 2100)

In [15]:
def metrics(y_true, y_pred):
    """Print the accuracy and the weighted-average F1 score of the predictions.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, in the same order as ``y_true``.

    Returns:
        None; the two scores are written to standard output.
    """
    accuracy = accuracy_score(y_true, y_pred)
    print('Accuracy :', accuracy)
    # 'weighted' averages the per-class F1 scores weighted by class support.
    weighted_f1 = f1_score(y_true, y_pred, average='weighted')
    print('f1 :', weighted_f1)

## SVM

In [10]:
# Tune the regularisation strength C of a linear SVM with 5-fold cross-validation.
# random_state is fixed so that repeated runs train the same models.
model = LinearSVC(random_state=23)
parameters = {
    # 7 log-spaced candidates: 1e-3, 1e-2, ..., 1e3. This matches the recorded
    # results of the search (7 candidates, 35 fits); 5 points would instead
    # produce 0.001, ~0.0316, 1, ~31.6, 1000.
    'C': np.logspace(-3, 3, 7),
}
grid = GridSearchCV(model, parameters, scoring='accuracy', cv=5, verbose=1, n_jobs=-1)
# Trailing semicolon suppresses the repr of the fitted search object.
grid.fit(X_train, y_train);

Fitting 5 folds for each of 7 candidates, totalling 35 fits




In [11]:
# Raw cross-validation results of the SVM search: timings and per-split scores per candidate C.
grid.cv_results_

{'mean_fit_time': array([29.71152167, 27.9081543 , 27.7956553 , 29.78831682, 27.20742044,
        28.02323818, 23.76103854]),
 'std_fit_time': array([0.41279426, 0.98166096, 1.01443278, 0.83506019, 0.93305741,
        1.07711108, 2.59083579]),
 'mean_score_time': array([0.01356382, 0.01236696, 0.0159575 , 0.01456165, 0.01236725,
        0.01376362, 0.01117082]),
 'std_score_time': array([0.00149276, 0.00101719, 0.00373185, 0.00306403, 0.00079805,
        0.00270601, 0.00193391]),
 'param_C': masked_array(data=[0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0],
              mask=[False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 0.001},
  {'C': 0.01},
  {'C': 0.1},
  {'C': 1.0},
  {'C': 10.0},
  {'C': 100.0},
  {'C': 1000.0}],
 'split0_test_score': array([0.90307971, 0.89402174, 0.91032609, 0.90036232, 0.90036232,
        0.90036232, 0.89311594]),
 'split1_test_score': array([0.9256573 , 0.87669991, 0.88939257, 0.86944696,

In [12]:
# GridSearchCV was created with the default refit=True, so best_estimator_ has
# already been retrained on the whole of X_train with the best C. It is used
# directly here instead of paying for an identical second fit.
model_best = grid.best_estimator_



In [13]:
# Predict the labels of the held-out test split with the selected SVM.
y_pred = model_best.predict(X_test)

In [16]:
# Accuracy and weighted F1 of the SVM predictions on the test split.
metrics(y_test, y_pred)

Accuracy : 0.9311594202898551
f1 : 0.9321487495155095


## KNN

In [17]:
# k-NN search space: neighbourhood size crossed with the two vote-weighting schemes.
parameters = dict(
    n_neighbors=[2, 3, 5, 7, 10],
    weights=['uniform', 'distance'],
)
model = KNeighborsClassifier()
# 5-fold cross-validated search scored by accuracy, using all available cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
# Trailing semicolon suppresses the repr of the fitted search object.
grid.fit(X_train, y_train);

In [18]:
# Raw cross-validation results of the k-NN search: timings and scores per parameter combination.
grid.cv_results_

{'mean_fit_time': array([0.01236753, 0.01515989, 0.01296554, 0.01316524, 0.01356316,
        0.01376224, 0.01396151, 0.01276736, 0.01276579, 0.01276517]),
 'std_fit_time': array([0.00195414, 0.00395991, 0.00109319, 0.00182678, 0.00205539,
        0.00230935, 0.00235921, 0.00074535, 0.001934  , 0.00146523]),
 'mean_score_time': array([0.87545719, 0.74121575, 0.77652187, 0.7388227 , 0.7835032 ,
        0.80883603, 0.91116271, 0.89600143, 0.82958097, 0.77372952]),
 'std_score_time': array([0.05056934, 0.01228613, 0.00766305, 0.01137808, 0.01014295,
        0.05141063, 0.01355185, 0.03263904, 0.01172268, 0.03957927]),
 'param_n_neighbors': masked_array(data=[2, 2, 3, 3, 5, 5, 7, 7, 10, 10],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_weights': masked_array(data=['uniform', 'distance', 'uniform', 'distance',
                    'uniform', 'distance', 'uniform', 'dis

In [19]:
# Parameter combination with the best mean cross-validated accuracy.
grid.best_params_

{'n_neighbors': 2, 'weights': 'distance'}

In [20]:
# Keep the winning k-NN estimator under its own name; `grid` and `model` were
# reused from the SVM section, so this avoids depending on them later.
model_knn = grid.best_estimator_

In [21]:
# NOTE(review): with GridSearchCV's default refit=True the best estimator should
# already be fitted on X_train, so this refit looks redundant; it mainly serves
# to display the chosen configuration.
model_knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=2, weights='distance')

In [22]:
# Predict the labels of the held-out test split with the selected k-NN model.
knn_pred = model_knn.predict(X_test)

In [23]:
# Accuracy and weighted F1 of the k-NN predictions on the test split.
metrics(y_test, knn_pred)

Accuracy : 0.9565217391304348
f1 : 0.9570272853707639


In [43]:
# Number of training samples per class, to check the class balance.
pd.DataFrame(y_train).value_counts()

down    2425
up      2179
mov      912
dtype: int64

In [47]:
# Number of test samples per class, to check the class balance.
pd.DataFrame(y_test).value_counts()

up      235
down    229
mov      88
dtype: int64

In [49]:
# Rows are the true classes and columns the predicted classes. No `labels`
# argument is passed, so scikit-learn orders the classes by sorted label value:
# down, mov, up.
confusion_matrix(y_test, knn_pred)

array([[218,   6,   5],
       [  1,  83,   4],
       [  1,   7, 227]], dtype=int64)