In [22]:
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np

Создаём названия столбцов: для каждой из 45 точек — пара координат `{i}oX` и `{i}oY` (i = 1…45), последним идёт столбец с меткой класса `movement_type`.

In [23]:
# Column layout: for each point 1..45 an X and a Y coordinate column,
# followed by the class-label column at the very end.
columns = [f"{i}{axis}" for i in range(1, 46) for axis in ("oX", "oY")]
columns += ["movement_type"]

In [24]:
# Supplying `names` makes pandas treat every row (including the first) as
# data; `header=None` only spells out that default explicitly.
df = pd.read_csv(
    "csv_dir/movement_libras.csv",
    header=None,
    names=columns,
)

In [25]:
# Split the frame into the feature matrix (all coordinate columns) and the
# class-label vector.
X = df.drop(columns='movement_type')
y = df['movement_type']

Найдём максимальное, минимальное и среднее значения координат (на примере первой строки данных), чтобы понять, нужна ли нормализация данных.

In [26]:
# Print the largest, smallest and mean coordinate value of the first sample
# as a quick check of the feature scale.
# NOTE(review): only row 0 is inspected here, not the whole dataset.
first_sample = X.iloc[0]
for summarize in (max, min, np.mean):
    print(summarize(first_sample))

0.87963
0.26112
0.4870105555555555


In [27]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [28]:
# Stratified hold-out split with a fixed seed so the partition is reproducible.
# NOTE(review): test_size=0.206 looks chosen to give exactly 75 test rows
# (the reports show support 5 for each of 15 classes) - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.206,
    random_state=1,
    stratify=y,
)

In [29]:
# Baseline: k-NN with library-default hyperparameters, trained on the training
# split and evaluated on the hold-out split. `fit` returns the estimator
# itself, so construction and training can be chained.
default_model = KNeighborsClassifier(n_jobs=-1).fit(X_train, y_train)
predicted = default_model.predict(X_test)

In [30]:
from sklearn import metrics

In [31]:
# Per-class precision / recall / F1 of the baseline model on the hold-out split.
baseline_report = metrics.classification_report(y_test, predicted)
print(baseline_report)

              precision    recall  f1-score   support

           1       1.00      0.20      0.33         5
           2       0.56      1.00      0.71         5
           3       1.00      1.00      1.00         5
           4       1.00      0.80      0.89         5
           5       1.00      1.00      1.00         5
           6       1.00      1.00      1.00         5
           7       0.45      1.00      0.62         5
           8       0.40      0.40      0.40         5
           9       0.83      1.00      0.91         5
          10       1.00      0.40      0.57         5
          11       0.67      0.80      0.73         5
          12       0.80      0.80      0.80         5
          13       1.00      0.40      0.57         5
          14       1.00      1.00      1.00         5
          15       1.00      0.80      0.89         5

    accuracy                           0.77        75
   macro avg       0.85      0.77      0.76        75
weighted avg       0.85   

In [32]:
from sklearn.model_selection import validation_curve
from sklearn.model_selection import StratifiedKFold

In [33]:
# Cross-validation splitter: 5 stratified, shuffled folds with a fixed seed.
# The same object is reused by the grid search so that all candidates are
# scored on identical partitions.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

# Validation curve for n_neighbors = 1..15, scored with weighted F1.
# Only the training portion is used: X_test / y_test serve as the hold-out
# set for the final reports, so they must not take part in hyperparameter
# exploration (this also matches the grid search, which fits on X_train).
train_scores, valid_scores = validation_curve(
    default_model,
    X_train,
    y_train,
    param_name="n_neighbors",
    param_range=range(1, 16),
    scoring='f1_weighted',
    cv=cv,
)

In [34]:
# from sklearn.model_selection import train_test_split

In [35]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.206, random_state=1, stratify=y)

In [36]:
from sklearn.neighbors import KNeighborsClassifier #импортируем нашу модель

In [37]:
# Hand-picked starting configuration for the tuned model; these values are
# later used as the fixed defaults of the hyperparameter grid.
initial_params = dict(
    n_neighbors=10,
    weights='distance',
    algorithm='auto',
    leaf_size=25,
    metric='euclidean',
    metric_params=None,
    n_jobs=-1,
)
best_model = KNeighborsClassifier(**initial_params)

In [38]:
# Train the model with the initial parameter values.
best_model.fit(X=X_train, y=y_train)

In [39]:
from sklearn.model_selection import GridSearchCV

In [40]:
# Build the search grid: every current setting of `best_model` becomes a
# one-element list (i.e. stays fixed), then the parameters to be tuned are
# replaced with their candidate values.
model_params = best_model.get_params()
tuned_params = {name: [value] for name, value in model_params.items()}
tuned_params.update(
    n_neighbors=range(1, 30),
    weights=['distance', 'uniform'],
    algorithm=['ball_tree', 'kd_tree', 'brute'],
    metric=['euclidean', 'minkowski'],
)
# NOTE(review): with p fixed at 2, 'minkowski' looks equivalent to
# 'euclidean', and the three algorithms are all exact neighbour searches, so
# the grid is probably larger than necessary - confirm before trimming.

# Exhaustive search over the grid, scored with weighted F1 on the shared CV
# folds, using the training split only.
clf = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=tuned_params,
    scoring="f1_weighted",
    cv=cv,
    n_jobs=-1,
)
clf.fit(X_train, y_train)
best_params = clf.best_params_

# Displayed as the cell output: the grid that was searched.
tuned_params

{'algorithm': ['ball_tree', 'kd_tree', 'brute'],
 'leaf_size': [25],
 'metric': ['euclidean', 'minkowski'],
 'metric_params': [None],
 'n_jobs': [-1],
 'n_neighbors': range(1, 30),
 'p': [2],
 'weights': ['distance', 'uniform']}

In [41]:
# GridSearchCV refits the winning configuration on the full training split by
# default (refit=True), so the fitted estimator is taken directly from the
# search instead of building and training an identical model a second time.
best_model = clf.best_estimator_
predicted = best_model.predict(X_test)

In [42]:
# Show the selected hyperparameters and the per-class metrics of the tuned
# model on the hold-out split.
tuned_report = metrics.classification_report(y_test, predicted)
print('Used params:', best_params)
print('Evaluation:\n', tuned_report)

Used params: {'algorithm': 'ball_tree', 'leaf_size': 25, 'metric': 'euclidean', 'metric_params': None, 'n_jobs': -1, 'n_neighbors': 1, 'p': 2, 'weights': 'distance'}
Evaluation:
               precision    recall  f1-score   support

           1       1.00      0.80      0.89         5
           2       0.83      1.00      0.91         5
           3       1.00      1.00      1.00         5
           4       1.00      0.80      0.89         5
           5       1.00      1.00      1.00         5
           6       1.00      1.00      1.00         5
           7       1.00      1.00      1.00         5
           8       1.00      0.80      0.89         5
           9       0.83      1.00      0.91         5
          10       0.80      0.80      0.80         5
          11       0.71      1.00      0.83         5
          12       0.80      0.80      0.80         5
          13       1.00      0.60      0.75         5
          14       1.00      1.00      1.00         5
          