In [18]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from scipy.ndimage import shift


In [19]:
mnist = fetch_openml('mnist_784')

In [20]:
X, y = mnist['data'], mnist['target']
X = X.astype(np.float32) / 255.0 # нормализация [0, 1]

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # Разделяем датасет
y_train = y_train.to_numpy()

In [22]:
##==> Настройка гиперпараметров с помощью поиска по сетке 
###########################################################
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance']
}

grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
print(f"Best parameters: {grid_search.best_params_}")


Best parameters: {'n_neighbors': 3, 'weights': 'distance'}


In [23]:
accuracy = best_model.score(X_test, y_test)
print(f"Accuracy on test set: {accuracy * 100:.2f}%")


Accuracy on test set: 97.29%


In [24]:
def shift_image(digit, dist=1):
    """
    Смещение изображений в любое направление
    """
    digit_image = digit.reshape((28, 28))
    shifted_images = [
        shift(digit_image, [-dist, 0], cval=0),   # Up
        shift(digit_image, [dist, 0], cval=0),    # Down
        shift(digit_image, [0, -dist], cval=0),   # Left
        shift(digit_image, [0, dist], cval=0)      # Right
    ]
    return np.array([img.flatten() for img in shifted_images])


In [25]:
shifted_images = []
for image in X_train.to_numpy():
    shifted_images.extend(shift_image(image))


X_train_augmented = np.array(shifted_images)
y_train = y_train.to_numpy()
y_train_augmented = np.repeat(y_train, 5)
print(f"X_train_augmented shape: {X_train_augmented.shape}")
print(f"y_train_augmented shape: {y_train_augmented.shape}")
best_model.fit(X_train_augmented, y_train_augmented)


AttributeError: 'numpy.ndarray' object has no attribute 'to_numpy'

In [None]:
final_accuracy = best_model.score(X_test, y_test)
print(f"Final accuracy on test set after augmentation: {final_accuracy * 100:.2f}%")
