In [161]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score




In [None]:
# Download the 'mnist_784' dataset (version 1) from OpenML.
# as_frame=True is requested explicitly: the preprocessing cell below calls
# `.values` on both 'data' and 'target', which only works on pandas objects,
# and the default value of `as_frame` differs between scikit-learn releases.
mnist_data = fetch_openml('mnist_784', version=1, as_frame=True)

In [None]:
# Build the working dataset: a fixed-size random subsample of MNIST with
# pixel values scaled to [0, 1] and integer labels.
sample_size = 7000

# Seeded generator so the same rows are drawn on every Restart & Run All;
# without a seed every downstream accuracy changes between runs.
sample_rng = np.random.default_rng(42)
n_total = mnist_data['data'].shape[0]
random_indices = sample_rng.choice(n_total, size=sample_size, replace=False)

# Subsample first, then scale: only the selected rows are divided by 255,
# which gives the same values as scaling everything and then indexing.
X_data = mnist_data['data'].to_numpy()[random_indices] / 255.0
y_data = mnist_data['target'].to_numpy()[random_indices].astype(int)

# Candidate neighbour counts evaluated by the cells below (1..10 inclusive).
k_values = list(range(1, 11))


In [None]:

# Hold out 20% of the subsample as a test set; the fixed random_state keeps
# this particular split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=42,
)
# Sanity check: print the shape of each of the four arrays on one line.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

In [None]:
# Visual sanity check: draw one example image and print its label.
sample_index = 44

# Create the figure and axes explicitly (plt.subplots() actually builds them).
fig, ax = plt.subplots()
# Each row of X_data is a flattened image; reshape it to 28x28 for display.
ax.imshow(X_data[sample_index].reshape(28, 28), cmap='gray')
plt.show()

# The label that corresponds to the image shown above.
print("Label:", y_data[sample_index])

In [None]:

# Test-set accuracy for every candidate neighbour count, in k_values order.
accuracies = []

for n_neighbors in k_values:
    # Fit a KNN classifier on the training split (fit() returns the estimator)
    # and predict the held-out split.
    knn = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    # Score the predictions, record the result and report it.
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)
    print(f'n={n_neighbors}, Accuracy={accuracy:.4f}')

In [None]:
# Line plot of test accuracy against the number of neighbours, drawn through
# the explicit Axes interface.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(k_values, accuracies, marker='o')
ax.set_title('Dokładność KNN w zależności od liczby sąsiadów')
ax.set_xlabel('Liczba sąsiadów (n)')
ax.set_ylabel('Dokładność')
ax.set_xticks(k_values)  # one tick per evaluated neighbour count
ax.grid()
plt.show()

In [None]:

# Repeated random-split evaluation. For every k and every repetition count in
# bootstrap_iterations, draw that many fresh 80/20 train/test splits of the
# subsample and store the mean test accuracy.
# results[k] ends up with one mean per entry of bootstrap_iterations.
# NOTE: the splits come from train_test_split, i.e. they are repeated random
# hold-out splits rather than bootstrap resamples with replacement.
bootstrap_iterations = [1, 2, 11, 22, 44, 88]  # repetition counts; adjust as needed
results = {k: [] for k in k_values}

# One seeded RandomState shared by all splits: its state advances on every
# call, so each split is different, yet the whole experiment is repeatable.
split_rng = np.random.RandomState(42)

# Iterate over the candidate neighbour counts.
for k in k_values:
    for iters in bootstrap_iterations:
        # The `bs_` prefix keeps this cell from overwriting the fixed split
        # (X_train, X_test, y_train, y_test) and the `accuracies` list that
        # earlier cells created and the first plot cell reads.
        bs_accuracies = []
        for _ in range(iters):
            bs_X_train, bs_X_test, bs_y_train, bs_y_test = train_test_split(
                X_data, y_data, test_size=0.2, random_state=split_rng
            )
            bs_knn = KNeighborsClassifier(n_neighbors=k)
            bs_knn.fit(bs_X_train, bs_y_train)
            bs_y_pred = bs_knn.predict(bs_X_test)
            bs_accuracies.append(accuracy_score(bs_y_test, bs_y_pred))

        mean_accuracy = np.mean(bs_accuracies)
        results[k].append(mean_accuracy)

for k, mean_accuracies in results.items():
    print(f'Wyniki dla k={k}: {mean_accuracies}')

In [None]:
# One line per neighbour count: mean accuracy against the number of repeated
# splits it was averaged over.
plt.figure(figsize=(10, 6))
for k in k_values:
    plt.plot(bootstrap_iterations, results[k], marker='o', label=f'k={k}')
plt.legend(loc='lower right', ncol=3, title='Liczba sąsiadów (k)')
plt.title('Dokładność KNN w zależności od liczby iteracji bootstrap')
# Axis labels and grid so the figure can be read on its own, as in the other
# plotting cells.
plt.xlabel('Liczba iteracji bootstrap')
plt.ylabel('Średnia dokładność')
plt.grid()
# Ending on plt.show() keeps the Text repr of the last call out of the output.
plt.show()



In [None]:
# Collapse each k's list of mean accuracies into a single number.
# This is an unweighted mean over the repetition-count settings, so a setting
# averaged over few splits counts as much as one averaged over many.
results2 = {k: np.mean(k_means) for k, k_means in results.items()}
print(results2)

# Materialise the dict views as lists before handing them to matplotlib.
k_axis = list(results2.keys())
mean_axis = list(results2.values())

plt.figure(figsize=(10, 6))
plt.plot(k_axis, mean_axis, marker='o')
plt.title('Średnia Dokładność KNN w zależności od liczby sąsiadów')
plt.xlabel('Liczba sąsiadów (k)')
plt.ylabel('Średnia dokładność')
plt.xticks(k_axis)  # one tick per evaluated neighbour count
plt.grid()
plt.show()