In [1]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
from sklearn.datasets import fetch_openml

In [3]:
# Download the OpenML "mnist_784" dataset (version 1).
# as_frame=True is requested explicitly: the cells below call pandas-only APIs
# (X.describe(), X.iloc, y.unique()), so the notebook must not depend on
# whatever container type the installed scikit-learn returns by default.
mnist = fetch_openml("mnist_784", version=1, as_frame=True)
mnist.keys()

In [None]:
# Pull the feature table and the label column out of the fetched dataset.
X = mnist["data"]
y = mnist["target"]

In [None]:
# Display the dimensions of the feature table.
X.shape

In [6]:
# Store the labels as unsigned 8-bit integers.
# NOTE(review): presumably the raw labels arrive as digit strings/categories — confirm.
y = y.astype("uint8")

In [7]:
# Per-column summary statistics of the feature table.
X.describe()

In [8]:
import seaborn as sns
import matplotlib.pyplot as plt

# Bar chart of how many samples carry each label.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(8, 4))

# Fix the bar order to the sorted distinct label values.
order = sorted(y.unique())
sns.countplot(x=y, order=order, palette="viridis", ax=ax)
ax.set(title="MNIST label distribution", xlabel="Digit", ylabel="Count")

# Write each bar's height above it.
ax.bar_label(ax.containers[0], padding=2, fontsize=8)

# Trailing semicolon keeps the notebook from echoing a return value.
fig.tight_layout();

In [9]:
# Show the first 100 samples as a 10x10 grid, each panel titled with its label.
fig, axes = plt.subplots(10, 10, figsize=(10, 10))

for i, cell_ax in enumerate(axes.flat):
    # Each row of X is one flattened image; reshape it to 28x28 for display.
    img = X.iloc[i].to_numpy().reshape(28, 28)
    cell_ax.imshow(img, cmap="gray")
    # Look the label up by position, exactly like the image row above.
    # Label-based y[i] only matches X.iloc[i] when the index happens to be 0..n-1.
    cell_ax.set_title(y.iloc[i])
    cell_ax.axis("off")

fig.tight_layout()
plt.show()

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 14% of the samples as a test set. The split is stratified on y and
# uses a fixed random_state so it is repeatable.
split_kwargs = {"test_size": 0.14, "random_state": 42, "stratify": y}
x_train, x_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

In [11]:
# Scale the features by 1/255.
# The scaled frames are rebuilt from the untouched X via the split's row labels,
# so re-running this cell always yields the same values instead of dividing
# already-scaled data by 255 a second time.
# Assumes X has a unique index so each label selects exactly one row — TODO confirm.
x_train = X.loc[x_train.index] / 255.0
x_test = X.loc[x_test.index] / 255.0

In [15]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, cross_val_score


# 10-fold stratified cross-validation splitter.
skf = StratifiedKFold(n_splits=10)

# Standardisation sits inside the pipeline so it is re-fit on the training part
# of every fold rather than on the full training set.
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", KNeighborsClassifier()),
])

# One score per fold, using the estimator's default scorer.
scores = cross_val_score(
    pipeline,
    x_train,
    y_train,
    cv=skf,
)

# Report the outcome so the cross-validation result is visible in the notebook.
print(f"CV accuracy: {scores.mean():.4f} +/- {scores.std():.4f}")

In [16]:
# Fit the scaler and the classifier on the full training split.
pipeline.fit(X=x_train, y=y_train)

In [17]:
# Score the fitted pipeline on the held-out test split.
pipeline.score(X=x_test, y=y_test)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay,
)

# Evaluate the fitted pipeline on the same data it was trained on.
y_train_pred = pipeline.predict(x_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print("Train accuracy:", train_accuracy)
print(classification_report(y_train, y_train_pred))

# Confusion matrix of true vs. predicted training labels, drawn without a colorbar.
cm = confusion_matrix(y_train, y_train_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm).plot(cmap="Blues", colorbar=False)
disp.ax_.set_title("Train Confusion Matrix")
disp.figure_.tight_layout()