# Logistic Regression với MNIST

In [None]:

from sklearn.datasets import fetch_openml
import numpy as np

# Fetch the 'mnist_784' dataset (version 1) from OpenML and split it into a
# float32 feature table X and an integer label vector y.
mnist = fetch_openml('mnist_784', version=1, parser='auto')
X = mnist.data.astype('float32')
y = mnist.target.astype('int')

print("Shape X:", X.shape)
print("Shape y:", y.shape)
# fetch_openml hands back pandas objects by default for dense data, and
# DataFrame.min()/max() reduce per column. Going through np.asarray reduces
# over every value so a single global (min, max) pair is printed.
print("Giá trị pixel:", np.asarray(X).min(), np.asarray(X).max())


In [None]:

# Rescale the features by 1/255 before training.
X_normalized = X / 255.0
# X may be a pandas DataFrame (fetch_openml's default for dense datasets), in
# which case .min()/.max() would reduce per column and print one value per
# feature. np.asarray yields a single global minimum/maximum for both a
# DataFrame and a plain ndarray.
print(
    "Chuẩn hóa:",
    np.asarray(X_normalized).min(),
    np.asarray(X_normalized).max(),
)


In [None]:

from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for evaluation. Stratifying on y keeps the
# label proportions the same in both partitions, and the fixed seed makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_normalized,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [None]:

from sklearn.linear_model import LogisticRegression
# Logistic regression trained with the SAGA solver.
# `multi_class` is deliberately not passed: the argument is deprecated since
# scikit-learn 1.5, and for a target with more than two classes the default
# already selects the multinomial (softmax) formulation with this solver, so
# the fitted model is the same without the deprecation warning.
# NOTE(review): n_jobs is documented as parallelising over classes in
# one-vs-rest mode only, so it likely has no effect here — confirm before
# relying on it for speed.
model = LogisticRegression(
    solver='saga',
    max_iter=100,
    tol=0.1,  # much looser than the library default (1e-4); presumably chosen to keep training short
    verbose=1,
    n_jobs=-1,
    random_state=42
)


In [None]:

# Train the classifier on the training split. The amount of solver progress
# output is governed by the `verbose` value given when the model was built.
model.fit(X_train, y_train)


In [None]:

# Score the held-out samples twice: once as per-class probabilities and once
# as hard class labels.
y_pred_proba = model.predict_proba(X_test)
y_pred = model.predict(X_test)

# Spot check: first ten predictions, the matching ground-truth labels, and
# the full probability vector of the first test sample (one item per line).
print(y_pred[:10], y_test[:10], y_pred_proba[0], sep="\n")


In [None]:

from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Per-class error breakdown: rows are the true labels, columns the predicted
# labels.
cm = confusion_matrix(y_test, y_pred)

# Overall fraction of test samples whose predicted label matches the truth.
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)

# Draw the confusion matrix as a heat map on a fresh 10x8 inch figure.
plt.figure(figsize=(10, 8))
sns.heatmap(data=cm, cmap='Blues')
plt.show()


In [None]:

import joblib
# Persist the fitted model to the current working directory so it can be
# reloaded later with joblib.load without retraining.
# NOTE: joblib files are pickle-based; only load this artifact from a trusted
# source, and ideally with the same scikit-learn version used to create it.
joblib.dump(model, "mnist_logistic_model.pkl")
