# 🧪 MNIST Handwritten Digit Classification Lab Final

**Dataset**: MNIST (70,000 handwritten digit images)

**Tasks:**
1. Dataset Acquisition
2. Data Loading
3. Data Exploration
4. Data Visualization
5. Data Preprocessing
6. Model Training (Logistic Regression, SVM, Decision Tree)
7. Hyperparameter Optimization
8. Training Evaluation
9. Testing Evaluation with Bootstrapping
10. Model Comparison

In [None]:
# Install required libraries (uncomment if needed)
# !pip install scikit-learn matplotlib seaborn numpy pandas
from sklearn.datasets import fetch_openml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils import resample
import warnings
warnings.filterwarnings('ignore')

## 📥 1. Dataset Acquisition and Loading

In [None]:
# Load MNIST from OpenML.
# as_frame=True pins the return type to pandas objects: the visualization
# cell indexes X with `.iloc`, which fails if fetch_openml hands back NumPy
# arrays (the default container depends on the installed scikit-learn version).
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
X = mnist.data
# Cast the labels to integers so they are numeric for np.unique and the metrics.
y = mnist.target.astype('int')

## 🔍 2. Data Exploration

In [None]:
# Report the dataset dimensions and the distinct label values.
n_samples, n_features = X.shape[:2]
print("Number of samples:", n_samples)
print("Number of features:", n_features)
print("Target classes:", np.unique(y))

## 🖼️ 3. Data Visualization

In [None]:
# Show the first ten samples side by side, each titled with its label.
fig, axes = plt.subplots(1, 10, figsize=(12, 4))
for idx, ax in enumerate(axes):
    # Each row of X is reshaped to a 28x28 grid for display.
    pixels = X.iloc[idx].values
    ax.imshow(pixels.reshape(28, 28), cmap='gray')
    ax.axis('off')
    ax.set_title(f"Label: {y[idx]}")
plt.tight_layout()
plt.show()

## 🧹 4. Data Preprocessing

In [None]:
# Split first, then normalize.
# Fitting the scaler on the full dataset would let test-set statistics
# (per-feature mean and variance) leak into training, so the scaler is fitted
# on the training portion only and merely applied to the held-out portion.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn mean/std from training rows
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

## 🤖 5. Model Training

In [None]:
# Baseline classifiers; logistic regression gets max_iter=1000, the other
# two are constructed with their default arguments.
lr = LogisticRegression(max_iter=1000)
svm = SVC()
dt = DecisionTreeClassifier()

# Train each baseline on the training split, in the order LR, SVM, tree.
for baseline in (lr, svm, dt):
    baseline.fit(X_train, y_train)

## 🔍 6. Hyperparameter Tuning (GridSearchCV)

In [None]:
# Each search runs 3-fold cross-validation on the training split.
# n_jobs=-1 spreads the candidate/fold fits over all CPU cores; the selected
# parameters are unchanged, only the wall-clock time drops.

# Logistic Regression
param_lr = {'C': [0.1, 1, 10]}
grid_lr = GridSearchCV(LogisticRegression(max_iter=1000), param_lr, cv=3, n_jobs=-1)
grid_lr.fit(X_train, y_train)

# SVM
param_svm = {'C': [0.1, 1], 'kernel': ['linear', 'rbf']}
grid_svm = GridSearchCV(SVC(), param_svm, cv=3, n_jobs=-1)
grid_svm.fit(X_train, y_train)

# Decision Tree
# random_state is fixed (same seed as the train/test split) so that repeated
# runs select the same tree and report the same scores.
param_dt = {'max_depth': [10, 20, None]}
grid_dt = GridSearchCV(DecisionTreeClassifier(random_state=42), param_dt, cv=3, n_jobs=-1)
grid_dt.fit(X_train, y_train)

## 📊 7. Training Evaluation

In [None]:
def evaluate(model, X, y):
    """Print classification metrics for ``model`` on ``(X, y)`` and plot its confusion matrix.

    Prints accuracy plus weighted-average precision, recall and F1, shows an
    annotated confusion-matrix heatmap, then prints the per-class
    classification report.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Feature matrix passed to ``model.predict``.
        y: True labels aligned with the rows of ``X``.

    Returns:
        None. All results are printed or plotted.
    """
    y_pred = model.predict(X)
    print("Accuracy:", accuracy_score(y, y_pred))
    print("Precision:", precision_score(y, y_pred, average='weighted'))
    print("Recall:", recall_score(y, y_pred, average='weighted'))
    print("F1 Score:", f1_score(y, y_pred, average='weighted'))

    # Draw on a fresh figure so the heatmap never lands on leftover axes.
    plt.figure(figsize=(8, 6))
    # annot=True makes the integer format 'd' take effect: each cell shows its count.
    sns.heatmap(confusion_matrix(y, y_pred), annot=True, fmt='d')
    # confusion_matrix puts true labels on rows and predictions on columns.
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.title("Confusion Matrix")
    plt.show()
    print(classification_report(y, y_pred))

In [None]:
# Score each tuned model on the same data it was trained on.
tuned_searches = (
    ("Logistic Regression", grid_lr),
    ("SVM", grid_svm),
    ("Decision Tree", grid_dt),
)
for heading, search in tuned_searches:
    print(heading)
    evaluate(search.best_estimator_, X_train, y_train)

## 🧪 8. Test Evaluation with Bootstrapping

In [None]:
def bootstrap(model, X, y, n_iterations=100, random_state=None):
    """Estimate the spread of accuracy, precision and recall by bootstrapping.

    The model predicts once on ``X``; each iteration then draws a
    with-replacement sample of (true label, prediction) pairs and scores it.
    This assumes ``model.predict`` treats every row independently, so that
    predicting once and resampling the predictions is equivalent to
    resampling the rows and predicting on each resample, while avoiding
    ``n_iterations`` repeated prediction passes.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Feature matrix to evaluate on.
        y: True labels aligned with the rows of ``X``.
        n_iterations: Number of bootstrap resamples (must be >= 1).
        random_state: Optional integer seed for reproducible resampling.
            ``None`` keeps the unseeded behaviour.

    Returns:
        Tuple ``(acc_mean, acc_std, prec_mean, prec_std, rec_mean, rec_std)``;
        precision and recall are weighted averages.

    Raises:
        ValueError: If ``n_iterations`` is smaller than 1.
    """
    if n_iterations < 1:
        raise ValueError("n_iterations must be at least 1")

    # Predictions do not change between iterations, so compute them once.
    y_true = np.asarray(y)
    y_hat = np.asarray(model.predict(X))

    # One shared generator so successive resamples differ yet stay reproducible.
    rng = None if random_state is None else np.random.RandomState(random_state)

    acc, prec, rec = [], [], []
    for _ in range(n_iterations):
        # resample applies the same drawn indices to both arrays.
        y_s, pred_s = resample(y_true, y_hat, random_state=rng)
        acc.append(accuracy_score(y_s, pred_s))
        prec.append(precision_score(y_s, pred_s, average='weighted'))
        rec.append(recall_score(y_s, pred_s, average='weighted'))
    return np.mean(acc), np.std(acc), np.mean(prec), np.std(prec), np.mean(rec), np.std(rec)

# Tuned estimators to compare on the held-out test split.
models = {
    'LR': grid_lr.best_estimator_,
    'SVM': grid_svm.best_estimator_,
    'DT': grid_dt.best_estimator_,
}
for name, model in models.items():
    print(f"\n{name} Bootstrapping Results")
    (acc_mean, acc_std,
     prec_mean, prec_std,
     rec_mean, rec_std) = bootstrap(model, X_test, y_test)
    # Report mean ± 1.96 standard deviations of the bootstrap scores.
    acc_half = 1.96*acc_std
    prec_half = 1.96*prec_std
    rec_half = 1.96*rec_std
    print(f"Accuracy: {acc_mean:.4f} ± {acc_half:.4f}")
    print(f"Precision: {prec_mean:.4f} ± {prec_half:.4f}")
    print(f"Recall: {rec_mean:.4f} ± {rec_half:.4f}")

## ✅ 9. Model Comparison and Conclusion

In [None]:
# Write your conclusion here:
# - Which model performed best?
# - Did any model overfit?
# - What would you improve next time?