In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
import joblib

In [2]:
# Load data
digits = load_digits()
X, y = digits.data, digits.target

In [3]:
# Scale data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
# Save the scaler
joblib.dump(scaler, '../model/scaler.pkl')

['../model/scaler.pkl']

In [5]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [6]:
# Define models and params for GridSearchCV
models = {
    "LogisticRegression": {
        "model": LogisticRegression(max_iter=10000),
        "params": {"C": [0.1, 1, 10]}
    },
    "RandomForest": {
        "model": RandomForestClassifier(),
        "params": {"n_estimators": [50, 100], "max_depth": [10, 20]}
    },
    "SVC": {
        "model": SVC(),
        "params": {"C": [0.1, 1, 10], "kernel": ["linear", "rbf"]}
    },
    "KNeighbors": {
        "model": KNeighborsClassifier(),
        "params": {"n_neighbors": [3, 5, 7]}
    }
}


In [7]:
# Run GridSearchCV
best_models = {}
for name, mp in models.items():
    print(f"Training {name}...")
    grid = GridSearchCV(mp["model"], mp["params"], cv=5, scoring='accuracy', n_jobs=-1)
    grid.fit(X_train, y_train)
    best_models[name] = grid
    print(f"{name} Best Score: {grid.best_score_}")
    print(f"{name} Best Params: {grid.best_params_}")
    print(classification_report(y_test, grid.predict(X_test)))


Training LogisticRegression...
LogisticRegression Best Score: 0.9617136082075106
LogisticRegression Best Params: {'C': 1}
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.97      1.00      0.98        28
           2       1.00      1.00      1.00        33
           3       0.97      0.97      0.97        34
           4       1.00      0.98      0.99        46
           5       0.94      0.94      0.94        47
           6       0.97      0.97      0.97        35
           7       1.00      0.97      0.99        34
           8       0.97      0.97      0.97        30
           9       0.93      0.95      0.94        40

    accuracy                           0.97       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.97      0.97       360

Training RandomForest...
RandomForest Best Score: 0.9791206929926443
RandomForest Best Params: {'max_depth': 20, 'n_est

In [8]:
# Pick the best model manually or automatically
best_model = best_models["RandomForest"]  # Example choice


In [9]:
# Save model
joblib.dump(best_model.best_estimator_, '../model/best_model.pkl')

['../model/best_model.pkl']