In [1]:
# Import necessary libraries
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [2]:
# Load the digits dataset and pull out the feature matrix and label vector.
digits = load_digits()
X = digits.data
y = digits.target

# Hold out 30% of the samples for testing. The split is stratified on the
# labels and seeded so that it is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)



In [3]:
# Baseline: a seeded random forest scored with 5-fold cross-validation on the
# training split only (the test split is not touched here).
model = RandomForestClassifier(random_state=42)
scores = cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    cv=5,
)

# Report the per-fold scores followed by their mean.
print(f"Cross-validation scores: {scores}")
print(f"Mean CV score: {scores.mean()}")



Cross-validation scores: [0.98015873 0.97222222 0.98007968 0.96812749 0.96414343]
Mean CV score: 0.9729463099981028


In [4]:
# Exhaustive hyperparameter search over the random forest.
# The grid has 3 * 3 * 3 * 3 = 81 combinations; each one is evaluated with
# 5-fold cross-validation on the training split.
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

grid_search = GridSearchCV(
    model,
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,  # run the fits in parallel on all available cores
)
grid_search.fit(X_train, y_train)

# Show the winning configuration and its mean cross-validated accuracy.
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Cross-validation Score: {grid_search.best_score_}")



Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 150}
Best Cross-validation Score: 0.9753367482451148


In [5]:
# Take the estimator selected by the grid search and score it on the held-out
# test split, which was not used for cross-validation or tuning above.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Per-class precision / recall / F1 plus the overall averages.
print("Classification Report on Test Data:")
print(classification_report(y_true=y_test, y_pred=y_pred))


Classification Report on Test Data:
              precision    recall  f1-score   support

           0       0.98      0.96      0.97        54
           1       0.92      1.00      0.96        55
           2       1.00      0.98      0.99        53
           3       0.98      0.98      0.98        55
           4       0.96      0.96      0.96        54
           5       1.00      0.98      0.99        55
           6       1.00      0.98      0.99        54
           7       0.90      1.00      0.95        54
           8       0.94      0.88      0.91        52
           9       0.96      0.89      0.92        54

    accuracy                           0.96       540
   macro avg       0.96      0.96      0.96       540
weighted avg       0.96      0.96      0.96       540

