In [2]:
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the MNIST dataset ('mnist_784') through scikit-learn's OpenML fetcher
mnist = fetch_openml('mnist_784')
X, y = mnist['data'], mnist['target']

# Hold out 20% of the samples for testing. The split is seeded so that every
# run trains and evaluates on the same partition, which makes the reported
# accuracies and confusion matrices reproducible and comparable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Function to train and evaluate Random Forest
def evaluate_random_forest(n_estimators, max_features, *, random_state=None,
                           n_jobs=None):
    """Train a random forest on the training split and print test metrics.

    Fits a ``RandomForestClassifier`` on the module-level ``X_train`` /
    ``y_train``, predicts labels for ``X_test`` and prints the accuracy and
    the confusion matrix computed against ``y_test``.

    Args:
        n_estimators: Forwarded to the classifier's ``n_estimators``.
        max_features: Forwarded to the classifier's ``max_features``.
        random_state: Optional seed forwarded to the classifier. The default
            (``None``) keeps the original unseeded behaviour; pass an int to
            make repeated runs produce the same forest.
        n_jobs: Optional value forwarded to the classifier's ``n_jobs``. The
            default (``None``) keeps the library default.

    Returns:
        None. The results are only printed.
    """
    # Build the model with the requested hyper-parameters
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_features=max_features,
        random_state=random_state,
        n_jobs=n_jobs,
    )

    # Fit on the training split, then predict the held-out test split
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Report the overall accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"n_estimators: {n_estimators}, max_features: {max_features}, Accuracy: {accuracy:.4f}")

    # Report the per-class breakdown (rows: true labels, columns: predictions)
    confusion_matrix_result = confusion_matrix(y_test, y_pred)
    print(f"Confusion Matrix:\n{confusion_matrix_result}")

# Baseline: a small forest of 10 trees with max_features=None
# (no restriction on the features considered at each split)
evaluate_random_forest(n_estimators=10, max_features=None)

# 100 trees, 12 features per node
evaluate_random_forest(n_estimators=100, max_features=12)

# 200 trees, 12 features per node
evaluate_random_forest(n_estimators=200, max_features=12)

# 300 trees, 12 features per node
evaluate_random_forest(n_estimators=300, max_features=12)

# Further exploration can be conducted by replacing these lines with other parameter values,
# so that we can evaluate the tuned model and compare its accuracy and confusion matrix with previous runs.


  warn(


n_estimators: 10, max_features: None, Accuracy: 0.9389
Confusion Matrix:
[[1380    1    8    1    4    5    3    1    8    0]
 [   0 1534   17    1    1    5    2    4    6    2]
 [   8    6 1309   11   11    4    7   12    9    6]
 [  10    9   31 1316    3   30    5   17   13    6]
 [   5    3   10    2 1254    3   10    4    7   40]
 [  11    6    6   26    9 1172    6    1   17    4]
 [  10    5   11    2    8   22 1285    0   11    1]
 [   3   14   17   10   12    2    0 1350    7   20]
 [   5   24   22   27   14   13   15    3 1201   16]
 [   5    5    7   17   35   25    3   13   14 1344]]
n_estimators: 100, max_features: 12, Accuracy: 0.9669
Confusion Matrix:
[[1393    1    4    0    2    0    4    0    6    1]
 [   0 1547   10    2    0    3    1    5    3    1]
 [   4    1 1338    5    6    0    5   16    7    1]
 [   0    2   16 1374    1   11    2   18   13    3]
 [   2    4    2    0 1294    0    7    2    4   23]
 [   8    0    0   19    2 1216    4    0    5    4]
 [   4

Let us observe how changing the maximum number of features, with the number of trees set to 100, 200 and 300, gives better results. We note that with 10 or 12 features the accuracy changes little, while considering 20 features per node clearly improves the accuracy; furthermore, considering 15 features slightly increases the accuracy.