In [10]:
import warnings

# Disable warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import GridSearchCV

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import mnist

# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Reshape the input data
x_train = x_train.reshape(-1, 28*28)
x_test = x_test.reshape(-1, 28*28)

# Split the data into training and validation sets
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.3, random_state=42)

# Create and train the Random Forest model
clf_rf = RandomForestClassifier(n_estimators=100, random_state=42)
clf_rf.fit(x_train, y_train)

# Make predictions on the validation set
y_pred = clf_rf.predict(x_test)

# Calculate the accuracy on the validation set
val_accuracy = accuracy_score(y_test, y_pred)
print("Validation Accuracy:", val_accuracy)

# Make predictions on the test set
y_pred_test = clf_rf.predict(x_test)

# Calculate the accuracy on the test set
test_accuracy = accuracy_score(y_test, y_pred_test)
print("Test Accuracy:", test_accuracy)


In [4]:
if clf_rf.score(x_train, y_train) < clf_rf.score(x_test, y_test):
  print("Overfit")
  print("Training Accuracy:", clf_rf.score(x_train, y_train))
  print("Test Accuracy:", clf_rf.score(x_test, y_test))
else:
  print("Good Fit")
  print("Training Accuracy:", clf_rf.score(x_train, y_train))
  print("Test Accuracy:", clf_rf.score(x_test, y_test))


Good Fit
Training Accuracy: 1.0
Test Accuracy: 0.9665


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import mnist

# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Reshape the input data
x_train = x_train.reshape(-1, 28*28)
x_test = x_test.reshape(-1, 28*28)

# Split the data into training and validation sets
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.3, random_state=42)

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10]
}

# Create the Random Forest model
rf_model = RandomForestClassifier(random_state=42)

# Perform grid search
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=3)
grid_search.fit(x_train, y_train)

# Get the best model and its parameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_


# Make predictions on the validation set
y_pred = clf_rf.predict(x_test)

# Calculate the accuracy on the validation set
val_accuracy = accuracy_score(y_test, y_pred)
print("Validation Accuracy:", val_accuracy)

# Make predictions on the test set
y_pred_test = clf_rf.predict(x_test)

# Calculate the accuracy on the test set
test_accuracy = accuracy_score(y_test, y_pred_test)
print("Test Accuracy:", test_accuracy)
