In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic classification problem: 1000 rows, 20 features of which 10 carry
# signal and 5 are redundant; the fixed seed makes the data reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    random_state=42,
)

# Hold out 30% of the rows for the final evaluation; the remaining 70% are
# used for hyperparameter search. The seed keeps the partition stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [7]:
# Echo the training feature matrix as the cell's last expression so the
# notebook renders its repr (1000 samples x 20 features with a 30% test
# split leaves 700 training rows).
X_train

array([[ 1.3745378 , -3.65709249, -1.02534819, ..., -0.92007962,
        -4.01422687, -1.08026706],
       [ 0.60489028,  0.95739834, -2.7166734 , ..., -0.5050802 ,
         0.54340164,  1.08927225],
       [-0.14915368,  1.45072011,  0.4225225 , ..., -1.72152321,
         1.67893601,  0.16681606],
       ...,
       [ 0.30115059,  1.26528762, -3.24642512, ..., -2.49598216,
        -4.50307871,  0.21747348],
       [-1.26031643, -0.13898988, -2.88699428, ..., -5.03055758,
        -1.54198924,  1.25928371],
       [ 4.50203808, -2.06389391, -0.07681336, ...,  1.94517706,
        -6.80482744,  2.0403466 ]])

In [9]:
# Echo the training labels (0/1 class ids) that pair row-for-row with X_train.
y_train

array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0,
       0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
       0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Exhaustive search space: every forest size is paired with every depth cap
# (4 sizes x 5 depths = 20 candidate settings).
param_grid = {'n_estimators': [50, 100, 150, 200],
              'max_depth': [None, 10, 20, 30, 40]}

# Base estimator; the seed keeps forest construction repeatable.
rf = RandomForestClassifier(random_state=42)

# Score every candidate with 5-fold cross-validation (20 x 5 = 100 fits),
# running fits in parallel (n_jobs=-1) and logging progress (verbose=2).
grid_search = GridSearchCV(
    rf,
    param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)

# Run the search on the training split only; the test split stays untouched.
grid_search.fit(X_train, y_train)

# Report the winning setting and its mean cross-validation score.
print(f"Best parameters found by GridSearchCV: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.4f}")


Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best parameters found by GridSearchCV: {'max_depth': None, 'n_estimators': 150}
Best cross-validation score: 0.9271


In [5]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

# Sampling space for the randomized search.
# scipy.stats.randint(low, high) draws integers from the half-open interval
# [low, high): `high` itself is never returned. The upper bound is therefore
# 201 so that n_estimators=200 -- the largest value in the exhaustive grid --
# stays reachable and both searches cover the same 50..200 range.
param_dist = {
    'n_estimators': randint(50, 201),
    'max_depth': [None, 10, 20, 30, 40]
}

# Draw n_iter=10 candidate settings and score each with 5-fold
# cross-validation (10 x 5 = 50 fits). random_state pins the sampled
# candidates so the search is repeatable. `rf` is the
# RandomForestClassifier(random_state=42) created in the grid-search cell,
# so that cell must have been run first.
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)

# Run the search on the training split only; the test split stays untouched.
random_search.fit(X_train, y_train)

# Report the winning setting and its mean cross-validation score.
print(f"Best parameters found by RandomizedSearchCV: {random_search.best_params_}")
print(f"Best cross-validation score: {random_search.best_score_:.4f}")

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best parameters found by RandomizedSearchCV: {'max_depth': 30, 'n_estimators': 142}
Best cross-validation score: 0.9300


In [6]:
from sklearn.metrics import accuracy_score

# Take the winning model from each search.
grid_best_model = grid_search.best_estimator_
random_best_model = random_search.best_estimator_

# Predict labels for the held-out test split with each winner.
grid_predictions = grid_best_model.predict(X_test)
random_predictions = random_best_model.predict(X_test)

# Fraction of test rows each model labels correctly.
grid_accuracy = accuracy_score(y_true=y_test, y_pred=grid_predictions)
random_accuracy = accuracy_score(y_true=y_test, y_pred=random_predictions)

# Side-by-side report of the two search strategies.
print(f"Test set accuracy using GridSearchCV: {grid_accuracy:.4f}")
print(f"Test set accuracy using RandomizedSearchCV: {random_accuracy:.4f}")


Test set accuracy using GridSearchCV: 0.9267
Test set accuracy using RandomizedSearchCV: 0.9300
