In [8]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Standardize features for SVM
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define the hyperparameter grid for RandomizedSearchCV for Random Forest
param_dist_rf = {
    'n_estimators': np.arange(10, 200, 10),
    'max_depth': [None] + list(np.arange(1, 20)),
    'min_samples_split': np.arange(2, 11),
    'min_samples_leaf': np.arange(1, 11),
    'bootstrap': [True, False],
    'criterion': ['gini', 'entropy']
}

# Define the hyperparameter grid for RandomizedSearchCV for SVM
param_dist_svm = {
    'C': np.logspace(-3, 3, 100),  # Regularization parameter
    'kernel': ['linear', 'rbf', 'poly'],  # Kernel types
    'degree': np.arange(1, 6),  # Degree of the polynomial kernel (for poly kernel)
    'gamma': ['scale', 'auto'] + list(np.logspace(-3, 3, 500)),  # Kernel coefficient (for rbf and poly kernels)
}

# Create a K-Fold cross-validation object
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Create a RandomizedSearchCV object for Random Forest
random_search_rf = RandomizedSearchCV(
    estimator=RandomForestClassifier(),
    param_distributions=param_dist_rf,
    n_iter=500,  # Increase the number of attempts to 500
    scoring='accuracy',
    n_jobs=-1,  # Use all available CPU cores for parallelism
    cv=kf,  # Use K-Fold cross-validation
    verbose=1,
    random_state=42
)

# Create a RandomizedSearchCV object for SVM
random_search_svm = RandomizedSearchCV(
    estimator=SVC(),
    param_distributions=param_dist_svm,
    n_iter=500,  # Increase the number of attempts to 500
    scoring='accuracy',
    n_jobs=-1,  # Use all available CPU cores for parallelism
    cv=kf,  # Use K-Fold cross-validation
    verbose=1,
    random_state=42
)

# Fit the RandomizedSearchCV for Random Forest to the data
random_search_rf.fit(X, y)

# Fit the RandomizedSearchCV for SVM to the data
random_search_svm.fit(X, y)

# Print the best hyperparameters and corresponding accuracy for Random Forest
print("Random Forest - Best Hyperparameters:", random_search_rf.best_params_)
print("Random Forest - Best Accuracy:", random_search_rf.best_score_)

# Print the best hyperparameters and corresponding accuracy for SVM
print("SVM - Best Hyperparameters:", random_search_svm.best_params_)
print("SVM - Best Accuracy:", random_search_svm.best_score_)


Fitting 5 folds for each of 500 candidates, totalling 2500 fits
Fitting 5 folds for each of 500 candidates, totalling 2500 fits
Random Forest - Best Hyperparameters: {'n_estimators': 110, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_depth': 14, 'criterion': 'gini', 'bootstrap': False}
Random Forest - Best Accuracy: 0.9733333333333334
SVM - Best Hyperparameters: {'kernel': 'rbf', 'gamma': 0.04563716281924759, 'degree': 1, 'C': 5.72236765935022}
SVM - Best Accuracy: 0.9800000000000001


In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Breast Cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameter grid for RandomizedSearchCV for Random Forest
param_dist_rf = {
    'n_estimators': np.arange(10, 200, 10),
    'max_depth': [None] + list(np.arange(1, 20)),
    'min_samples_split': np.arange(2, 11),
    'min_samples_leaf': np.arange(1, 11),
    'bootstrap': [True, False],
    'criterion': ['gini', 'entropy']
}

# Define the hyperparameter grid for RandomizedSearchCV for Gradient Boosting
param_dist_gb = {
    'n_estimators': np.arange(10, 200, 10),
    'learning_rate': [0.01, 0.1, 0.2, 0.3, 0.5],
    'max_depth': [3, 4, 5, 6, 7, 8],
    'min_samples_split': np.arange(2, 11),
    'min_samples_leaf': np.arange(1, 11)
}

# Create a K-Fold cross-validation object
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Create a RandomizedSearchCV object for Random Forest
rf_classifier = RandomForestClassifier()
random_search_rf = RandomizedSearchCV(
    estimator=rf_classifier,
    param_distributions=param_dist_rf,
    n_iter=500,  # Increase the number of attempts to 500
    scoring='accuracy',
    n_jobs=-1,  # Use all available CPU cores for parallelism
    cv=kf,  # Use K-Fold cross-validation
    verbose=1,
    random_state=42
)

# Create a RandomizedSearchCV object for Gradient Boosting
gb_classifier = GradientBoostingClassifier()
random_search_gb = RandomizedSearchCV(
    estimator=gb_classifier,
    param_distributions=param_dist_gb,
    n_iter=500,  # Increase the number of attempts to 500
    scoring='accuracy',
    n_jobs=-1,  # Use all available CPU cores for parallelism
    cv=kf,  # Use K-Fold cross-validation
    verbose=1,
    random_state=42
)

# Fit the RandomizedSearchCV for Random Forest to the training data
random_search_rf.fit(X_train, y_train)

# Fit the RandomizedSearchCV for Gradient Boosting to the training data
random_search_gb.fit(X_train, y_train)

# Evaluate the models on the test set and print accuracy
rf_pred = random_search_rf.predict(X_test)
gb_pred = random_search_gb.predict(X_test)

rf_accuracy = accuracy_score(y_test, rf_pred)
gb_accuracy = accuracy_score(y_test, gb_pred)

print("Random Forest - Best Hyperparameters:", random_search_rf.best_params_)
print("Random Forest - Best Accuracy:", ((random_search_rf.best_score_)*100).round(2))
print("Random Forest - Test Accuracy:", ((rf_accuracy)*100).round(2))

print("\nGradient Boosting - Best Hyperparameters:", random_search_gb.best_params_)
print("Gradient Boosting - Best Accuracy:", ((random_search_gb.best_score_)*100).round(2))
print("Gradient Boosting - Test Accuracy:", ((gb_accuracy)*100).round(2))


Fitting 5 folds for each of 500 candidates, totalling 2500 fits
Fitting 5 folds for each of 500 candidates, totalling 2500 fits
Random Forest - Best Hyperparameters: {'n_estimators': 100, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_depth': 17, 'criterion': 'entropy', 'bootstrap': False}
Random Forest - Best Accuracy: 0.9626373626373628
Random Forest - Test Accuracy: 0.9649122807017544

Gradient Boosting - Best Hyperparameters: {'n_estimators': 60, 'min_samples_split': 6, 'min_samples_leaf': 6, 'max_depth': 3, 'learning_rate': 0.5}
Gradient Boosting - Best Accuracy: 0.964835164835165
Gradient Boosting - Test Accuracy: 0.9649122807017544
