In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Step 1: Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Step 2: Convert the data into a DataFrame
data = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
                     columns= iris['feature_names'] + ['target'])

# Step 3: Preprocess the data
# Assuming 'target' is the target column, drop irrelevant columns
if 'target' in data.columns:
    data.drop(['target'], axis=1, inplace=True)
else:
    print("'target' column not found in the dataset.")

# Step 4: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Define the model
rf_model = RandomForestClassifier()

# Step 6: Define hyperparameters to tune
param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Step 7: Perform Randomized Search with Cross-Validation
random_search = RandomizedSearchCV(estimator=rf_model, param_distributions=param_dist, n_iter=10, cv=5, scoring='accuracy', n_jobs=-1)
random_search.fit(X_train, y_train)

# Step 8: Get the best model
best_rf_model = random_search.best_estimator_

# Step 9: Evaluate the best model using cross-validation
cv_accuracy = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='precision_weighted')
cv_recall = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='recall_weighted')

# Step 10: Fit the best model on the full training set
best_rf_model.fit(X_train, y_train)

# Step 11: Evaluate the best model on the test set
y_pred = best_rf_model.predict(X_test)

# Step 12: Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

# Step 13: Print evaluation metrics
print("Cross-Validation Accuracy:", np.mean(cv_accuracy))
print("Cross-Validation Precision:", np.mean(cv_precision))
print("Cross-Validation Recall:", np.mean(cv_recall))
print("Test Accuracy:", accuracy)
print("Test Precision:", precision)
print("Test Recall:", recall)


Cross-Validation Accuracy: 0.9416666666666667
Cross-Validation Precision: 0.9438227513227513
Cross-Validation Recall: 0.9416666666666667
Test Accuracy: 1.0
Test Precision: 1.0
Test Recall: 1.0
