In [None]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [None]:
# Load the breast-cancer dataset and hold out a stratified 20% test split.
data = load_breast_cancer()

X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    stratify=data.target,
    random_state=0,
)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits so no test statistics reach the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Hyperparameter grid for the Random Forest sweep.
n_estimators = [100, 200, 300]
max_depth = [None, 10, 20, 30]

experiment_results = []

# Keep the fitted estimator with the highest accuracy seen so far so that the
# actual model (not just its row in the results table) can be persisted.
# The strict ">" keeps the first configuration on ties, matching idxmax() below.
best_rf = None
best_accuracy = float('-inf')

for n_est in n_estimators:
    for max_depth_val in max_depth:
        rf = RandomForestClassifier(
            n_estimators=n_est,
            max_depth=max_depth_val,
            random_state=0
        )
        rf.fit(X_train, y_train)
        y_pred = rf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        experiment_results.append({
            'n_estimators': n_est,
            'max_depth': max_depth_val,
            'accuracy': accuracy
        })
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_rf = rf

# Convert to DataFrame for better visualization
results_df = pd.DataFrame(experiment_results)

# Row of the results table for the best configuration (first row on ties).
# NOTE(review): the winner is picked using test-set accuracy, so that accuracy
# is an optimistic estimate; consider a validation split or cross-validation.
best_model = results_df.loc[results_df['accuracy'].idxmax()]

# Save the experiment results, creating the output directory if needed
results_filename = 'results/random_forest_tuning_results.csv'
Path(results_filename).parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(results_filename, index=False)

# Report the best configuration from the estimator itself: in the results
# table pandas stores max_depth=None as NaN, which would print misleadingly.
print(f"Best Model: n_estimators={best_rf.n_estimators}, max_depth={best_rf.max_depth}")

# Save the scaler
scaler_filename = 'models/scaler.pkl'
Path(scaler_filename).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(scaler, scaler_filename)

# Save the best fitted estimator (not the results row, which cannot predict)
best_model_filename = 'models/best_random_forest.pkl'
Path(best_model_filename).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(best_rf, best_model_filename)
