In [1]:
%store -r df_pca
%store -r df
%store -r df_selected

In [2]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Baseline model
rf = RandomForestClassifier(random_state=42)
rf.fit(df_selected.drop('target', axis=1), df_selected['target'])
baseline_acc = accuracy_score(df_selected['target'], rf.predict(df_selected.drop('target', axis=1)))

# GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10]
}
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1)
grid_search.fit(df_selected.drop('target', axis=1), df_selected['target'])
grid_acc = accuracy_score(df_selected['target'], grid_search.predict(df_selected.drop('target', axis=1)))

# RandomizedSearchCV
param_dist = {
    'n_estimators': [50, 100, 200, 300],
    'max_depth': [None, 5, 10, 20, 30],
    'min_samples_split': [2, 5, 10, 15]
}
random_search = RandomizedSearchCV(RandomForestClassifier(random_state=42), param_dist, n_iter=10, cv=5, n_jobs=-1, random_state=42)
random_search.fit(df_selected.drop('target', axis=1), df_selected['target'])
random_acc = accuracy_score(df_selected['target'], random_search.predict(df_selected.drop('target', axis=1)))

# Compare results
print("Baseline Accuracy:", baseline_acc)
print("GridSearchCV Best Accuracy:", grid_acc)
print("RandomizedSearchCV Best Accuracy:", random_acc)
print("Best Model (GridSearchCV):", grid_search.best_estimator_)
print("Best Model (RandomizedSearchCV):", random_search.best_estimator_)

Baseline Accuracy: 1.0
GridSearchCV Best Accuracy: 0.9966996699669967
RandomizedSearchCV Best Accuracy: 0.9966996699669967
Best Model (GridSearchCV): RandomForestClassifier(min_samples_split=5, n_estimators=200, random_state=42)
Best Model (RandomizedSearchCV): RandomForestClassifier(max_depth=30, min_samples_split=5, n_estimators=200,
                       random_state=42)


In [4]:
import joblib

# Save the best model from GridSearchCV
joblib.dump(grid_search.best_estimator_, "model.pkl")
print("✅ Model saved as model.pkl")

✅ Model saved as model.pkl
