# 📊 ML Model Training, Evaluation & Hyperparameter Tuning

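This notebook trains four baseline classifiers (logistic regression, decision tree, random forest, and SVM) on scikit-learn's breast cancer dataset, evaluates them on a held-out test set, and then tunes the random forest with GridSearchCV and the SVM with RandomizedSearchCV to optimize model performance.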

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import warnings
# Silence every Python warning for the rest of the session to keep cell output short.
# NOTE(review): this blanket filter also hides scikit-learn convergence and
# deprecation warnings — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Load scikit-learn's built-in breast cancer dataset and wrap the feature
# matrix in a DataFrame so the columns keep their feature names.
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks into it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Baseline classifiers with default hyperparameters, keyed by the display name
# that ends up in the 'Model' column of the results table.
models = {
    'Logistic Regression': LogisticRegression(),
    # Tree-based estimators draw random numbers while fitting; pin the seed
    # (same value as the train/test split) so Restart & Run All reproduces
    # the same baseline scores.
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC()
}

In [None]:
def evaluate_model(name, model, X_test, y_test):
    """Score a classifier's predictions and return them as one table row.

    Args:
        name: Label stored under the 'Model' key of the returned row.
        model: Object exposing ``predict(X)``; it is not fitted here.
        X_test: Features passed to ``model.predict``.
        y_test: True labels the predictions are compared against.

    Returns:
        dict with the keys 'Model', 'Accuracy', 'Precision', 'Recall' and
        'F1 Score', in that order.
    """
    predictions = model.predict(X_test)

    # (column label, metric function) pairs; order fixes the column order.
    scorers = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1 Score', f1_score),
    )

    row = {'Model': name}
    for label, scorer in scorers:
        row[label] = scorer(y_test, predictions)
    return row

In [None]:
# Fit every baseline on the scaled training data and collect one metrics row
# per model from the scaled test data. `fit` returns the estimator itself, so
# it can be handed straight to evaluate_model.
results = [
    evaluate_model(
        label,
        estimator.fit(X_train_scaled, y_train),
        X_test_scaled,
        y_test,
    )
    for label, estimator in models.items()
]

results_df = pd.DataFrame(data=results)
print(results_df)

In [None]:
# Exhaustive search over every n_estimators x max_depth combination of a
# random forest, scored by F1 with 5-fold cross-validation on the training set.
param_grid_rf = {
    'n_estimators': [50, 100, 150],
    'max_depth': [4, 6, 8, None]
}
grid_rf = GridSearchCV(
    # Seed the forest so the cross-validation scores, and therefore the
    # selected hyperparameters, are the same on every run.
    RandomForestClassifier(random_state=42),
    param_grid_rf,
    cv=5,
    scoring='f1',
)
grid_rf.fit(X_train_scaled, y_train)
# With GridSearchCV's default refit=True, best_estimator_ is the winning
# configuration refitted on the whole training set.
best_rf = grid_rf.best_estimator_

In [None]:
# Search space for the SVM: ten values of C spaced evenly on a log scale from
# 1e-3 to 1e2, both gamma heuristics, and two kernels.
param_dist_svc = dict(
    C=np.logspace(-3, 2, num=10),
    gamma=['scale', 'auto'],
    kernel=['rbf', 'linear'],
)

# Evaluate n_iter=10 settings drawn from that space, scored by F1 with 5-fold
# cross-validation; the fixed random_state makes the draw repeatable.
rand_svc = RandomizedSearchCV(
    SVC(),
    param_distributions=param_dist_svc,
    n_iter=10,
    cv=5,
    scoring='f1',
    random_state=42,
)
rand_svc.fit(X_train_scaled, y_train)
best_svc = rand_svc.best_estimator_

In [None]:
# Score both tuned estimators on the scaled held-out test data, producing one
# metrics row per model.
tuned_results = [
    evaluate_model('Tuned Random Forest', best_rf, X_test_scaled, y_test),
    evaluate_model('Tuned SVM', best_svc, X_test_scaled, y_test),
]

tuned_df = pd.DataFrame(tuned_results)
# Stack the tuned rows under the baseline rows; ignore_index=True renumbers
# the combined table 0..n-1 instead of repeating each frame's own index.
final_df = pd.concat([results_df, tuned_df], ignore_index=True)
# The leading '\n' escape prints a blank line before the heading. It must stay
# an escape sequence: a raw line break inside the quotes is a SyntaxError.
print('\nFinal Model Comparison (Sorted by F1 Score):')
print(final_df.sort_values(by='F1 Score', ascending=False))