<a href="https://colab.research.google.com/github/Lavanya8484/Celebal_Summer_Internship/blob/main/Week-6/ModelEvaluation_and_HyperparameterTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import randint
import warnings
warnings.filterwarnings('ignore')


In [2]:
# Fetch the Iris dataset bundled with scikit-learn and unpack features / labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply them to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
# Baseline models, all with default hyperparameters.
# The random forest is the only stochastic estimator here, so it gets the same
# seed (42) used for the train/test split; otherwise its scores could differ
# on every "Restart & Run All".
models = {
    'Logistic Regression': LogisticRegression(),
    'Support Vector Machine': SVC(),
    'Random Forest': RandomForestClassifier(random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier()
}

# Evaluation storage: one dict of metrics per evaluated model.
# Re-running this cell resets the list to empty.
results = []

# Evaluation function
def evaluate_model(name, model):
    """Fit `model` on the training split and record its test-set metrics.

    The model is fitted on the notebook-level `X_train` / `y_train`, used to
    predict `X_test`, and scored against `y_test`. One row holding accuracy
    and macro-averaged precision, recall and F1 is stored in the notebook-level
    `results` list.

    Parameters
    ----------
    name : str
        Label stored under the 'Model' key of the recorded row.
    model : estimator
        Object exposing `fit(X, y)` and `predict(X)`.

    Notes
    -----
    Any earlier row with the same `name` is replaced, so re-running a cell
    does not add duplicate rows to the comparison table.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    row = {
        'Model': name,
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average='macro'),
        'Recall': recall_score(y_test, y_pred, average='macro'),
        'F1 Score': f1_score(y_test, y_pred, average='macro')
    }
    # Slice-assign so the shared list object is updated in place (no rebinding)
    # while dropping any stale entry recorded under the same model name.
    results[:] = [entry for entry in results if entry['Model'] != name]
    results.append(row)

# Fit and score each baseline model in turn
for name in models:
    model = models[name]
    evaluate_model(name, model)

# Display the collected scores, highest macro F1 first
(
    pd.DataFrame(data=results)
    .sort_values('F1 Score', ascending=False)
)


Unnamed: 0,Model,Accuracy,Precision,Recall,F1 Score
0,Logistic Regression,1.0,1.0,1.0,1.0
1,Support Vector Machine,1.0,1.0,1.0,1.0
2,Random Forest,1.0,1.0,1.0,1.0
3,K-Nearest Neighbors,1.0,1.0,1.0,1.0


In [4]:
# Hyperparameter tuning: exhaustive grid search over SVC settings
svm_params = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf'],
    gamma=['scale', 'auto'],
)

# Try every combination in the grid with 5-fold cross-validation on the training split
grid_svm = GridSearchCV(estimator=SVC(), param_grid=svm_params, cv=5)
grid_svm.fit(X_train, y_train)

# Take the best configuration found and record its test-set metrics
best_svm = grid_svm.best_estimator_
evaluate_model("Tuned SVM (GridSearch)", best_svm)


In [5]:
# Hyperparameter tuning: randomized search over random-forest settings.
# Each entry is an integer distribution that the search samples from.
rf_params = {
    'n_estimators': randint(50, 200),
    'max_depth': randint(2, 10),
    'min_samples_split': randint(2, 10),
    'min_samples_leaf': randint(1, 10)
}
# Seed the estimator as well as the search: the search's random_state only fixes
# which parameter candidates are sampled, while an unseeded forest would still
# produce different CV scores (and possibly a different winner) on every run.
random_rf = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    rf_params,
    n_iter=10,
    cv=5,
    random_state=42,
)
random_rf.fit(X_train, y_train)
# Take the best configuration found and record its test-set metrics
best_rf = random_rf.best_estimator_
evaluate_model("Tuned RF (RandomizedSearch)", best_rf)


In [6]:
# Final comparison table
# Display all model evaluations, including the tuned ones, best macro F1 first.
final_results = pd.DataFrame(results)
# Several models can share the same F1 score; a stable sort keeps tied rows in
# the order they were evaluated, so the ranking is the same on every run.
final_results = (
    final_results
    .sort_values(by='F1 Score', ascending=False, kind='mergesort')
    .reset_index(drop=True)
)
print(final_results)


                         Model  Accuracy  Precision    Recall  F1 Score
0          Logistic Regression  1.000000   1.000000  1.000000  1.000000
1       Support Vector Machine  1.000000   1.000000  1.000000  1.000000
2                Random Forest  1.000000   1.000000  1.000000  1.000000
3          K-Nearest Neighbors  1.000000   1.000000  1.000000  1.000000
4  Tuned RF (RandomizedSearch)  1.000000   1.000000  1.000000  1.000000
5       Tuned SVM (GridSearch)  0.966667   0.972222  0.962963  0.965899


In [8]:
# Select the best-performing model by macro F1 score.
# Several models may share the top score, so collect every one of them instead
# of silently treating the first row as the single winner.
top_f1 = final_results['F1 Score'].max()
tied_models = final_results.loc[final_results['F1 Score'] == top_f1, 'Model'].tolist()

# The first row holding the top score is reported as "the" best model
# (this is row 0 when final_results is sorted by F1 in descending order).
best_model_name = tied_models[0]
print(f"\n Best-performing model: {best_model_name}")

# Make ties explicit so the reader does not over-interpret the name above
if len(tied_models) > 1:
    print(f" Note: {len(tied_models)} models share the top F1 Score ({top_f1:.4f}): {', '.join(tied_models)}")



 Best-performing model: Logistic Regression
