# Week 6 – Model Evaluation and Hyperparameter Tuning (Wine Dataset)

In [1]:
# 1. Import libraries
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# 2. Load Wine dataset
# The loader's return value exposes the feature matrix as `.data` and the
# class labels as `.target`; both are pulled out in a single step.
wine = datasets.load_wine()
X, y = wine.data, wine.target

In [3]:
# 3. Train-test split
# Reserve 20% of the rows as a held-out test set. The fixed seed makes the
# partition reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# 4. Standardize features
# The scaling statistics are learned from the training rows only and then
# applied unchanged to both partitions, so the test rows never contribute
# to the fitted scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [5]:
# 5. Train a simple SVM classifier
# Untuned baseline: a linear-kernel SVC fitted on the standardized training
# features (`fit` returns the estimator itself, so it can be chained).
svm = SVC(kernel='linear', random_state=42).fit(X_train_scaled, y_train)

# Class predictions for the standardized held-out rows.
y_pred = svm.predict(X_test_scaled)

In [6]:
# 6. Evaluate SVM model
# Each row is (printed label, metric function, extra keyword arguments).
# Precision, recall and F1 are macro-averaged, i.e. the unweighted mean of the
# per-class scores.
print("=== Simple SVM Evaluation ===")
for label, metric, kwargs in (
    ("Accuracy:", accuracy_score, {}),
    ("Precision (macro):", precision_score, {"average": 'macro'}),
    ("Recall (macro):", recall_score, {"average": 'macro'}),
    ("F1-score (macro):", f1_score, {"average": 'macro'}),
):
    print(label, metric(y_test, y_pred, **kwargs))

# Per-class breakdown of the same predictions.
print("\nClassification Report:\n", classification_report(y_test, y_pred))

=== Simple SVM Evaluation ===
Accuracy: 0.9722222222222222
Precision (macro): 0.9629629629629629
Recall (macro): 0.9761904761904763
F1-score (macro): 0.9680464778503994

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      0.93      0.96        14
           2       0.89      1.00      0.94         8

    accuracy                           0.97        36
   macro avg       0.96      0.98      0.97        36
weighted avg       0.98      0.97      0.97        36



In [7]:
# 7. GridSearchCV for SVM hyperparameter tuning
# Exhaustive search over every C x kernel x gamma combination
# (4 * 3 * 2 = 24 candidates), each scored by 3-fold cross-validated accuracy.
#
# NOTE(review): X_train_scaled was standardized with statistics computed from
# the whole training set before the CV folds are formed, so every validation
# fold has already contributed to the scaling. Searching over a Pipeline of
# StandardScaler + SVC fitted on the raw X_train would avoid that; the
# behaviour is left as-is here.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf', 'poly'],
    gamma=['scale', 'auto'],
)

grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
)
grid.fit(X_train_scaled, y_train)

print("=== Best SVM Parameters (GridSearchCV) ===")
print(grid.best_params_)
print("Best CV Accuracy: {:.4f}".format(grid.best_score_))

=== Best SVM Parameters (GridSearchCV) ===
{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best CV Accuracy: 0.9789


In [8]:
# 8. RandomizedSearchCV for Random Forest hyperparameter tuning
# Only 5 of the 3 * 4 * 3 = 36 possible combinations are sampled, each scored
# by 3-fold cross-validated accuracy. Both the forest and the sampler are
# seeded for reproducibility. The search is fitted on the unscaled features.
param_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
}

random_rf = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_rf,
    n_iter=5,
    scoring='accuracy',
    cv=3,
    random_state=42,
)
random_rf.fit(X_train, y_train)

print("\n=== Best Random Forest Parameters (RandomizedSearchCV) ===")
print(random_rf.best_params_)
print("Best CV Accuracy: {:.4f}".format(random_rf.best_score_))



=== Best Random Forest Parameters (RandomizedSearchCV) ===
{'n_estimators': 100, 'min_samples_split': 10, 'max_depth': 5}
Best CV Accuracy: 0.9858


In [10]:
# 9. Train Logistic Regression as comparison
# Untuned reference model fitted on the standardized training features;
# max_iter is raised to 1000 to give the solver more iterations.
logreg = LogisticRegression(max_iter=1000, random_state=42).fit(X_train_scaled, y_train)
y_log = logreg.predict(X_test_scaled)

# Each row is (printed label, metric function, extra keyword arguments).
print("\n=== Logistic Regression Evaluation ===")
for label, metric, kwargs in (
    ("Accuracy:", accuracy_score, {}),
    ("Precision (macro):", precision_score, {"average": 'macro'}),
    ("Recall (macro):", recall_score, {"average": 'macro'}),
    ("F1-score (macro):", f1_score, {"average": 'macro'}),
):
    print(label, metric(y_test, y_log, **kwargs))



=== Logistic Regression Evaluation ===
Accuracy: 1.0
Precision (macro): 1.0
Recall (macro): 1.0
F1-score (macro): 1.0


In [11]:
# 10. Final Model Selection Summary
# Cross-validation accuracy (averaged over folds of the training data) and
# held-out test accuracy are different quantities and must not be compared
# directly. The tuned SVM and Random Forest are therefore also scored on the
# same test split as Logistic Regression, so the three models can be compared
# like-for-like.
#
# Both search objects use the default refit=True, so after `fit` they hold the
# best configuration retrained on the full training data and can predict
# directly.
svm_tuned_test_acc = accuracy_score(
    y_test, grid.predict(X_test_scaled)  # the SVM search was fitted on scaled features
)
rf_tuned_test_acc = accuracy_score(
    y_test, random_rf.predict(X_test)  # the forest search was fitted on unscaled features
)
logreg_test_acc = accuracy_score(y_test, y_log)

print("\n=== Final Model Selection Summary ===")
print("SVM (GridSearchCV tuned) CV Accuracy:", grid.best_score_)
print("Random Forest (RandomizedSearchCV tuned) CV Accuracy:", random_rf.best_score_)
print("Logistic Regression Test Accuracy:", logreg_test_acc)
# Held-out scores for the tuned models, comparable with the line above.
print("SVM (GridSearchCV tuned) Test Accuracy:", svm_tuned_test_acc)
print("Random Forest (RandomizedSearchCV tuned) Test Accuracy:", rf_tuned_test_acc)


=== Final Model Selection Summary ===
SVM (GridSearchCV tuned) CV Accuracy: 0.978871158392435
Random Forest (RandomizedSearchCV tuned) CV Accuracy: 0.9858156028368793
Logistic Regression Test Accuracy: 1.0
