In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, classification_report)

In [4]:
# Load and split data
data = load_wine()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42, stratify=data.target
)

In [5]:
# Define models and hyperparameter grids
models = {
    'RandomForest': {
        'estimator': RandomForestClassifier(random_state=42),
        'param_grid': {
            'n_estimators': [100, 200],
            'max_depth': [None, 5, 10],
            'min_samples_split': [2, 5]
        }
    },
    'SVM': {
        'estimator': SVC(random_state=42, probability=True),
        'param_grid': {
            'C': [0.1, 1, 10],
            'kernel': ['linear', 'rbf'],
            'gamma': ['scale', 'auto']
        }
    },
    'GBM': {
        'estimator': GradientBoostingClassifier(random_state=42),
        'param_grid': {
            'n_estimators': [100, 200],
            'learning_rate': [0.01, 0.1],
            'max_depth': [3, 5]
        }
    }
}

results = {}

for name, cfg in models.items():
    print(f"\n=== {name} - GridSearchCV ===")
    gs = GridSearchCV(cfg['estimator'], cfg['param_grid'], cv=5, n_jobs=-1, scoring='accuracy')
    gs.fit(X_train, y_train)
    print("Best params:", gs.best_params_)
    best = gs.best_estimator_
    
    y_pred = best.predict(X_test)
    res = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1': f1_score(y_test, y_pred, average='weighted'),
        'classification_report': classification_report(y_test, y_pred, digits=4)
    }
    results[name] = res
    print(res['classification_report'])


=== RandomForest - GridSearchCV ===
Best params: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        12
           1     1.0000    1.0000    1.0000        14
           2     1.0000    1.0000    1.0000        10

    accuracy                         1.0000        36
   macro avg     1.0000    1.0000    1.0000        36
weighted avg     1.0000    1.0000    1.0000        36


=== SVM - GridSearchCV ===
Best params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        12
           1     0.9333    1.0000    0.9655        14
           2     1.0000    0.9000    0.9474        10

    accuracy                         0.9722        36
   macro avg     0.9778    0.9667    0.9710        36
weighted avg     0.9741    0.9722    0.9720        36


=== GBM - GridSearchCV ===
Best 

In [6]:

# Optionally performing RandomizedSearchCV for SVM to compare
print("\n=== SVM - RandomizedSearchCV ===")
from scipy.stats import uniform
param_dist = {
    'C': uniform(0.1, 10),
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}
rs = RandomizedSearchCV(SVC(random_state=42), param_dist, n_iter=10, cv=5,
                        n_jobs=-1, scoring='accuracy', random_state=42)
rs.fit(X_train, y_train)
print("Best params (RS):", rs.best_params_)
y_pred_rs = rs.best_estimator_.predict(X_test)
results['SVM_RS'] = {
    'accuracy': accuracy_score(y_test, y_pred_rs),
    'precision': precision_score(y_test, y_pred_rs, average='weighted'),
    'recall': recall_score(y_test, y_pred_rs, average='weighted'),
    'f1': f1_score(y_test, y_pred_rs, average='weighted'),
    'classification_report': classification_report(y_test, y_pred_rs, digits=4)
}
print(results['SVM_RS']['classification_report'])


=== SVM - RandomizedSearchCV ===
Best params (RS): {'C': 0.6808361216819946, 'gamma': 'auto', 'kernel': 'linear'}
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        12
           1     0.8750    1.0000    0.9333        14
           2     1.0000    0.8000    0.8889        10

    accuracy                         0.9444        36
   macro avg     0.9583    0.9333    0.9407        36
weighted avg     0.9514    0.9444    0.9432        36



In [7]:
# Summary comparison
print("\n=== Summary Metrics ===")
summary = pd.DataFrame(results).T[['accuracy', 'precision', 'recall', 'f1']]
print(summary)


=== Summary Metrics ===
              accuracy precision    recall        f1
RandomForest       1.0       1.0       1.0       1.0
SVM           0.972222  0.974074  0.972222   0.97197
GBM           0.944444  0.946581  0.944444  0.944269
SVM_RS        0.944444  0.951389  0.944444   0.94321
