# Classification Model Comparison

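This notebook compares Logistic Regression, k-NN, and Decision Tree classifiers on scikit-learn's Wine dataset. It covers data preprocessing (a 70/30 train/test split and feature standardization), model training, evaluation with classification reports and confusion matrices, and visualization of the results.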

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

## Load and Prepare Dataset

In [None]:
# Load the Wine dataset and wrap it in pandas objects so the feature names
# travel with the data.
data = load_wine()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Hold out 30% of the samples for evaluation. The dataset is small and has
# several classes, so the split is stratified on y: train and test keep the
# same class proportions, which keeps the per-class and macro-averaged
# metrics computed later comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training portion only and reuse its statistics for the
# test portion, so nothing from the held-out data leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Train Models

In [None]:
# Logistic regression and k-NN are trained on the standardized features.
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train_scaled, y_train)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

# The decision tree is trained on the raw (unscaled) features, so it must be
# given unscaled features at prediction time as well.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

## Predict and Evaluate

In [None]:
# For each model: predict on the held-out set, build the classification report
# as a dict, and turn it into a DataFrame with one row per class/average and
# one column per metric.

# Logistic regression (standardized features).
y_pred_log = log_reg.predict(X_test_scaled)
report_log = classification_report(y_test, y_pred_log, output_dict=True)
df_log = pd.DataFrame(report_log).T

# k-NN (standardized features).
y_pred_knn = knn.predict(X_test_scaled)
report_knn = classification_report(y_test, y_pred_knn, output_dict=True)
df_knn = pd.DataFrame(report_knn).T

# Decision tree (raw features, matching how it was trained).
y_pred_dt = dt.predict(X_test)
report_dt = classification_report(y_test, y_pred_dt, output_dict=True)
df_dt = pd.DataFrame(report_dt).T

## Confusion Matrices

In [None]:
# Confusion matrices for the three classifiers on the held-out set.
# scikit-learn's confusion_matrix puts the true classes on the rows and the
# predicted classes on the columns; the axes are labelled accordingly so the
# plots can be read without knowing that convention.
cm_log = confusion_matrix(y_test, y_pred_log)
cm_knn = confusion_matrix(y_test, y_pred_knn)
cm_dt = confusion_matrix(y_test, y_pred_dt)

# One panel per model: (matrix, colour map, title).
panels = [
    (cm_log, 'Blues', 'Confusion Matrix: Logistic Regression'),
    (cm_knn, 'Greens', 'Confusion Matrix: k-NN'),
    (cm_dt, 'Oranges', 'Confusion Matrix: Decision Tree'),
]

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
for ax, (cm, cmap, title) in zip(axes, panels):
    sns.heatmap(cm, annot=True, fmt='d', cmap=cmap, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
plt.tight_layout()
plt.show()

## Correlation Heatmap

In [None]:
# Pairwise correlation between all feature columns of the full dataset,
# drawn as a heatmap without per-cell annotations.
corr_matrix = X.corr()
corr_fig, corr_ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_matrix, cmap='coolwarm', annot=False, ax=corr_ax)
corr_ax.set_title('Feature Correlation Heatmap - Wine Dataset')
plt.show()

## Macro Average Metrics Comparison

In [None]:
# Pull the 'macro avg' row of each model's report and compare the three
# metrics across models in a grouped bar chart.
metrics = ['precision', 'recall', 'f1-score']
reports_by_model = {
    'Logistic Regression': df_log,
    'k-NN': df_knn,
    'Decision Tree': df_dt,
}
avg_scores = {
    model_name: [report.loc['macro avg', metric] for metric in metrics]
    for model_name, report in reports_by_model.items()
}

# Rows are metrics, columns are models, so each metric forms one bar group.
df_avg_scores = pd.DataFrame(avg_scores, index=metrics)
score_ax = df_avg_scores.plot(kind='bar', figsize=(10, 6))
score_ax.set_title('Macro Average Precision, Recall, and F1-Score Comparison')
score_ax.set_ylabel('Score')
score_ax.set_ylim(0, 1)
score_ax.legend(loc='lower right')
plt.tight_layout()
plt.show()