In [1]:
from sklearn.datasets import load_wine
import pandas as pd

# Load the dataset
wine = load_wine()

In [2]:
# Convert to DataFrame
X = pd.DataFrame(wine.data, columns=wine.feature_names)
y = pd.Series(wine.target, name='target')

In [3]:
# Short Summary
print("Features Shape:", X.shape)
print("Target Classes:", wine.target_names)
print("Feature Names:", wine.feature_names)
print("Class Distribution:\n", y.value_counts())

Features Shape: (178, 13)
Target Classes: ['class_0' 'class_1' 'class_2']
Feature Names: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Class Distribution:
 target
1    71
0    59
2    48
Name: count, dtype: int64


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [5]:
# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X: Features (13 wine chemical attributes)

y: Target labels (wine class: 0, 1, or 2)

test_size=0.2: Reserves 20% of the data for testing and uses 80% for training.

random_state=42: Ensures reproducibility — same split every time you run the code.

X_train: Feature values for the training set (80%)

X_test: Feature values for the test set (20%)

y_train: Labels for the training set

y_test: Labels for the test set

If X has 178 samples:

X_train: (142, 13)

X_test: (36, 13)

y_train: (142,)

y_test: (36,)

In [7]:
print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))

Training samples: 142
Testing samples: 36


In [6]:
# Kernels to test
kernels = ['linear', 'rbf', 'poly', 'sigmoid']
results = {}

for kernel in kernels:
    model = SVC(kernel=kernel, gamma='auto')
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    results[kernel] = report
    print(f"\nKernel: {kernel}")
    print(classification_report(y_test, y_pred))


Kernel: linear
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36


Kernel: rbf
              precision    recall  f1-score   support

           0       1.00      0.07      0.13        14
           1       0.41      1.00      0.58        14
           2       1.00      0.12      0.22         8

    accuracy                           0.44        36
   macro avg       0.80      0.40      0.31        36
weighted avg       0.77      0.44      0.33        36


Kernel: poly
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
comparison = pd.DataFrame({
    kernel: {
        'precision': results[kernel]['weighted avg']['precision'],
        'recall': results[kernel]['weighted avg']['recall'],
        'accuracy': results[kernel]['accuracy']
    }
    for kernel in results
}).T

print("\n🔍 Kernel Performance Comparison:\n")
print(comparison.round(3))



🔍 Kernel Performance Comparison:

         precision  recall  accuracy
linear       1.000   1.000     1.000
rbf          0.771   0.444     0.444
poly         1.000   1.000     1.000
sigmoid      0.151   0.389     0.389


Linear kernel performed best, indicating that the data is linearly separable.

RBF and Polynomial kernels also performed well but slightly lower.

Sigmoid kernel performed worst, which is common unless data is well suited for it.