In [2]:
"""Model building for Wine Cultivar Origin Prediction.

This notebook loads the sklearn Wine dataset, selects six features,
trains a scaled SVM classifier, evaluates it, and saves the trained
pipeline to 'wine_cultivar_model.pkl' (in this same 'model' folder).
"""

from pathlib import Path
from typing import List

import json

import joblib
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Destination file for the serialized pipeline. The path is relative, so it
# is resolved against the current working directory at save time.
MODEL_PATH = Path("wine_cultivar_model").with_suffix(".pkl")

# The six input columns used for training (the target column is not a feature).
# The same list is stored next to the model when it is saved.
SELECTED_FEATURES: List[str] = [
    "alcohol", "malic_acid",
    "alcalinity_of_ash", "flavanoids",
    "color_intensity", "proline",
]

# 1. Load dataset as a single DataFrame that holds the feature columns and
#    the "target" column.
wine = load_wine(as_frame=True)
df: pd.DataFrame = wine.frame

# Work on a copy of the chosen columns so `df` itself is left untouched.
X_raw = df.loc[:, SELECTED_FEATURES].copy()

# 2. Basic preprocessing: treat +/-inf as missing, then drop incomplete rows.
X = X_raw.replace(to_replace=[np.inf, -np.inf], value=np.nan).dropna()

# Keep only the labels whose rows survived the drop, aligned on X's index.
y = df["target"].loc[X.index]

# One seed for every stochastic step below, so a fresh "Restart & Run All"
# reproduces both the split and the fitted model.
RANDOM_STATE = 42

# 3. Train/test split: hold out 20% of the rows, stratified on the label so
#    each class keeps its proportion in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=RANDOM_STATE
)

# 4. Build pipeline: scaling + SVM classifier.
#    The scaler is fitted inside the pipeline, i.e. on the training rows only.
#    probability=True makes SVC fit its probability estimates with an internal
#    shuffled cross-validation; without random_state those estimates (and so
#    the saved model's predict_proba) differ from run to run.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "classifier",
            SVC(
                kernel="rbf",
                gamma="scale",
                probability=True,
                random_state=RANDOM_STATE,
            ),
        ),
    ]
)

# 5. Train model
pipeline.fit(X_train, y_train)

# 6. Evaluation on the held-out test rows
y_pred = pipeline.predict(X_test)

accuracy = metrics.accuracy_score(y_test, y_pred)

# Macro averaging weights every class equally; zero_division=0 reports 0
# instead of warning when a class has no predicted/true samples.
macro_scores = metrics.precision_recall_fscore_support(
    y_test, y_pred, average="macro", zero_division=0
)
precision, recall, f1, _ = macro_scores

# Per-class breakdown as a plain dict so it can be dumped as JSON below.
report = metrics.classification_report(
    y_test, y_pred, output_dict=True, zero_division=0
)

# Headline numbers first, then the full report.
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision (macro):", precision),
    ("Recall (macro):", recall),
    ("F1-score (macro):", f1),
):
    print(label, value)
print("\nClassification report (dict):")
print(json.dumps(report, indent=2))

# 7. Save trained model and feature metadata into a single pickle file.
#    The artifact is first written to a temporary sibling file and then moved
#    over MODEL_PATH, so a failed dump never destroys a previously saved model
#    (deleting the old file up front would).
artifact = {"model": pipeline, "features": SELECTED_FEATURES}
tmp_path = MODEL_PATH.with_name(MODEL_PATH.name + ".tmp")
try:
    joblib.dump(artifact, tmp_path)
    # Path.replace overwrites an existing destination.
    tmp_path.replace(MODEL_PATH)
finally:
    # No-op after a successful replace; removes the partial file on failure.
    tmp_path.unlink(missing_ok=True)

# NOTE: resolve() prints the absolute path, which includes the local user
# directory; clear this cell's output before sharing the notebook.
print(f"Saved trained model to: {MODEL_PATH.resolve()}")



Accuracy: 0.9444444444444444
Precision (macro): 0.9583333333333334
Recall (macro): 0.9333333333333332
F1-score (macro): 0.9407407407407407

Classification report (dict):
{
  "0": {
    "precision": 1.0,
    "recall": 1.0,
    "f1-score": 1.0,
    "support": 12.0
  },
  "1": {
    "precision": 0.875,
    "recall": 1.0,
    "f1-score": 0.9333333333333333,
    "support": 14.0
  },
  "2": {
    "precision": 1.0,
    "recall": 0.8,
    "f1-score": 0.8888888888888888,
    "support": 10.0
  },
  "accuracy": 0.9444444444444444,
  "macro avg": {
    "precision": 0.9583333333333334,
    "recall": 0.9333333333333332,
    "f1-score": 0.9407407407407407,
    "support": 36.0
  },
  "weighted avg": {
    "precision": 0.9513888888888888,
    "recall": 0.9444444444444444,
    "f1-score": 0.9432098765432099,
    "support": 36.0
  }
}
Saved trained model to: C:\Users\Fizzy Babz\Downloads\WineCultivar_Project_Mirabel_23AG\model\wine_cultivar_model.pkl
