In [None]:
# Section header
# ===============================
# Wine Cultivar Model Building
# ===============================
# Import pandas for data manipulation
import pandas as pd
# Import numpy for numerical computations
import numpy as np
# Import wine dataset loader from sklearn
from sklearn.datasets import load_wine
# Import train-test split function
from sklearn.model_selection import train_test_split
# Import standard scaler for feature scaling
from sklearn.preprocessing import StandardScaler
# Import metrics for evaluation
from sklearn.metrics import accuracy_score, classification_report
# Import logistic regression model
from sklearn.linear_model import LogisticRegression
# Import random forest classifier
from sklearn.ensemble import RandomForestClassifier
# Import support vector classifier
from sklearn.svm import SVC
# Import k-neighbors classifier
from sklearn.neighbors import KNeighborsClassifier
# Import Gaussian naive bayes
from sklearn.naive_bayes import GaussianNB
# Import multi-layer perceptron classifier
from sklearn.neural_network import MLPClassifier
# Import lightgbm library
import lightgbm as lgb
# Import joblib for model saving
import joblib
# Import os for file operations
import os

In [None]:
# Fetch the wine data with pandas containers enabled, then keep the
# table that holds both the feature columns and the target as `df`.
wine = load_wine(as_frame=True)
df = wine["frame"]

In [None]:
# Columns used as model inputs (order here is the column order of X)
features = [
    "alcohol", "malic_acid", "magnesium",
    "total_phenols", "color_intensity", "proline",
]
# Input table: only the chosen columns
X = df.loc[:, features]
# Class labels to predict
y = df.loc[:, "target"]

In [None]:
# Hold out 20% of the rows for testing. Stratifying on the labels keeps the
# class proportions the same in both parts; the fixed seed makes the split
# repeatable.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Learn the scaling statistics from the training rows only
scaler = StandardScaler().fit(X_train)
# Apply that same fitted scaler to both partitions
X_train_scaled, X_test_scaled = (
    scaler.transform(part) for part in (X_train, X_test)
)

In [None]:
# Candidate classifiers, keyed by the display name printed during evaluation.
# The stochastic estimators are seeded explicitly so that a fresh
# Restart & Run All reproduces the same scores.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=200, random_state=42),
    # probability=True adds an internal cross-validated calibration step that
    # shuffles the data, so the seed matters here.
    "SVM": SVC(kernel="rbf", probability=True, random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "Naive Bayes": GaussianNB(),
    # Weight initialisation is random; without a seed each run can score differently.
    "Neural Network": MLPClassifier(
        hidden_layer_sizes=(64, 32), max_iter=1000, random_state=42
    ),
    # Seed pinned so the result does not depend on library defaults.
    "LightGBM": lgb.LGBMClassifier(random_state=42),
}

In [None]:
# Train every candidate on the scaled training set and report how it does
# on the scaled test set.
for name, model in models.items():
    # fit() returns the estimator itself, so training and prediction chain
    preds = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    # Header line with the model's display name
    print(f"\n{name}")
    # Overall fraction of correct test predictions
    accuracy = accuracy_score(y_test, preds)
    print("Accuracy:", accuracy)
    # Per-class precision / recall / f1 table
    report = classification_report(y_test, preds)
    print(report)


Logistic Regression
Accuracy: 0.9444444444444444
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.88      1.00      0.93        14
           2       1.00      0.80      0.89        10

    accuracy                           0.94        36
   macro avg       0.96      0.93      0.94        36
weighted avg       0.95      0.94      0.94        36


Random Forest
Accuracy: 0.9722222222222222
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.93      1.00      0.97        14
           2       1.00      0.90      0.95        10

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.97        36
weighted avg       0.97      0.97      0.97        36


SVM
Accuracy: 0.9444444444444444
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
 



In [None]:
# Create the output directory if it doesn't exist
os.makedirs("model", exist_ok=True)
# Choose the model to persist from its measured test accuracy instead of
# hard-coding a name, so the saved artifact always matches the evaluation.
# On a tie, the entry that appears first in `models` wins.
test_accuracy = {
    model_name: accuracy_score(y_test, fitted.predict(X_test_scaled))
    for model_name, fitted in models.items()
}
best_name = max(test_accuracy, key=test_accuracy.get)
print(f"Saving best model: {best_name} (accuracy={test_accuracy[best_name]:.4f})")
joblib.dump(models[best_name], "model/wine_cultivar_model.pkl")
# Save the fitted scaler as well: the models were trained on scaled inputs,
# so new data must go through this scaler before prediction.
joblib.dump(scaler, "model/scaler.pkl")

['model/scaler.pkl']