In [1]:
#!pip install xgboost==2.0.3
#!pip install scikit-learn==1.8.0 joblib==1.3.2


import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, f1_score, matthews_corrcoef

In [2]:


# Load the red-wine table from the UCI repository; fields in this file are
# separated by semicolons rather than commas.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, delimiter=";")

# Only a cell's final expression is echoed, so the preview below is evaluated
# without being shown; the shape tuple is what the cell reports.
df.head()
df.shape


(1599, 12)

In [3]:
# Preprocessing

# Every column except the target is used as a feature.
X = df.drop(columns="quality")

# Keep the fitted encoder instead of discarding it: label_encoder.classes_ and
# label_encoder.inverse_transform() are the only way to turn the models'
# integer predictions back into the original quality scores.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["quality"])

# Stratified 80/20 split so both partitions keep the class proportions of y;
# the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)



In [4]:
# Candidate classifiers. Each key is the display name reported in the results
# table and, lower-cased with underscores, the file name of the saved pipeline.
# Insertion order is the order in which the models are trained and listed.
models = dict([
    ("Logistic Regression", LogisticRegression(max_iter=1000)),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("KNN", KNeighborsClassifier(n_neighbors=5)),
    ("Naive Bayes", GaussianNB()),
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("XGBoost", xgb.XGBClassifier(eval_metric="mlogloss")),
])

In [5]:
# Directory that receives one pickled pipeline per model.
model_dir = "model"
os.makedirs(model_dir, exist_ok=True)

# One dict of metrics per model; turned into a DataFrame in a later cell.
results = []

# Shared settings for precision/recall/F1: average the per-class scores
# weighted by class support, and score a class with no predicted samples as 0
# instead of emitting an undefined-metric warning.
weighted = {"average": "weighted", "zero_division": 0}

# Train, evaluate, and save
for name, clf in models.items():
    # Scaling lives inside the pipeline, so it is fitted on the training split
    # only and travels with the estimator when the pipeline is saved.
    pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("clf", clf)
    ])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    # AUC is best-effort: an estimator without predict_proba raises
    # AttributeError, and roc_auc_score raises ValueError when the scores do
    # not fit the labels. Only those are tolerated (and reported); a bare
    # `except:` would also hide KeyboardInterrupt and genuine bugs.
    try:
        y_prob = pipeline.predict_proba(X_test)
        auc = roc_auc_score(y_test, y_prob, multi_class="ovr")
    except (AttributeError, ValueError) as exc:
        print(f"AUC unavailable for {name}: {exc}")
        auc = np.nan

    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "AUC": auc,
        "Precision": precision_score(y_test, y_pred, **weighted),
        "Recall": recall_score(y_test, y_pred, **weighted),
        "F1": f1_score(y_test, y_pred, **weighted),
        "MCC": matthews_corrcoef(y_test, y_pred)
    })

    # Save pipeline (scaler + classifier) under a file name derived from the
    # display name, e.g. "Random Forest" -> "random_forest.pkl".
    filename = name.lower().replace(" ", "_") + ".pkl"
    joblib.dump(pipeline, f"{model_dir}/{filename}")

In [6]:
# Comparison table (plain-text view; the CSV export below is disabled)
results_df = pd.DataFrame(data=results)  # one row per model, one column per metric
print(results_df)
#results_df.to_csv("/content/project-folder/comparison_table.csv", index=False)

                 Model  Accuracy       AUC  Precision    Recall        F1  \
0  Logistic Regression  0.590625  0.763990   0.569525  0.590625  0.567298   
1        Decision Tree  0.609375  0.658352   0.612092  0.609375  0.609477   
2                  KNN  0.609375  0.698329   0.584116  0.609375  0.595887   
3          Naive Bayes  0.562500  0.683783   0.574461  0.562500  0.568067   
4        Random Forest  0.675000  0.766131   0.650369  0.675000  0.660332   
5              XGBoost  0.662500  0.835566   0.658716  0.662500  0.654401   

        MCC  
0  0.325020  
1  0.398241  
2  0.373313  
3  0.329911  
4  0.476837  
5  0.462040  


In [7]:
# Comparison table rendered as Markdown
# (DataFrame.to_markdown relies on the optional `tabulate` package).
results_df = pd.DataFrame(data=results)

markdown_table = results_df.to_markdown(index=False)  # index column omitted
print(markdown_table)
#with open("/content/project-folder/README.md", "w") as f: f.write(markdown_table)

| Model               |   Accuracy |      AUC |   Precision |   Recall |       F1 |      MCC |
|:--------------------|-----------:|---------:|------------:|---------:|---------:|---------:|
| Logistic Regression |   0.590625 | 0.76399  |    0.569525 | 0.590625 | 0.567298 | 0.32502  |
| Decision Tree       |   0.609375 | 0.658352 |    0.612092 | 0.609375 | 0.609477 | 0.398241 |
| KNN                 |   0.609375 | 0.698329 |    0.584116 | 0.609375 | 0.595887 | 0.373313 |
| Naive Bayes         |   0.5625   | 0.683783 |    0.574461 | 0.5625   | 0.568067 | 0.329911 |
| Random Forest       |   0.675    | 0.766131 |    0.650369 | 0.675    | 0.660332 | 0.476837 |
| XGBoost             |   0.6625   | 0.835566 |    0.658716 | 0.6625   | 0.654401 | 0.46204  |
