In [16]:
# Exercise: Wine Quality Classification
# Task: Build a decision tree classifier to predict the quality of wine

import pandas as pd
# Location of the red-wine quality CSV on the UCI archive server
url = (
    'https://archive.ics.uci.edu/ml/machine-learning-databases/'
    'wine-quality/winequality-red.csv'
)

# Read the remote file into a DataFrame; its fields are separated by ';'
wine_quality = pd.read_csv(url, delimiter=';')
 
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
import joblib

# Predictors are every column except the 'quality' target.
X = wine_quality.drop('quality', axis=1)
y = wine_quality['quality']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used for preprocessing, which is
# a form of train/test leakage.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep `X` bound to a scaled array, as before, but scaled with the
# training-only statistics so it is consistent with X_train / X_test.
X = scaler.transform(X)

# Fit a decision tree on the training split. The seed is fixed because the
# estimator permutes features at each split and breaks ties between equally
# good splits at random; without it, the tree (and every metric below) can
# differ from run to run on identical data.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split: overall accuracy, per-class
# precision/recall/F1, and the confusion matrix of true vs. predicted labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

# Write the fitted tree to disk, then read it straight back in
joblib.dump(model, 'wine_qualityDTM.joblib')
loaded_model = joblib.load('wine_qualityDTM.joblib')

# Predict the whole held-out set with the reloaded model
loaded_model_prediction = loaded_model.predict(X_test)
print('Loaded Model Prediction:', loaded_model_prediction)

# Predict only a leading slice of the test set with the reloaded model
num_samples = 5  # how many leading test rows to predict
X_new = X_test[0:num_samples]
predictions_new = loaded_model.predict(X_new)
print(
    f"\nPredictions for the first {num_samples} samples:",
    predictions_new,
    sep="\n",
)

Accuracy: 0.57
Classification Report:
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.65      0.66      0.66       130
           6       0.57      0.55      0.56       132
           7       0.53      0.55      0.54        42
           8       0.00      0.00      0.00         5

    accuracy                           0.57       320
   macro avg       0.29      0.29      0.29       320
weighted avg       0.57      0.57      0.57       320

Confusion Matrix:
[[ 0  0  0  1  0  0]
 [ 0  0  3  7  0  0]
 [ 1  7 86 33  3  0]
 [ 0  4 38 73 15  2]
 [ 0  0  4 13 23  2]
 [ 0  0  1  2  2  0]]
Loaded Model Prediction: [5 6 6 4 6 5 5 5 6 6 7 6 6 5 5 6 6 6 7 5 5 6 6 6 6 6 6 6 5 6 6 6 6 4 6 6 6
 6 5 6 6 5 5 5 5 6 6 5 5 6 5 5 6 7 5 6 6 5 6 5 6 5 6 5 6 4 6 6 7 5 6 6 5 6
 6 5 6 5 5 7 5 7 5 6 7 7 5 6 6 6 6 5 5 5 5 5 5 6 5 5 5 5 5 7 6 7 6 5 7 6 6
 5 7 5 5 5 5 6 5 6 6 6 6