<a href="https://colab.research.google.com/github/The-Godfatherr/LAB-AIML/blob/main/Lab_10_Abhinav_Verma_E23CSEU1335.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 1. Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, classification_report


In [2]:
# 2. Read the Dataset
# Source file: the white-wine CSV from the UCI wine-quality directory.
# Fields in this file are separated by semicolons rather than commas.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
df = pd.read_csv(url, delimiter=';')
# Preview the first five rows as the cell output.
df.head(5)


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [3]:
# 3. Extract the Independent and Dependent Variable
# Target: the 'quality' column; features: every remaining column of df.
y = df['quality']
X = df.drop(columns='quality')


In [4]:
# 4. Convert Output Quality into 3 Categories (poor, average, best)
def label_quality(q):
    """Bucket a raw quality score into one of three class ids.

    Args:
        q: Numeric quality score.

    Returns:
        0 ("poor") when q <= 4, 1 ("average") when q <= 6,
        and 2 ("best") for every other value.
    """
    poor_max, average_max = 4, 6
    return 0 if q <= poor_max else 1 if q <= average_max else 2

# Relabel every score element-wise with label_quality, then show how many
# samples land in each of the three classes.
y_cat = y.map(label_quality)
y_cat.value_counts()


Unnamed: 0_level_0,count
quality,Unnamed: 1_level_1
1,3655
2,1060
0,183


In [5]:
# 5. Split Dataset into Train/Test (75-25 division)
# Hold out 25% of the rows for testing. Stratifying on y_cat keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_cat,
    test_size=0.25,
    stratify=y_cat,
    random_state=42,
)


In [6]:
# 6. Perform Normalization on Numerical Features
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits so no test-set information is used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
# 7. Build Random Forest Classification Model, Predict Test Values
# Train a Random Forest with default hyper-parameters (seeded for
# repeatability) on the scaled training data, then predict the test split.
rf = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)


In [8]:
# 8. Check the Model Performance (Confusion Matrix & Metrics)
# Score the Random Forest predictions against the held-out test labels.
# Precision, recall and F1 are macro-averaged over the three classes.
cm_rf = confusion_matrix(y_test, y_pred_rf)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='macro')
recall_rf = recall_score(y_test, y_pred_rf, average='macro')
f1_rf = f1_score(y_test, y_pred_rf, average='macro')

print("Random Forest Classifier Results:")
# sep="" keeps print() from inserting a space after the "\n"; with the default
# separator the first row of the matrix/report is shifted one column right.
print("Confusion Matrix:\n", cm_rf, sep="")
print(f"Accuracy: {accuracy_rf:.4f}")
print(f"Precision: {precision_rf:.4f}")
print(f"Recall: {recall_rf:.4f}")
print(f"F1-Score: {f1_rf:.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_rf), sep="")


Random Forest Classifier Results:
Confusion Matrix:
 [[ 10  34   2]
 [  2 864  48]
 [  0 103 162]]
Accuracy: 0.8457
Precision: 0.8202
Recall: 0.5913
F1-Score: 0.6421

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.22      0.34        46
           1       0.86      0.95      0.90       914
           2       0.76      0.61      0.68       265

    accuracy                           0.85      1225
   macro avg       0.82      0.59      0.64      1225
weighted avg       0.84      0.85      0.83      1225



In [9]:
# 9. Compare with Decision Tree Classifier
# Baseline: a single decision tree trained and tested on the same scaled
# splits as the Random Forest, seeded for repeatability.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train_scaled, y_train)
y_pred_dt = dt.predict(X_test_scaled)

# Same metric set as the Random Forest cell (macro-averaged precision/recall/F1).
cm_dt = confusion_matrix(y_test, y_pred_dt)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='macro')
recall_dt = recall_score(y_test, y_pred_dt, average='macro')
f1_dt = f1_score(y_test, y_pred_dt, average='macro')

print("Decision Tree Classifier Results:")
# sep="" keeps print() from inserting a space after the "\n"; with the default
# separator the first row of the matrix/report/table is shifted one column right.
print("Confusion Matrix:\n", cm_dt, sep="")
print(f"Accuracy: {accuracy_dt:.4f}")
print(f"Precision: {precision_dt:.4f}")
print(f"Recall: {recall_dt:.4f}")
print(f"F1-Score: {f1_dt:.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_dt), sep="")

# Summary Table: one row per model, one column per metric.
results = pd.DataFrame({
    "Model": ["Random Forest", "Decision Tree"],
    "Accuracy": [accuracy_rf, accuracy_dt],
    "Precision": [precision_rf, precision_dt],
    "Recall": [recall_rf, recall_dt],
    "F1-Score": [f1_rf, f1_dt],
})
print("\nComparison Table:\n", results, sep="")


Decision Tree Classifier Results:
Confusion Matrix:
 [[ 19  26   1]
 [ 33 775 106]
 [  2  90 173]]
Accuracy: 0.7894
Precision: 0.6132
Recall: 0.6379
F1-Score: 0.6245

Classification Report:
               precision    recall  f1-score   support

           0       0.35      0.41      0.38        46
           1       0.87      0.85      0.86       914
           2       0.62      0.65      0.63       265

    accuracy                           0.79      1225
   macro avg       0.61      0.64      0.62      1225
weighted avg       0.80      0.79      0.79      1225


Comparison Table:
            Model  Accuracy  Precision    Recall  F1-Score
0  Random Forest  0.845714   0.820207  0.591336  0.642141
1  Decision Tree  0.789388   0.613173  0.637932  0.624529
