# 🍷 Wine Quality Classification - AI Project
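This project walks through a complete ML pipeline on the UCI red-wine quality dataset: loading, cleaning, exploratory plots, feature scaling, fuzzy feature engineering, and decision-tree depth tuning (hill climbing vs. grid search), followed by evaluation on a held-out test set.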

## 📥 1. Load Dataset

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split

# Source of the red-wine quality data; point this at a local copy if the
# remote file is unavailable. The file is semicolon-delimited.
WINE_CSV_URL = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "wine-quality/winequality-red.csv"
)
df = pd.read_csv(WINE_CSV_URL, sep=';')
# Preview the first rows as the cell output.
df.head()
    

: 

## 🧹 2. Clean Missing Values and Remove Duplicates

In [None]:

# Report the per-column count of missing entries, then drop every row that
# has at least one missing value (modifies df in place).
missing_per_column = df.isna().sum()
print("Missing values:\n", missing_per_column)
df.dropna(axis=0, how='any', inplace=True)

# Report how many rows are exact repeats of an earlier row, then drop them
# in place.
n_duplicate_rows = df.duplicated().sum()
print("Duplicate rows:", n_duplicate_rows)
df.drop_duplicates(inplace=True)
    

## 📊 3. Train/Test Split and Exploratory Plots (Training Set)

In [None]:

import seaborn as sns
import matplotlib.pyplot as plt

# Separate the target column from the predictors, then hold out 20% of the
# rows for testing. The split is stratified on the target and seeded so it
# is reproducible.
TARGET_COLUMN = 'quality'
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# One histogram per predictor, drawn from the training rows only.
X_train.hist(figsize=(15, 10), bins=20)
plt.tight_layout()
plt.show()
    

## 🔄 4. Scale Numeric Features

In [None]:

from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the training predictors only and reuse the fitted
# ranges for the test predictors, so no test-set statistics leak into training.
scaler = MinMaxScaler()

# The scaler returns plain arrays. Rebuild DataFrames with the original column
# names AND the original row index, so the scaled frames stay label-aligned
# with y_train / y_test in any later pandas operation (concat, join, boolean
# masks built from the targets, ...).
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)
    

## 🌫️ 5. Create Fuzzy Features

In [None]:

import numpy as np

def triangular(x, a, b, c):
    """Return the triangular fuzzy membership of ``x`` for the triangle (a, b, c).

    The membership rises linearly from 0 at ``a`` to 1 at ``b`` and falls
    linearly back to 0 at ``c``; it is 0 outside ``[a, c]``.

    Degenerate "shoulder" triangles are supported: when ``a == b`` the rising
    edge is a vertical step at ``a``, and when ``b == c`` the falling edge is
    a vertical step at ``c``. In both cases the membership at the peak is 1.
    Without this handling the slope would be computed as a division by zero,
    yielding NaN at the peak for arrays/Series and ``ZeroDivisionError`` for
    plain Python floats.

    Args:
        x: Scalar, NumPy array or pandas Series of values to evaluate.
        a: Left foot of the triangle.
        b: Peak of the triangle.
        c: Right foot of the triangle.

    Returns:
        Membership degrees with the same shape/container type as ``x``.
    """
    # Rising edge: linear slope, or a step function when the edge is vertical.
    rising = (x - a) / (b - a) if b != a else (x >= a) * 1.0
    # Falling edge: linear slope, or a step function when the edge is vertical.
    falling = (c - x) / (c - b) if c != b else (x <= c) * 1.0
    return np.maximum(np.minimum(rising, falling), 0)

# Add low / medium / high fuzzy membership columns for two example features.
# The triangle breakpoints (min, mean, max) are always taken from the scaled
# TRAINING column and reused unchanged for the test set.
for feature in ['alcohol', 'sulphates']:
    train_col = X_train_scaled[feature]
    lo, mid, hi = train_col.min(), train_col.mean(), train_col.max()
    memberships = {
        'low': (lo, lo, mid),
        'medium': (lo, mid, hi),
        'high': (mid, hi, hi),
    }
    for frame in (X_train_scaled, X_test_scaled):
        for label, (a, b, c) in memberships.items():
            frame[f'{feature}_{label}'] = triangular(frame[feature], a, b, c)
    

## 🌳 6. Train Decision Tree and Hill-Climbing Optimization

In [None]:

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

from sklearn.model_selection import cross_val_score

# Hill-climbing search for the decision tree's max_depth.
#
# Each candidate depth is scored with 5-fold cross-validation on the training
# data. Scoring a tree on the very rows it was fitted on would make accuracy
# rise with depth and always select the deepest tree; the test set must stay
# untouched until the final evaluation.
#
# Starting from the shallowest tree, the search repeatedly looks at the
# neighbouring depths (depth - 1 and depth + 1), moves to the best neighbour
# if it strictly improves the score, and stops otherwise. It can therefore
# stop at a local optimum without visiting every depth.
MIN_DEPTH, MAX_DEPTH = 1, 10


def _cv_accuracy(depth):
    """Return mean 5-fold CV accuracy on the training data for ``max_depth=depth``."""
    clf = DecisionTreeClassifier(max_depth=depth, random_state=42)
    return cross_val_score(clf, X_train_scaled, y_train, cv=5).mean()


best_depth = MIN_DEPTH
best_score = _cv_accuracy(best_depth)
scores = [(best_depth, best_score)]  # (depth, score) pairs in evaluation order
evaluated = {best_depth: best_score}  # cache so no depth is scored twice

while True:
    neighbours = [
        d for d in (best_depth - 1, best_depth + 1)
        if MIN_DEPTH <= d <= MAX_DEPTH
    ]
    for d in neighbours:
        if d not in evaluated:
            evaluated[d] = _cv_accuracy(d)
            scores.append((d, evaluated[d]))
    candidate = max(neighbours, key=evaluated.get)
    # Require a strict improvement so the search is guaranteed to terminate.
    if evaluated[candidate] <= best_score:
        break
    best_depth, best_score = candidate, evaluated[candidate]

print("Hill Climbing Best Depth:", best_depth)
    

## 🔍 7. Compare with Brute-Force Grid Search

In [None]:

# Brute-force Grid Search
from sklearn.model_selection import GridSearchCV

# Exhaustively score every depth from 1 to 10 with 5-fold cross-validation
# on the training data.
depth_candidates = list(range(1, 11))
param_grid = {'max_depth': depth_candidates}
base_tree = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(base_tree, param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

grid_best_params = grid_search.best_params_
print("Grid Search Best Depth:", grid_best_params['max_depth'])
    

## 🌲 8. Visualize Final Tree

In [None]:

from sklearn.tree import plot_tree

# Refit a tree on the full training set at the depth selected by grid search.
final_clf = DecisionTreeClassifier(max_depth=grid_search.best_params_['max_depth'], random_state=42)
final_clf.fit(X_train_scaled, y_train)

plt.figure(figsize=(20, 10))
plot_tree(
    final_clf,
    # Pass a plain list: some scikit-learn releases validate feature_names
    # strictly and reject a pandas Index.
    feature_names=list(X_train_scaled.columns),
    # Use the classes the fitted tree actually knows about, in its own order,
    # so the labels are guaranteed to line up with the node class counts.
    class_names=[str(c) for c in final_clf.classes_],
    filled=True,
)
plt.show()
    

## 📈 9. Evaluate on Test Set

In [None]:

from sklearn.metrics import classification_report, confusion_matrix

# Predict on the held-out test set with the final tree and report accuracy,
# the per-class classification report, and the confusion matrix.
y_pred = final_clf.predict(X_test_scaled)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)
    