# 🍷 Wine Quality Prediction
**Objective:** Predict the quality of wine based on its chemical properties using machine learning models.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


## 📥 Load Dataset

In [None]:
# Read the red-wine CSV into a DataFrame (swap the path for
# winequality-white.csv to analyse the white-wine data instead).
# NOTE(review): some distributions of this file are ';'-delimited; if the
# preview below shows a single column, pass sep=';' — TODO confirm.
df = pd.read_csv(filepath_or_buffer='winequality-red.csv')
# Preview the first five rows.
df.head(n=5)

## 🔎 Data Exploration

In [None]:
# Structural overview of the frame; info() prints straight to stdout.
df.info()
# A notebook cell only renders its final expression, so the summary
# statistics must be printed explicitly or they are silently discarded.
print(df.describe())
# Missing-value count per column; as the last expression of the cell this
# is rendered automatically.
df.isnull().sum()

In [None]:
# Bar chart of how many rows fall into each value of the 'quality' column.
ax = sns.countplot(x='quality', data=df)
ax.set_title('Wine Quality Distribution')
plt.show()

## 📊 Correlation Heatmap

In [None]:
# Pairwise correlation between every column, drawn as an annotated heatmap.
fig, ax = plt.subplots(figsize=(10, 6))
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()

## ✂️ Prepare Data

In [None]:
# Separate the predictors from the target column.
X = df.drop(columns='quality')
y = df['quality']

# Hold out 20% of the rows for evaluation. Stratifying on y keeps each
# quality score at the same proportion in both splits; without it, rare
# scores can end up missing from the test set entirely.
# Note: stratification raises ValueError if any score occurs only once.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then apply the same transform
# to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## 🤖 Random Forest Classifier

In [None]:
# Train a random forest with default hyper-parameters; the fixed seed keeps
# the run reproducible.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
# zero_division=0 reports 0 for classes the model never predicts, which is
# the same value as the default but without an UndefinedMetricWarning.
print(classification_report(y_test, y_pred_rf, zero_division=0))

## 🤖 SGD Classifier

In [None]:
# Train a linear classifier with stochastic gradient descent; the fixed
# seed keeps the run reproducible.
sgd = SGDClassifier(max_iter=1000, tol=1e-3, random_state=42)
sgd.fit(X_train, y_train)
y_pred_sgd = sgd.predict(X_test)
print("SGD Accuracy:", accuracy_score(y_test, y_pred_sgd))
# zero_division=0 reports 0 for classes the model never predicts, which is
# the same value as the default but without an UndefinedMetricWarning.
print(classification_report(y_test, y_pred_sgd, zero_division=0))

## 🤖 SVC Classifier

In [None]:
# Train a support vector classifier with scikit-learn's default settings.
svc = SVC()
svc.fit(X_train, y_train)
y_pred_svc = svc.predict(X_test)
print("SVC Accuracy:", accuracy_score(y_test, y_pred_svc))
# zero_division=0 reports 0 for classes the model never predicts, which is
# the same value as the default but without an UndefinedMetricWarning.
print(classification_report(y_test, y_pred_svc, zero_division=0))

## 📊 Confusion Matrices

In [None]:
# Test-set predictions of each fitted model, keyed by display name.
models = {'Random Forest': y_pred_rf, 'SGD': y_pred_sgd, 'SVC': y_pred_svc}

# One shared, sorted label set (every score seen in y_test or in any
# model's predictions) so all matrices have the same shape and the axes
# show the actual quality scores instead of positional indices 0..k-1.
labels = np.unique(np.concatenate([np.asarray(y_test), *models.values()]))

for name, preds in models.items():
    cm = confusion_matrix(y_test, preds, labels=labels)
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=labels,
        yticklabels=labels,
    )
    plt.title(f"{name} Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

## ✅ Summary
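- Random Forest typically achieves the highest accuracy of the three models on this dataset; confirm against the accuracy scores printed above.
- SGD trains quickly, but as a linear model it can underperform on this data.
- SVC can be accurate, but its training cost grows quickly with dataset size.
- Alcohol, volatile acidity, and citric acid show notable correlations with wine quality in the heatmap above (correlation, not proof of causation).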