# Wine Quality Analysis Project

This notebook contains two main activities:

**Activity 1**: Data preprocessing and visualization

**Activity 2**: Machine learning model to predict wine quality

## Activity 1: Data Preprocessing and Visualization

In [None]:
import pandas as pd

# Read the wine CSV and discard its 'Id' column (no later cell uses it) in a
# single expression, then preview the first rows as the cell output.
data = pd.read_csv("WineQT.csv").drop(columns=['Id'])
data.head()

In [None]:
# Count the null entries in each column; a column of zeros means nothing is missing.
print(data.isna().sum())

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every predictor column (all columns except the 'quality' target)
# into a separate frame instead of overwriting `data`. Leaving `data` untouched
# means that:
#   * re-running this cell always starts from the same, unscaled input;
#   * plots that read `data` keep the original measurement units rather than
#     z-scores; and
#   * scaling statistics computed over every row are not baked into the frame
#     that is later split into train and test sets (test-set leakage).
# Use `data_scaled` wherever a scale-sensitive method needs standardized inputs.
features = data.columns.drop('quality')
scaler = StandardScaler()
data_scaled = data.copy()
data_scaled[features] = scaler.fit_transform(data[features])
data_scaled.head()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Scatter of alcohol (x) against quality (y), drawn on the current axes and
# configured through the returned Axes object.
ax = sns.scatterplot(data=data, x='alcohol', y='quality')
ax.set_title("Alcohol vs Wine Quality")
ax.grid(True)
plt.show()

In [None]:
# One box per quality value showing the distribution of 'fixed acidity'.
ax = sns.boxplot(data=data, x='quality', y='fixed acidity')
ax.set_title("Fixed Acidity across Wine Quality")
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of `data`,
# drawn on an explicitly created 10x8 inch figure.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()

## Activity 2: Prediction Using Machine Learning

In [None]:
from sklearn.model_selection import train_test_split

# Predictors are every column except the target; the target is 'quality'.
X = data.drop(columns='quality')
y = data['quality']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on `y` - consider stratify=y if the
# quality classes turn out to be imbalanced; confirm the class counts first.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Build a seeded random forest and train it on the training split in one step
# (fit() returns the estimator itself), then show the fitted estimator as the
# cell output.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
model

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Score the fitted model on the held-out split.
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

# zero_division=0 reports 0.0 for any class the model never predicts - the same
# number the default "warn" setting produces - without emitting
# UndefinedMetricWarning into the cell output.
# sep="" stops print() from inserting a space in front of the report, which
# would push its header row one character out of line with the rows below it.
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
    sep="",
)