In [None]:
import pandas as pd

# Read the red-wine CSV into a DataFrame (use 'winequality-white.csv' for the
# white-wine file instead).
# NOTE(review): some published copies of this dataset are ';'-delimited; if the
# preview below shows one fused column, pass sep=';' — confirm against the file.
wine_data = pd.read_csv(filepath_or_buffer='winequality-red.csv')
# Plain-text preview of the first five rows.
print(wine_data.head(n=5))

In [None]:
# Structural overview: column names, non-null counts and dtypes.
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called directly -- wrapping it in print() appended a stray "None" line.
wine_data.info()
# Summary statistics (count, mean, std, quartiles, min/max) per numeric column.
print(wine_data.describe())


In [None]:
# Discard every row that contains at least one missing value. This simple
# strategy is only reasonable when few rows are affected.
wine_data = wine_data.dropna(axis=0, how='any')


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Distribution of the target column with a KDE overlay.
# discrete=True draws one unit-wide bar centred on each score instead of
# auto-chosen continuous bins, which leave gaps or split scores unevenly.
# NOTE(review): assumes 'quality' holds integer scores -- confirm.
sns.histplot(wine_data['quality'], discrete=True, kde=True)
plt.title('Distribution of wine quality scores')
plt.show()


In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
plt.figure(figsize=(10, 8))
sns.heatmap(
    wine_data.corr(),
    annot=True,
    fmt='.2f',        # fixed two-decimal labels keep the annotations aligned
    cmap='coolwarm',
    # Correlations live in [-1, 1]; pinning the colour limits puts the neutral
    # midpoint of the diverging map at zero correlation. Left unset, the limits
    # come from the data and the midpoint drifts away from zero.
    vmin=-1,
    vmax=1,
)
plt.title('Pairwise correlation matrix')
plt.show()


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column (everything except the 'quality' target).
# NOTE(review): the scaler is fitted on the whole dataset before any train/test
# split, so test-row statistics leak into the scaling; the cells visible in this
# notebook also train on the unscaled frame rather than on wine_data_scaled.
# Confirm whether this array is used elsewhere.
scaler = StandardScaler()
wine_data_scaled = scaler.fit_transform(wine_data.drop(columns='quality'))


In [None]:
from sklearn.model_selection import train_test_split

# Separate the predictors from the 'quality' target.
X = wine_data.drop('quality', axis=1)
y = wine_data['quality']

# Hold out 20% of the rows for evaluation with a fixed seed for reproducibility.
# stratify=y keeps the class proportions of the target the same in both
# partitions, so rare quality scores are not dropped from (or over-represented
# in) the test set by chance.
# NOTE(review): stratification raises ValueError if any class has fewer than
# two rows -- confirm the class counts after dropna().
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random-forest classifier with library-default hyperparameters; the fixed seed
# keeps the fitted model reproducible between runs.
model = RandomForestClassifier(random_state=42)
# Fit on the training partition only.
model.fit(X=X_train, y=y_train)


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)
# Overall fraction of correctly predicted labels.
print(accuracy_score(y_test, y_pred))
# Per-class precision / recall / F1. zero_division=0 reports 0 for any class the
# model never predicts (precision is undefined there) instead of emitting an
# UndefinedMetricWarning into the cell output.
print(classification_report(y_test, y_pred, zero_division=0))


In [None]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid: 3 forest sizes x 4 depth limits = 12 candidate settings
# (max_depth=None leaves tree depth unrestricted).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
)
# Exhaustive search over the grid, scoring each candidate with 5-fold
# cross-validation on the training partition.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)
