<a href="https://colab.research.google.com/github/Benedictakel/Wine_Quality_Prediction/blob/main/Wine_Quality_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Location of the red-wine quality CSV that the rest of the notebook works on.
url = "https://raw.githubusercontent.com/selva86/datasets/master/winequality-red.csv"

# Read the remote CSV into a DataFrame, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)


In [None]:
# Column dtypes and non-null counts (df.info() prints directly).
df.info()

# Summary statistics. A bare `df.describe()` in the middle of a cell is
# silently discarded (only a cell's last expression is rendered), so show it
# explicitly with display().
display(df.describe())

# Plot wine quality distribution
fig, ax = plt.subplots()
sns.countplot(x='quality', data=df, ax=ax)
ax.set_title("Wine Quality Distribution")
plt.show()


In [None]:
# Features and target: every column except 'quality' is a predictor.
X = df.drop('quality', axis=1)
y = df['quality']

# Scale features.
# Fit the scaler on the training rows only, so that statistics of the held-out
# test rows do not leak into preprocessing. train_test_split chooses rows from
# the sample count and random_state alone, so reusing test_size=0.2 and
# random_state=42 here selects the same training rows as the later split of
# X_scaled. Keep these two values in sync with that split.
train_rows, _ = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)
scaler = StandardScaler()
scaler.fit(X.iloc[train_rows])

# Transform all rows (row order unchanged) so X_scaled still lines up with y.
X_scaled = scaler.transform(X)


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Random forest regressor with 100 trees; the fixed seed keeps training repeatable.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

# Train on the training split. Left as the last expression so the fitted
# estimator is rendered as the cell output.
model.fit(X_train, y_train)


In [None]:
# Predict quality for the held-out rows.
y_pred = model.predict(X_test)

# Report goodness of fit: R² and root-mean-squared error on the test split.
print("R² Score:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

# Scatter plot of predictions vs actual
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.6)
ax.set_xlabel("Actual Quality")
ax.set_ylabel("Predicted Quality")
ax.set_title("Actual vs Predicted Wine Quality")
ax.grid(True)
plt.show()


In [None]:
# Take the first wine as a one-row DataFrame (note the double brackets).
# The scaler was fit on a DataFrame, so passing a DataFrame keeps the column
# names; wrapping a Series in a list drops them and triggers scikit-learn's
# "X does not have valid feature names" warning.
sample = X.iloc[[0]]

# Apply the same scaling used for training, then predict.
sample_scaled = scaler.transform(sample)
prediction = model.predict(sample_scaled)

print("Predicted Wine Quality Score:", prediction[0])
