Import Necessary Libraries

In [5]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Load and Explore the Dataset

In [6]:
# Read the red-wine quality table from the CSV expected in the working directory.
csv_path = 'winequality-red.csv'
data = pd.read_csv(csv_path)

# Peek at the first rows to confirm the columns were parsed as expected.
preview = data.head()
print(preview)

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2      9.8        5  
3      9.8        6  
4      9.4        5  

Data Preprocessing

In [7]:
# Separate the column to predict (y) from the remaining predictor columns (X).
target_column = 'quality'
y = data[target_column]
X = data.drop(target_column, axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

Feature Scaling

In [8]:
# Learn the per-feature mean and variance from the training split only, then apply
# that same transformation to both splits so no test-set statistics leak into training.
# NOTE: this cell rebinds X_train / X_test to their scaled versions, so re-running it
# without first re-running the split cell would fit the scaler on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Model Training

In [9]:
# Random forest of 100 trees; the fixed seed makes the fitted model reproducible.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)

Model Evaluation

In [10]:
# Score the held-out split with the fitted model.
y_pred = model.predict(X_test)

# Compute both regression metrics against the true test labels:
# mean squared error (lower is better) and R-squared (closer to 1 is better).
mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))

print(f"Mean Squared Error: {mse}")
print(f"R-squared (R2) Score: {r2}")

Mean Squared Error: 0.3006603124999999
R-squared (R2) Score: 0.5399271357910311


Predict New Wine Quality

In [11]:
# Example of predicting wine quality for a new sample (replace with actual feature values).
# The values must be listed in the same order as the feature columns of X.
new_wine_features = np.array([[7.0, 0.27, 0.36, 20.7, 0.045, 45, 170, 1.001, 3.0, 0.45, 8.8]])

# The scaler was fitted on a DataFrame, so label the sample with the same column names.
# This avoids sklearn's "X does not have valid feature names" warning and fails fast
# (ValueError from the DataFrame constructor) if the number of values is wrong.
new_wine_frame = pd.DataFrame(new_wine_features, columns=X.columns)

# Apply the training-set scaling before predicting, since the model was fitted on scaled features.
scaled_new_wine_features = scaler.transform(new_wine_frame)
predicted_quality = model.predict(scaled_new_wine_features)
print(f"Predicted Wine Quality: {predicted_quality[0]:.2f}")

Predicted Wine Quality: 4.50




Saving the Model

In [12]:
# Persist the trained model so it can be reloaded later without retraining.
# (joblib is imported with the other dependencies in the first cell.)
model_filename = 'wine_quality_model.pkl'
joblib.dump(model, model_filename)
print(f"Model saved as {model_filename}")

# The model was trained on standardised features, so the fitted scaler is needed
# to prepare raw inputs at prediction time; save it next to the model.
# Only load these files from trusted sources: joblib files are pickle-based.
scaler_filename = 'wine_quality_scaler.pkl'
joblib.dump(scaler, scaler_filename)
print(f"Scaler saved as {scaler_filename}")

Model saved as wine_quality_model.pkl
