# Wine Quality Prediction

In [33]:
# Importing required modules for this project.

import pandas as pd

## Importing the dataset

The dataset contains the following columns: fixed acidity, volatile acidity, citric acid, residual sugar, chlorides, free sulfur dioxide, total sulfur dioxide, density, pH, sulphates, alcohol, quality, and Id.

Dataset source >> https://www.kaggle.com/datasets/yasserh/wine-quality-dataset

### Let's explore the dataset

In [34]:
# Read the wine-quality CSV (path is relative to the notebook's working directory)
# and preview the first five rows.
wine_df = pd.read_csv("WineQT.csv")
wine_df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,2
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,3
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,4


In [35]:
# List the distinct wine quality ratings that occur in the dataset
wine_df["quality"].unique()

array([5, 6, 7, 4, 8, 3])

### Cleaning the data

In [36]:
# Count missing (NaN) values in each column
wine_df.isna().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
Id                      0
dtype: int64

In [37]:
# Drop the Id column; id values are not needed for the prediction problem.
# Re-assigning the result (instead of inplace=True) together with errors="ignore"
# keeps this cell idempotent: re-running it after Id is already gone is a no-op
# rather than a KeyError.
wine_df = wine_df.drop(columns="Id", errors="ignore")

In [38]:
# Preview the first rows again to confirm the Id column is no longer present.
wine_df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


## Training model for prediction

In [39]:
# Separate features from the target and hold out 20% of the rows for evaluation.

from sklearn.model_selection import train_test_split

# Every column except "quality" is a feature; "quality" is the label to predict.
X = wine_df.drop("quality", axis=1)
y = wine_df["quality"]

# Fix the shuffle seed so a fresh "Restart & Run All" reproduces the same split;
# without it each run trains and evaluates on different rows and the reported
# accuracy is not comparable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=12
)

In [40]:
from sklearn.ensemble import RandomForestClassifier

# Build a 200-tree random forest with a fixed seed and train it in one step;
# fit() returns the fitted estimator, so the result can be bound directly.
model = RandomForestClassifier(random_state=12, n_estimators=200).fit(X_train, y_train)

# Mean accuracy on the held-out test split (shown as the cell's output).
model.score(X_test, y_test)

0.6462882096069869

In [41]:
# Feature values describing one hypothetical wine sample.
# NOTE(review): density=0.234, pH=1.22 and alcohol=1.0 are far from the values
# shown in the dataset preview (density ~0.997, pH ~3.2-3.5, alcohol ~9.4-9.8),
# so this prediction is an extrapolation — confirm these inputs are intended.
fixed_acidity = 7.2
volatile_acidity = 0.1
citric_acid = 1.0
residual_sugar = 2.3
chlorides = 0.012
free_sulfur_dioxide = 2.1
total_sulfur_dioxide = 42.1
density = 0.234
pH = 1.22
sulphates = 0.22
alcohol = 1.0

# The model was fitted on a DataFrame, so it remembers the column names.
# Passing a bare nested list makes scikit-learn warn that "X does not have valid
# feature names" and silently relies on positional order. Building a one-row
# DataFrame with the training column names (in the same order) makes the
# feature-to-value mapping explicit and lets scikit-learn validate it.
sample = pd.DataFrame(
    [
        {
            "fixed acidity": fixed_acidity,
            "volatile acidity": volatile_acidity,
            "citric acid": citric_acid,
            "residual sugar": residual_sugar,
            "chlorides": chlorides,
            "free sulfur dioxide": free_sulfur_dioxide,
            "total sulfur dioxide": total_sulfur_dioxide,
            "density": density,
            "pH": pH,
            "sulphates": sulphates,
            "alcohol": alcohol,
        }
    ]
)

# predict() returns an array with one label per input row; there is one row here.
quality = model.predict(sample)

print(f"The rating of the wine quality is : {quality[0]}")

The rating of the wine quality is : 6


