<a href="https://colab.research.google.com/github/AbhayRajawat-cloud/ML_project/blob/main/WineQuality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Loading the Dataset

In [None]:
# Read the wine data from disk. Every column except the last is used as a
# feature; the last column is used as the target label.
df = pd.read_csv('wine_data.csv')
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]

# Convert to plain NumPy arrays for the scikit-learn steps further down.
x = feature_frame.to_numpy()
y = target_series.to_numpy()

print(x)
print(y)

[[ 7.4    0.7    0.    ...  3.51   0.56   9.4  ]
 [ 7.8    0.88   0.    ...  3.2    0.68   9.8  ]
 [ 7.8    0.76   0.04  ...  3.26   0.65   9.8  ]
 ...
 [ 6.3    0.51   0.13  ...  3.42   0.75  11.   ]
 [ 5.9    0.645  0.12  ...  3.57   0.71  10.2  ]
 [ 6.     0.31   0.47  ...  3.39   0.66  11.   ]]
[5 5 5 ... 6 5 6]


# Finding Most Frequent, Minimum, and Maximum Wine Quality


In [None]:
# Summarise the 'quality' column: most frequent, highest and lowest score.
# The extremes are stored as min_quality / max_quality instead of `min` /
# `max`, so the Python built-ins of those names stay callable in later cells.
quality = df['quality']
most_freq = quality.mode()[0]  # mode() returns a Series; label 0 selects its first entry
min_quality = quality.min()
max_quality = quality.max()

print("Most Frequent Wine Quality:", most_freq)
print("Highest Quality:", max_quality)
print("Lowest Quality:", min_quality)

Most Frequent Wine Quality: 5
Highest Quality: 8
Lowest Quality: 3


# Checking Correlation of Fixed Acidity, Alcohol, and Free Sulfur Dioxide with Quality

In [None]:
# Build the pairwise correlation matrix for three chosen features plus the
# target, then show only how each of them correlates with 'quality'.
selected_columns = ['fixed acidity', 'alcohol', 'free sulfur dioxide', 'quality']
correlations = df[selected_columns].corr()
print(correlations['quality'])

fixed acidity          0.124052
alcohol                0.476166
free sulfur dioxide   -0.050656
quality                1.000000
Name: quality, dtype: float64


# Calculating Average Residual Sugar for Best and Worst Quality Wines

In [None]:
# Mean residual sugar for the rows holding the highest quality score and for
# the rows holding the lowest quality score.
quality_col = df['quality']
is_best = quality_col == quality_col.max()
is_worst = quality_col == quality_col.min()

best = df.loc[is_best, 'residual sugar'].mean()
worst = df.loc[is_worst, 'residual sugar'].mean()

print("Average Residual Sugar (Best Quality):", best)
print("Average Residual Sugar (Worst Quality):", worst)

Average Residual Sugar (Best Quality): 2.5777777777777775
Average Residual Sugar (Worst Quality): 2.6350000000000002


# Analyzing the Effect of Volatile Acidity on Wine Quality

In [None]:
# Correlation between the 'volatile acidity' feature and the 'quality' target.
volatile_acidity = df["volatile acidity"]
quality_scores = df["quality"]
correlation = volatile_acidity.corr(quality_scores)
print('Correlation between Volatile Acidity and Quality:', correlation)
# The coefficient is negative: higher volatile acidity is associated with
# lower wine quality.

Correlation between Volatile Acidity and Quality: -0.390557780264007


# Splitting the Dataset and Applying Feature Scaling

In [None]:
# Hold out 27% of the rows as the test set; random_state=0 fixes the shuffle
# so the same split is produced on every run.
split = train_test_split(x, y, test_size=0.27, random_state=0)
x_train, x_test, y_train, y_test = split

# Show the four pieces in the order: train features, test features,
# train labels, test labels.
for part in (x_train, x_test, y_train, y_test):
    print(part)

[[ 8.8   0.61  0.19 ...  3.22  0.5  10.  ]
 [ 6.7   0.41  0.43 ...  3.42  1.16 10.6 ]
 [ 8.1   0.38  0.48 ...  3.3   1.05  9.4 ]
 ...
 [ 7.9   0.57  0.31 ...  3.29  0.69  9.5 ]
 [13.    0.47  0.49 ...  3.3   0.68 12.7 ]
 [ 9.8   0.98  0.32 ...  3.25  0.48  9.4 ]]
[[10.8   0.47  0.43 ...  3.17  0.76 10.8 ]
 [ 8.1   0.82  0.   ...  3.36  0.53  9.6 ]
 [ 9.1   0.29  0.33 ...  3.26  0.84 11.7 ]
 ...
 [ 9.7   0.69  0.32 ...  3.29  0.62 10.1 ]
 [ 8.7   0.46  0.31 ...  3.1   0.74  9.6 ]
 [ 7.    0.43  0.3  ...  3.33  0.46 11.9 ]]
[6 6 5 ... 6 6 5]
[6 5 7 6 5 6 5 6 4 5 5 5 6 5 6 6 7 5 5 4 7 6 6 4 6 5 5 7 5 6 5 6 5 6 7 7 5
 6 6 7 5 7 6 6 5 5 6 6 6 5 5 5 6 6 6 5 5 5 6 5 5 6 6 6 5 6 5 5 6 6 6 6 4 6
 5 6 5 5 5 6 6 5 6 6 6 5 6 5 5 5 5 6 4 5 7 6 6 5 6 5 8 6 6 6 5 5 5 5 7 5 6
 5 7 5 6 6 6 7 6 6 5 7 5 5 6 6 5 5 5 6 6 6 6 6 6 5 6 5 8 5 6 5 6 5 4 6 7 6
 5 6 6 5 5 5 6 6 3 6 6 6 6 6 6 6 5 5 6 6 6 6 5 5 5 8 5 6 6 7 7 5 5 7 5 6 6
 4 5 6 5 5 6 5 6 6 5 5 5 5 5 5 5 6 6 5 6 6 5 6 7 6 6 6 5 5 5 6 5 6 6 5 5 5
 6 6

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# scaler to both the training and the test features.
sc = StandardScaler()
sc.fit(x_train)
x_train_scaled = sc.transform(x_train)
x_test_scaled = sc.transform(x_test)

print(x_train_scaled)
print(x_test_scaled)

[[ 0.26681116  0.43098082 -0.41657823 ... -0.57472801 -0.89478304
  -0.3837784 ]
 [-0.94294669 -0.65425947  0.81664479 ...  0.7215758   2.81028548
   0.17816563]
 [-0.13644145 -0.81704552  1.07356625 ... -0.05620649  2.19277406
  -0.94572243]
 ...
 [-0.25165649  0.21393276  0.20003328 ... -0.12102168  0.17182759
  -0.85206509]
 [ 2.68632686 -0.32868739  1.12495054 ... -0.05620649  0.11569019
   2.14496974]
 [ 0.84288633  2.43867535  0.25141757 ... -0.38028244 -1.00705785
  -0.94572243]]
[[ 1.4189615  -0.32868739  0.81664479 ... -0.89880396  0.5647894
   0.36548031]
 [-0.13644145  1.57048312 -1.39287978 ...  0.33268465 -0.72637084
  -0.75840775]
 [ 0.43963371 -1.30540365  0.30280187 ... -0.31546725  1.01388862
   1.20839635]
 ...
 [ 0.78527881  0.86507693  0.25141757 ... -0.12102168 -0.22113422
  -0.29012106]
 [ 0.20920365 -0.3829494   0.20003328 ... -1.3525103   0.4525146
  -0.75840775]
 [-0.77012414 -0.54573544  0.14864899 ...  0.13823908 -1.11933265
   1.39571103]]


# Training a Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Entropy-based splitting; random_state pins the tree's internal randomness.
tree_params = {'criterion': 'entropy', 'random_state': 0}
classifier = DecisionTreeClassifier(**tree_params)
# Kept as the last expression so the notebook displays the fitted estimator.
classifier.fit(x_train_scaled, y_train)

Predicting Wine Quality for the Test Set

In [None]:
# Predict on the scaled test features and print predictions next to the true
# labels: left column = predicted quality, right column = actual quality.
y_pred = classifier.predict(x_test_scaled)
predicted_vs_actual = np.column_stack((y_pred, y_test))
print(predicted_vs_actual)

[[5 6]
 [5 5]
 [7 7]
 [5 6]
 [5 5]
 [6 6]
 [5 5]
 [6 6]
 [4 4]
 [6 5]
 [5 5]
 [5 5]
 [6 6]
 [4 5]
 [5 6]
 [6 6]
 [7 7]
 [4 5]
 [5 5]
 [4 4]
 [7 7]
 [5 6]
 [6 6]
 [4 4]
 [5 6]
 [5 5]
 [5 5]
 [6 7]
 [5 5]
 [6 6]
 [5 5]
 [6 6]
 [6 5]
 [7 6]
 [7 7]
 [7 7]
 [6 5]
 [6 6]
 [7 6]
 [6 7]
 [5 5]
 [6 7]
 [6 6]
 [7 6]
 [7 5]
 [5 5]
 [5 6]
 [6 6]
 [5 6]
 [6 5]
 [5 5]
 [6 5]
 [6 6]
 [6 6]
 [5 6]
 [5 5]
 [5 5]
 [5 5]
 [7 6]
 [6 5]
 [5 5]
 [6 6]
 [6 6]
 [7 6]
 [5 5]
 [6 6]
 [5 5]
 [5 5]
 [6 6]
 [6 6]
 [6 6]
 [6 6]
 [5 4]
 [6 6]
 [5 5]
 [6 6]
 [5 5]
 [5 5]
 [5 5]
 [7 6]
 [6 6]
 [5 5]
 [6 6]
 [5 6]
 [6 6]
 [6 5]
 [4 6]
 [5 5]
 [7 5]
 [5 5]
 [5 5]
 [6 6]
 [6 4]
 [6 5]
 [5 7]
 [6 6]
 [6 6]
 [5 5]
 [6 6]
 [6 5]
 [7 8]
 [6 6]
 [6 6]
 [6 6]
 [6 5]
 [5 5]
 [6 5]
 [6 5]
 [6 7]
 [5 5]
 [6 6]
 [5 5]
 [8 7]
 [5 5]
 [5 6]
 [6 6]
 [5 6]
 [7 7]
 [6 6]
 [6 6]
 [5 5]
 [4 7]
 [6 5]
 [5 5]
 [7 6]
 [6 6]
 [5 5]
 [5 5]
 [6 5]
 [6 6]
 [5 6]
 [5 6]
 [5 6]
 [6 6]
 [6 6]
 [5 5]
 [6 6]
 [5 5]
 [6 8]
 [5 5]
 [6 6]
 [6 5]
 [5 6]

# Training a Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# 100 entropy-based trees; random_state pins the forest's internal randomness.
forest_params = {'n_estimators': 100, 'criterion': 'entropy', 'random_state': 42}
rf_model = RandomForestClassifier(**forest_params)
# Kept as the last expression so the notebook displays the fitted estimator.
rf_model.fit(x_train_scaled, y_train)

Predicting Wine Quality for the Test Set

In [None]:
# Predict on the scaled test features and print predictions next to the true
# labels: left column = predicted quality, right column = actual quality.
y_pred_rf = rf_model.predict(x_test_scaled)
rf_predicted_vs_actual = np.column_stack((y_pred_rf, y_test))
print(rf_predicted_vs_actual)

[[5 6]
 [5 5]
 [7 7]
 [5 6]
 [5 5]
 [6 6]
 [5 5]
 [6 6]
 [5 4]
 [5 5]
 [5 5]
 [5 5]
 [6 6]
 [6 5]
 [5 6]
 [7 6]
 [7 7]
 [5 5]
 [5 5]
 [5 4]
 [6 7]
 [5 6]
 [6 6]
 [7 4]
 [5 6]
 [5 5]
 [5 5]
 [7 7]
 [5 5]
 [6 6]
 [6 5]
 [6 6]
 [6 5]
 [5 6]
 [7 7]
 [7 7]
 [5 5]
 [5 6]
 [6 6]
 [6 7]
 [5 5]
 [6 7]
 [6 6]
 [7 6]
 [6 5]
 [5 5]
 [5 6]
 [6 6]
 [5 6]
 [6 5]
 [5 5]
 [5 5]
 [6 6]
 [6 6]
 [6 6]
 [5 5]
 [5 5]
 [5 5]
 [7 6]
 [5 5]
 [5 5]
 [6 6]
 [6 6]
 [6 6]
 [5 5]
 [6 6]
 [5 5]
 [5 5]
 [7 6]
 [6 6]
 [6 6]
 [5 6]
 [5 4]
 [5 6]
 [6 5]
 [6 6]
 [5 5]
 [5 5]
 [5 5]
 [7 6]
 [6 6]
 [5 5]
 [6 6]
 [6 6]
 [6 6]
 [6 5]
 [6 6]
 [5 5]
 [5 5]
 [5 5]
 [5 5]
 [6 6]
 [6 4]
 [5 5]
 [7 7]
 [6 6]
 [6 6]
 [5 5]
 [5 6]
 [6 5]
 [7 8]
 [6 6]
 [6 6]
 [6 6]
 [5 5]
 [5 5]
 [5 5]
 [5 5]
 [6 7]
 [5 5]
 [6 6]
 [5 5]
 [8 7]
 [5 5]
 [6 6]
 [6 6]
 [5 6]
 [7 7]
 [6 6]
 [6 6]
 [5 5]
 [6 7]
 [5 5]
 [5 5]
 [6 6]
 [7 6]
 [5 5]
 [5 5]
 [6 5]
 [6 6]
 [5 6]
 [5 6]
 [6 6]
 [6 6]
 [6 6]
 [5 5]
 [6 6]
 [5 5]
 [6 8]
 [5 5]
 [6 6]
 [5 5]
 [5 6]

# Accuracy of Both the Models

Decision Tree Classifier

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Score the decision tree's test-set predictions against the true labels.
accuracy = accuracy_score(y_test, y_pred)   # fraction of exact matches
cm = confusion_matrix(y_test, y_pred)       # true labels first, predictions second

print("Model Accuracy:", accuracy)
print("Confusion Matrix:\n", cm)

Model Accuracy: 0.5833333333333334
Confusion Matrix:
 [[  0   0   1   1   0   0]
 [  1   3   6   4   0   0]
 [  1   7 115  54   8   1]
 [  0   2  53 112  16   1]
 [  0   1   6  10  22   2]
 [  0   0   0   2   3   0]]


Random Forest Classifier

In [None]:
# Score the random forest's test-set predictions against the true labels.
# Results are stored under *_rf names (matching y_pred_rf / rf_model) so they
# do not overwrite the decision-tree metrics computed in the previous cell.

# Accuracy: fraction of test samples whose predicted quality is exactly right.
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print("Model Accuracy:", accuracy_rf)

# Confusion matrix: true labels are passed first, predicted labels second.
cm_rf = confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix:\n", cm_rf)

Model Accuracy: 0.6759259259259259
Confusion Matrix:
 [[  0   0   0   2   0   0]
 [  0   0   9   4   1   0]
 [  0   0 144  39   3   0]
 [  0   0  46 125  13   0]
 [  0   0   2  14  23   2]
 [  0   0   0   2   3   0]]


The Random Forest classifier is more accurate on the test set (≈0.676) than the Decision Tree classifier (≈0.583).
