Column descriptions are taken from this link: https://archive.ics.uci.edu/ml/datasets/wine+quality



* Importing Libraries
* For ML Models: sklearn
* For Data Processing: numpy, pandas, sklearn
* For Data Visualization: matplotlib, seaborn, plotly

In [None]:
#Importing libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
 
from sklearn.linear_model import LinearRegression ,LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error ,mean_squared_error, median_absolute_error,confusion_matrix,accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC ,SVR
from sklearn.preprocessing import MinMaxScaler

import warnings
# Ignore every warning raised from here on (this also hides library
# deprecation and convergence warnings).
warnings.filterwarnings('ignore')

In [None]:
# Read the wine-quality CSV and discard its 'Id' column in one step.
Data = pd.read_csv('/kaggle/input/wine-quality-dataset/WineQT.csv').drop(columns='Id')


*Here I am subtracting 3 from the quality column to change the range of quality column from 3-8 to 0-5*

In [None]:
# Shift every quality label down by 3, then display the frame.
Data['quality'] = Data['quality'].sub(3)
Data

In [None]:
# Preview the first 10 rows.
Data.head(10)

In [None]:
# First 10 rows, with cells shaded by a red background gradient.
Data.head(10).style.background_gradient(cmap='Reds')

In [None]:
# 10 randomly sampled rows, with cells shaded by a red background gradient.
Data.sample(10).style.background_gradient(cmap='Reds')

In [None]:
# Column names, non-null counts and dtypes.
Data.info()

In [None]:
# List the column labels.
Data.columns

In [None]:
# Summary statistics for each column.
Data.describe()

In [None]:
# Same summary statistics, transposed so each column becomes a row.
Data.describe().T

In [None]:
# Transposed summary statistics, shaded with a blue background gradient.
Data.describe().T.style.background_gradient(cmap = 'Blues')

In [None]:
# Mean of each column.
Data.mean()

# Exploratory Data Analysis (EDA)

In [None]:
# Count of missing values in each column.
Data.isnull().sum()

In [None]:
# Count of missing values in each column, via isna().
Data.isna().sum()

In [None]:
# Number of rows flagged as duplicates.
Data.duplicated().sum()

In [None]:
# Remove the duplicated rows (the index is left as-is, not renumbered).
Data = Data.drop_duplicates()


In [None]:
# Count duplicated rows again.
Data.duplicated().sum()

In [None]:
# Number of rows for each distinct quality value.
Data['quality'].value_counts()

In [None]:
# Line plot of every column against the row index.
# The size is handed to DataFrame.plot itself: when no `ax` is supplied it
# opens its own figure, so a separate plt.figure(figsize=...) call would not
# size this plot and would only leave an empty figure behind.
Data.plot(figsize=(12, 8))
plt.show()

In [None]:
# One 20-bin histogram per column, on a 10x10-inch figure.
Data.hist(bins=20, figsize=(10, 10))
plt.show()

In [None]:
# Bar chart of alcohol against quality. One bar is drawn per row, so bars
# sharing a quality value are stacked on top of each other at the same x.
plt.bar(x=Data['quality'], height=Data['alcohol'])
plt.gca().set(xlabel='quality', ylabel='alcohol')
plt.show()

In [None]:
# Box plots for the four columns at positions 4 through 7.
plt.figure(figsize=(12, 5))
sns.boxplot(data=Data.iloc[:, 4:8])
plt.show()

In [None]:
# Annotated heat map of the pairwise correlation matrix (two decimals).
plt.figure(figsize=(12, 10))
sns.heatmap(
    Data.corr(),
    annot=True,
    fmt='.2f',
    cmap='rainbow',
    square=True,
    cbar=True,
)
plt.show()

In [None]:
# Heat map of a boolean mask: which column pairs correlate above 0.6.
plt.figure(figsize=(12, 12))
sns.heatmap(
    Data.corr().gt(0.6),
    annot=True,
    annot_kws={'size': 10},
    fmt='.2f',
    cmap='Greens',
    square=True,
    cbar=False,
)
plt.show()

From the heat map above we can conclude that 'total sulfur dioxide' and 'free sulfur dioxide' are highly correlated features, so we will remove one of them ('total sulfur dioxide') and keep the other.

In [None]:
# Drop the 'total sulfur dioxide' column.
Data = Data.drop(columns='total sulfur dioxide')

In [None]:
# Display the frame.
Data

#  Building a ML Model 


In [None]:
# Separate the target column ('quality') from the predictor columns.
y = Data["quality"]
X = Data.drop("quality", axis=1)

In [None]:
# Hold out 20% of the rows as a test set. No random_state is passed, so the
# split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Min-max scale the features. The scaler learns its per-column minimum and
# maximum from the training rows only; the test rows are then transformed
# with those same parameters. Refitting on the test set would put the two
# sets on different scales and leak test-set statistics into preprocessing.
norm = MinMaxScaler()
x_train = norm.fit_transform(X_train)
x_test = norm.transform(X_test)

# NOTE(review): the scaled arrays are x_train / x_test (lowercase); the
# X_train / X_test frames themselves stay unscaled.
print("X Train : ", X_train.shape)
print("X Test  : ", X_test.shape)
print("Y Train : ", y_train.shape)
print("Y Test  : ", y_test.shape)

# using the model Logistic Regression



In [None]:

# Fit a logistic-regression classifier (liblinear solver) on the training split.
Lo_model = LogisticRegression(solver='liblinear').fit(X_train, y_train)

# Mean accuracy on the training and the held-out rows.
print("Score the X-train with Y-train is : ", Lo_model.score(X_train, y_train))
print("Score the X-test  with Y-test  is : ", Lo_model.score(X_test, y_test))

# Predicted labels for the test rows, compared with the true labels below.
y_pred_Lo = Lo_model.predict(X_test)

print(
    " Model Evaluation Logistic R : mean absolute error is ",
    mean_absolute_error(y_test, y_pred_Lo),
)
print(
    " Model Evaluation Logistic R : mean squared  error is ",
    mean_squared_error(y_test, y_pred_Lo),
)
print(
    " Model Evaluation Logistic R : median absolute error is ",
    median_absolute_error(y_test, y_pred_Lo),
)
print(
    " Model Evaluation Logistic R : accuracy score ",
    accuracy_score(y_test, y_pred_Lo),
)


# using the model Decision Tree Classifier


In [None]:
# Fit a decision-tree classifier limited to a depth of 50.
Tree_model = DecisionTreeClassifier(max_depth=50).fit(X_train, y_train)

# Mean accuracy on the training and the held-out rows.
print("Score the X-train with Y-train is : ", Tree_model.score(X_train, y_train))
print("Score the X-test  with Y-test  is : ", Tree_model.score(X_test, y_test))

In [None]:
# Print the fitted tree's importance score for each feature; the values
# follow the column order of the training features.

print("The Important columns \n",Tree_model.feature_importances_)

In [None]:
# Zero rows: displays only the column headers.
Data.head(0)

Note: among the decision tree's feature importances for predicting quality, the alcohol column accounts for about 19%, as we noted in the analysis.


In [None]:
# Class labels the fitted tree knows about.
print("The classes ", Tree_model.classes_)

# Predict the held-out rows and report the share of exact matches.
y_pred_T = Tree_model.predict(X_test)
print(
    " Model Evaluation Decision Tree : accuracy score ",
    accuracy_score(y_true=y_test, y_pred=y_pred_T),
)

# using the model SVC

In [None]:
# Support-vector classifier with an RBF kernel and C=50.
svc_model = SVC(C=50, kernel="rbf")
svc_model.fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred_svc = svc_model.predict(X_test)

# Mean accuracy on the training and the held-out rows.
print("Score the X-train with Y-train is : ", svc_model.score(X_train, y_train))
print("Score the X-test  with Y-test  is : ", svc_model.score(X_test, y_test))
# The label names this model (SVC) rather than the decision tree.
print(" Model Evaluation SVC : accuracy score ", accuracy_score(y_test, y_pred_svc))

# using the model SVR

In [None]:
# Support-vector regression on the quality label.
# NOTE(review): no kernel is passed, so the library default is used; check
# whether `degree` and `coef0` have any effect with that kernel.
svr_model = SVR(degree=1, coef0=1, tol=0.001, C=1.5, epsilon=0.001)
svr_model.fit(X_train, y_train)

# Predict with the regressor itself, not with the SVC classifier.
y_pred_svr = svr_model.predict(X_test)

# A regressor returns continuous values, which accuracy_score cannot compare
# with integer labels. Round to the nearest label and clip into the label
# range seen in training before computing accuracy.
y_pred_svr_labels = np.clip(
    np.rint(y_pred_svr), y_train.min(), y_train.max()
).astype(int)

# For a regressor, score() is the R^2 coefficient, not an accuracy.
print("Score the X-train with Y-train is : ", svr_model.score(X_train, y_train))
print("Score the X-test  with Y-test  is : ", svr_model.score(X_test, y_test))
print(" Model Evaluation SVR : accuracy score ", accuracy_score(y_test, y_pred_svr_labels))

# using the model K Neighbors Classifier
# 

In [None]:

# k-nearest-neighbours classifier voting over the 8 closest training rows.
K_model = KNeighborsClassifier(n_neighbors=8).fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred_k = K_model.predict(X_test)

# Mean accuracy on both splits, then the test accuracy from the predictions.
print("Score the X-train with Y-train is : ", K_model.score(X_train, y_train))
print("Score the X-test  with Y-test  is : ", K_model.score(X_test, y_test))
print(
    " Model Evaluation K Neighbors Classifier : accuracy score ",
    accuracy_score(y_test, y_pred_k),
)

# The End 
 Thanks for reading my analysis. If you have any questions or advice for me, please write them in the comments.

# Vote
If you liked my work, please vote for me.