In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Install a blanket "ignore" filter so no Python warnings are shown from here on.
# NOTE(review): this also hides deprecation and convergence warnings raised by
# the plotting and modelling cells below -- confirm that is intended.
from warnings import filterwarnings
filterwarnings(action='ignore')

In [None]:
# Read the red-wine quality table from a CSV expected in the working directory,
# then preview the first five rows.
wine = pd.read_csv(filepath_or_buffer="winequality-red.csv")
print("Successfully Imported Data!")
wine.head(5)

In [None]:
# Print the table dimensions as a (rows, columns) tuple.
print((len(wine.index), len(wine.columns)))

In [None]:
# Summary statistics for every column; include='all' asks pandas to describe
# all columns regardless of dtype.
wine.describe(include='all')

In [None]:
# Count the missing values in each column.
print(wine.isnull().sum())

In [None]:
# Pairwise Pearson correlation between all columns (Pearson is the pandas default).
wine.corr(method='pearson')

In [None]:
# Mean of every column within each quality rating.
wine.groupby(by='quality').mean()

In [None]:
# Bar chart of how many wines received each quality rating.
# The column is named by keyword: in recent seaborn releases the first
# positional parameter is `data`, not `x`, so passing the Series positionally
# no longer produces one bar per rating.
sns.countplot(x='quality', data=wine)
plt.show()

In [None]:
# Bar chart of how often each distinct 'fixed acidity' value occurs.
# The column is named by keyword: in recent seaborn releases the first
# positional parameter is `data`, not `x`, so passing the Series positionally
# no longer produces one bar per value.
# NOTE(review): this column looks continuous; a histogram may be more readable
# than one bar per distinct value -- confirm intent.
sns.countplot(x='fixed acidity', data=wine)
plt.show()

In [None]:
# Kernel density estimate of the quality ratings, restricted to ratings above 2.
sns.kdeplot(wine.loc[wine['quality'] > 2, 'quality'])

In [None]:
# Distribution of alcohol content: density-normalised histogram with a KDE overlay.
# `sns.distplot` is deprecated in seaborn; `histplot` with kde=True and
# stat="density" is the documented replacement that keeps the same y-axis scale.
sns.histplot(data=wine, x='alcohol', kde=True, stat='density')

In [None]:
# One box plot per column, arranged on a 4x4 grid with independent x-axes.
wine.plot.box(subplots=True, layout=(4, 4), sharex=False)

In [None]:
# One density curve per column, arranged on a 4x4 grid with independent x-axes.
wine.plot.density(subplots=True, layout=(4, 4), sharex=False)

In [None]:
# Histogram of every column, 50 bins each, on a 10x10 inch figure.
wine.hist(bins=50, figsize=(10, 10))
plt.show()

In [None]:
# Correlation matrix drawn as a heatmap, with the coefficient written in each cell.
corr = wine.corr()
sns.heatmap(data=corr, annot=True)

In [None]:
# Scatter-plot matrix across all columns of the table.
sns.pairplot(data=wine)

In [None]:
# Distribution of alcohol content within each quality rating.
sns.violinplot(data=wine, x='quality', y='alcohol')

In [None]:
# Binary target: a wine rated at or above the threshold is labelled good (1),
# everything else 0. The comparison is vectorised rather than looped in Python.
GOOD_QUALITY_THRESHOLD = 7
wine['goodquality'] = (wine['quality'] >= GOOD_QUALITY_THRESHOLD).astype(int)

# Separate feature variables and target variable.
# Both the raw rating and the derived label are dropped from the features so
# the target cannot leak into X.
X = wine.drop(['quality', 'goodquality'], axis=1)
Y = wine['goodquality']

In [None]:
# Class balance of the binary target.
wine.goodquality.value_counts()

In [None]:
# Display the feature matrix (every column except 'quality' and 'goodquality').
X

In [None]:
# Print the binary target Series (the 'goodquality' column).
print(Y)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier

# Feature importances from an extra-trees ensemble fitted on the full X / Y.
# random_state is fixed (same seed value as the train/test split) so the
# importances are reproducible across runs; without it every run differs.
# The LogisticRegression instance previously created here was never used in
# this cell and is re-created in the training cell, so it is not built here.
classifiern = ExtraTreesClassifier(random_state=7)
classifiern.fit(X, Y)
score = classifiern.feature_importances_

# Label each importance with its column name and list the strongest first.
print(pd.Series(score, index=X.columns).sort_values(ascending=False))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the seed fixes which rows land in each split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=7,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Logistic regression with library defaults, fitted on the training split.
model = LogisticRegression().fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Share of test rows whose predicted label matches the true label.
print("Accuracy Score:", accuracy_score(y_true=Y_test, y_pred=Y_pred))

In [None]:
# Confusion matrix for the predictions currently held in Y_pred.
confusion_mat = confusion_matrix(y_true=Y_test, y_pred=Y_pred)
print(confusion_mat)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier voting over the 3 closest training rows.
model = KNeighborsClassifier(n_neighbors=3)
model = model.fit(X_train, Y_train)
y_pred = model.predict(X_test)

print("Accuracy Score:", accuracy_score(y_true=Y_test, y_pred=y_pred))

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Support-vector classifier with library defaults.
model = SVC().fit(X_train, Y_train)
pred_y = model.predict(X_test)

print("Accuracy Score:", accuracy_score(y_true=Y_test, y_pred=pred_y))

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Single decision tree that splits on entropy; seeded for repeatable results.
model = DecisionTreeClassifier(random_state=7, criterion='entropy')
model = model.fit(X_train, Y_train)
# Note: this rebinds y_pred, replacing the predictions the KNN cell stored there.
y_pred = model.predict(X_test)

print("Accuracy Score:", accuracy_score(y_true=Y_test, y_pred=y_pred))

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with library defaults.
model3 = GaussianNB().fit(X_train, Y_train)
y_pred3 = model3.predict(X_test)

print("Accuracy Score:", accuracy_score(y_true=Y_test, y_pred=y_pred3))

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random forest with library defaults; seeded for repeatable results.
model2 = RandomForestClassifier(random_state=1).fit(X_train, Y_train)
y_pred2 = model2.predict(X_test)

print("Accuracy Score:", accuracy_score(y_true=Y_test, y_pred=y_pred2))

In [None]:
# Leaderboard of model accuracies, best first, indexed by score.
# NOTE(review): the scores are typed in by hand rather than taken from the
# accuracy_score calls, and 'Xgboost' is not trained in the visible cells --
# confirm these numbers still match a fresh run.
model_scores = [
    ('Logistic Regression', 0.870),
    ('KNN', 0.872),
    ('SVC', 0.868),
    ('Decision Tree', 0.864),
    ('GaussianNB', 0.833),
    ('Random Forest', 0.893),
    ('Xgboost', 0.879),
]
results = pd.DataFrame(model_scores, columns=['Model', 'Score'])

result_df = results.sort_values(by='Score', ascending=False).set_index('Score')
result_df