## **_Importing Libraries_**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Absolute path of the wine-quality CSV; adjust it when running in another environment.
DATA_PATH = '/content/datasets_4458_8204_winequality-red.csv'
# Read the CSV into the DataFrame that the cells below work from.
dataset = pd.read_csv(DATA_PATH)

In [None]:
# Distinct values found in the `quality` column.
dataset.loc[:, 'quality'].unique()

array([5, 6, 7, 4, 8, 3])

## **_Selecting Features and Target_**

In [None]:
# Feature matrix: every column except the last one.
# The previous slice `1:-1` also skipped the first column, which silently drops a
# real feature if the CSV has no leading index column.
# NOTE(review): confirm the first column of the CSV is a feature and not a row id.
x = dataset.iloc[:, :-1].values
# Target vector: the last column (presumably `quality` - verify against the CSV header).
y = dataset.iloc[:, -1].values

## **_Split to Train and Test Data_**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The fixed random_state makes the split,
# and therefore every metric computed from it, reproducible after a kernel restart.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)

## **_Standardising Data_**

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then reuse the fitted scaler on the
# test rows so no test-set statistics influence the transformation.
sc = StandardScaler()
x_train, x_test = sc.fit_transform(x_train), sc.transform(x_test)

## **_Train on Different Algorithms_**

We trained six classifiers, built from five scikit-learn classes:

* `LogisticRegression` from the `linear_model` module.
* `KNeighborsClassifier` from the `neighbors` module.
* `DecisionTreeClassifier` from the `tree` module.
* `RandomForestClassifier` from the `ensemble` module.
* `SVC` from the `svm` module, used twice: once with a linear kernel and once with an RBF kernel.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [None]:
# One estimator per model being compared; hyper-parameters not listed here are
# left at the library defaults.
l_cla = LogisticRegression()
k_cla = KNeighborsClassifier()
# The tree-based learners are randomized, so pin their seed to keep scores
# reproducible from run to run.
d_cla = DecisionTreeClassifier(random_state=42)
r_cla = RandomForestClassifier(random_state=42)
s_cla = SVC(kernel='linear')
ks_cla = SVC(kernel='rbf')

In [None]:
# Train each classifier on the training split, in the same order as before.
for clf in (l_cla, k_cla, d_cla, r_cla, s_cla, ks_cla):
    clf.fit(x_train, y_train)

# Keep the last fitted estimator as the cell's value so its repr is still shown.
ks_cla

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [None]:
# Predict labels for the test split with every fitted classifier.
l_pred, k_pred, d_pred, r_pred, s_pred, ks_pred = [
    clf.predict(x_test)
    for clf in (l_cla, k_cla, d_cla, r_cla, s_cla, ks_cla)
]

## **_Confusion matrix_**

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix of each classifier's predictions against the true test labels.
l_c, k_c, d_c, r_c, s_c, ks_c = [
    confusion_matrix(y_test, pred)
    for pred in (l_pred, k_pred, d_pred, r_pred, s_pred, ks_pred)
]

In [None]:
# Confusion matrix for the logistic-regression predictions (y_test vs. l_pred).
l_c

array([[ 1,  0,  1,  0,  0,  0],
       [ 0,  0,  5,  2,  1,  0],
       [ 0,  0, 95, 36,  0,  0],
       [ 0,  0, 46, 76, 10,  0],
       [ 0,  0,  1, 30, 13,  0],
       [ 0,  0,  0,  2,  1,  0]])

In [None]:
# Confusion matrix for the k-nearest-neighbours predictions (y_test vs. k_pred).
k_c

array([[ 0,  1,  1,  0,  0,  0],
       [ 0,  0,  3,  4,  1,  0],
       [ 0,  1, 90, 39,  1,  0],
       [ 0,  1, 45, 74, 12,  0],
       [ 0,  0,  8, 20, 16,  0],
       [ 0,  0,  1,  2,  0,  0]])

In [None]:
# Confusion matrix for the decision-tree predictions (y_test vs. d_pred).
d_c

array([[ 0,  2,  0,  0,  0,  0],
       [ 0,  1,  4,  2,  1,  0],
       [ 2,  4, 94, 27,  4,  0],
       [ 0,  3, 39, 75, 12,  3],
       [ 0,  0,  5, 14, 24,  1],
       [ 0,  0,  0,  2,  1,  0]])

In [None]:
# Confusion matrix for the random-forest predictions (y_test vs. r_pred).
r_c

array([[  0,   1,   1,   0,   0,   0],
       [  0,   0,   5,   3,   0,   0],
       [  0,   0, 105,  24,   2,   0],
       [  0,   0,  29,  93,  10,   0],
       [  0,   0,   1,  22,  21,   0],
       [  0,   0,   0,   1,   2,   0]])

In [None]:
# Confusion matrix for the linear-kernel SVC predictions (y_test vs. s_pred).
s_c

array([[ 0,  0,  2,  0,  0,  0],
       [ 0,  0,  5,  3,  0,  0],
       [ 0,  0, 98, 33,  0,  0],
       [ 0,  0, 47, 85,  0,  0],
       [ 0,  0,  2, 42,  0,  0],
       [ 0,  0,  0,  3,  0,  0]])

In [None]:
# Confusion matrix for the RBF-kernel SVC predictions (y_test vs. ks_pred).
ks_c

array([[  0,   0,   2,   0,   0,   0],
       [  0,   0,   3,   4,   1,   0],
       [  0,   0, 100,  30,   1,   0],
       [  0,   0,  50,  78,   4,   0],
       [  0,   0,   4,  25,  15,   0],
       [  0,   0,   0,   2,   1,   0]])

## **_Result_**

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy of each classifier's predictions on the test split.
l_a, k_a, d_a, r_a, s_a, ks_a = [
    accuracy_score(y_test, pred)
    for pred in (l_pred, k_pred, d_pred, r_pred, s_pred, ks_pred)
]

In [None]:
# Report every model's test accuracy under its own label.
# The previous version printed the KNN score (k_a) as "Logistic Regression" and
# the logistic-regression score (l_a) as "Kernel SVC"; it had no KNN line and
# never printed the kernel-SVC score (ks_a).
print(
    'Logistic Regression: ' + str(l_a)
    + '\nK-Nearest Neighbors: ' + str(k_a)
    + '\nDecision Tree: ' + str(d_a)
    + '\nRandom Forest: ' + str(r_a)
    + '\nLinear SVC: ' + str(s_a)
    + '\nKernel SVC: ' + str(ks_a)
)

Logistic Regression: 0.5625
Decision Tree: 0.60625
Random Forest: 0.684375
Linear SVC: 0.571875
Kernel SVC: 0.578125


## **_Conclusion_**

Classification results:

* RandomForest achieved the highest test accuracy (about 0.68) of the algorithms compared.
* We can therefore consider RandomForest the best of these algorithms for this dataset.