# The following notebook realized a wine quality classification model using k-neighbords algorithm 

### Dependencies 
Download the following library to execute successfully this app, by running the next command :
- pip install sklearn
- pip install pandas
- pip install numpy

### Libraries


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler

import pandas as pd
import numpy as np

### Importing Dataset

In [9]:
data_wine = pd.read_csv('Wine_Quality/winequality-white.csv', sep=";" )
data_wine2 = pd.read_csv('Wine_Quality/winequality-red.csv', sep=";" )
data_wine2['quality'].unique()
data_wine2['quality'] = np.where(data_wine2['quality'] >= 6, 1, 0)
data_wine2

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,0
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,0
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,0
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,1
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,0
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,1
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,1
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,0


### Training first model

In [10]:
x = data_wine.drop('quality', axis=1)
y = data_wine['quality']
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.1,random_state=3,stratify=y)
print("Predictores Entrenamiento")
print(x_train)
print("Predictores Prueba")
print(x_test)
print("Objetivos Entrenamiento")
print(y_train)
print("Objetivos Prueba")
print(y_test)

Predictores Entrenamiento
      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
2526            6.5              0.18         0.33             1.4      0.029   
1274            8.4              0.35         0.56            13.8      0.048   
2226            7.7              0.28         0.58            12.1      0.046   
1855            8.0              0.22         0.28            14.0      0.053   
4290            5.7              0.26         0.24            17.8      0.059   
...             ...               ...          ...             ...        ...   
3783            6.4              0.27         0.45             8.3      0.050   
4355            6.4              0.31         0.28             2.5      0.039   
1034            7.9              0.64         0.46            10.6      0.244   
2835            6.3              0.25         0.22             3.3      0.048   
4891            5.7              0.21         0.32             0.9      0.038   

 

### Results

In [11]:
knn=KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train,y_train)
predicciones=knn.predict(x_test)
print(confusion_matrix(y_test,predicciones))

[[  1   0   1   0   0   0]
 [  0   4   4   8   0   0]
 [  0   2  96  43   4   1]
 [  0   7  33 133  37  10]
 [  0   0  10  31  44   3]
 [  0   0   0   4   5   9]]


### Training second model

In [12]:
x = data_wine2.copy()
x.drop('quality', axis=1)
scaler = MinMaxScaler(feature_range=(0,1))
x = scaler.fit_transform(x)
x = pd.DataFrame(x)
y = data_wine2['quality']
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.1,random_state=3,stratify=y)
print("Predictores Entrenamiento")
print(x_train)
print("Predictores Prueba")
print(x_test)
print("Objetivos Entrenamiento")
print(y_train)
print("Objetivos Prueba")
print(y_test)

Predictores Entrenamiento
            0         1     2         3         4         5         6   \
848   0.159292  0.356164  0.21  0.061644  0.115192  0.183099  0.088339   
1418  0.283186  0.280822  0.01  0.047945  0.108514  0.028169  0.045936   
933   0.247788  0.335616  0.01  0.075342  0.103506  0.169014  0.113074   
16    0.345133  0.109589  0.56  0.061644  0.133556  0.478873  0.342756   
169   0.256637  0.400685  0.24  0.061644  0.580968  0.197183  0.201413   
...        ...       ...   ...       ...       ...       ...       ...   
1403  0.230088  0.143836  0.33  0.054795  0.081803  0.028169  0.024735   
545   0.398230  0.239726  0.49  0.116438  0.136895  0.521127  0.353357   
187   0.274336  0.400685  0.10  0.116438  0.120200  0.112676  0.070671   
554   0.964602  0.359589  0.49  0.226027  0.138564  0.126761  0.060071   
1561  0.283186  0.328767  0.26  0.075342  0.113523  0.422535  0.441696   

            7         8         9         10   11  
848   0.500734  0.669291  0.19760

### Result

In [13]:
knn=KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train,y_train)
predicciones=knn.predict(x_test)
print(confusion_matrix(y_test,predicciones))

[[74  0]
 [ 0 86]]
