In [None]:
import pandas as pd #Data Manipulation and preprocessing
import numpy as np 
import seaborn as sb #DataViz
import matplotlib.pyplot as plt #DataViz

#Algos
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier

#Tools for modelling
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import cross_val_score, train_test_split

#Majic fucntion to show visualisations in the notebook
% matplotlib inline

In [None]:
# Load the red-wine quality dataset; the file uses ';' as its field separator.
df1 = pd.read_csv('winequality-red.csv', sep=';')

In [None]:
# Heatmap of the pairwise correlations between all columns of df1.
# The end goal of the notebook is to classify wine quality, so the
# 'quality' row/column of this map is the one to look at.
dims = (11.7, 8.27)  # figure size in inches (width, height)
fig, ax = plt.subplots(figsize=dims)

corr_matrix = df1.corr()
CorMap = sb.heatmap(
    data=corr_matrix,
    ax=ax,
    annot=True,   # print the coefficient inside every cell
    fmt='.1f',    # one decimal place is enough to read the pattern
)
plt.title('Correlation')

In [None]:
# Bar plot of alcohol content for each quality score.
sb.barplot(data=df1, x='quality', y='alcohol')

In [None]:
# Number of wines per quality score, ordered by score.
# Series.value_counts() is used because the top-level pd.value_counts() is deprecated.
no_class = df1['quality'].value_counts().sort_index()
print(no_class)

# One bar per quality score, with a tick under every bar.
plt.bar(no_class.index, no_class, align = 'center')
plt.xticks(no_class.index)

In [None]:
# Collapse the quality score into a binary target:
#   0 = bad  (score below GOOD_QUALITY_MIN, i.e. 3, 4, 5)
#   1 = good (score of GOOD_QUALITY_MIN or above, i.e. 6, 7, 8)
# A numeric threshold replaces the str -> dict round-trip, so scores outside
# 3-8 are still classified instead of silently becoming NaN.
# NOTE(review): assumes 'quality' has no missing values — confirm for other datasets.
GOOD_QUALITY_MIN = 6

# Guard so the cell is idempotent: once the column is already 0/1, re-running
# the cell must not re-map the labels (which would destroy the target).
if not df1['quality'].isin([0, 1]).all():
    df1['quality'] = (df1['quality'] >= GOOD_QUALITY_MIN).astype(int)
df1.head()

In [None]:
# New (binary) classes and their number of instances.
# Series.value_counts() is used because the top-level pd.value_counts() is deprecated.
no_class = df1['quality'].value_counts().sort_index()
print(no_class)

# One bar per class, with a tick under every bar.
plt.bar(no_class.index, no_class, align = 'center')
plt.xticks(no_class.index)

In [None]:
# Predictors: the eleven physico-chemical measurements; target: the quality label.
feature_cols = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]
X = df1[feature_cols]
y = df1['quality']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit the scaler on the training rows only, then apply it to both splits.
# with_mean=False: features are scaled but not centred.
scaler = StandardScaler(with_mean=False)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model names and their accuracies; each model cell below appends to `scores`
# in the same order as `models`.
models = ['LogisticRegression', 'RandomForest', 'MLP']
scores = []

In [None]:
# Train a logistic regression on the scaled training split and score it on the test split.
clf = LogisticRegression().fit(X_train, y_train)
pred = clf.predict(X_test)

print('Results for: Logistic Regression\n')
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

# Record the accuracy so it can be compared with the other models later.
Accuracy = accuracy_score(y_test, pred)
print(f"Accuracy is {Accuracy}")
scores.append(Accuracy)

**Logistic Regression** is a classification model that is often used in binary classification problems. 

![title](logistic.png)

In [None]:
# Train a random forest on the scaled training split and score it on the test split.
# random_state is fixed (same seed as the train/test split) so the reported
# metrics and the forest's feature importances are reproducible across runs.
clf = RandomForestClassifier(random_state=1)
clf.fit(X_train, y_train)
pred = clf.predict(X_test)

print('Results for: RandomForest' + '\n')
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

# Record the accuracy so it can be compared with the other models later.
Accuracy = accuracy_score(y_test, pred)
print("Accuracy is "+ str(Accuracy))
scores.append(Accuracy)

**Random Forests** are ensemble learning methods: they combine multiple classifiers (decision trees) to achieve higher predictive accuracy than a single classifier used on its own.
![title](randomforest.png)


In [None]:
# Rank the features by the importance scores of the fitted classifier in `clf`.
# NOTE(review): relies on `clf` still holding a model that exposes
# feature_importances_ (the random forest) — run this right after that cell.
dn = {'features': X.columns, 'score': clf.feature_importances_}
df = pd.DataFrame(dn).sort_values('score', ascending=False)

plot = sb.barplot(data=df, x='features', y='score')
plot.set(
    title='Feature Importance of Random Forest Classifier',
    xlabel='Features',
    ylabel='Score',
)

# Feature names are long, so turn the x tick labels vertical.
for item in plot.get_xticklabels():
    item.set_rotation(90)

In [None]:
# Train a multilayer perceptron on the scaled training split and score it on the test split.
# random_state is fixed (same seed as the train/test split) so weight
# initialisation and shuffling, and therefore the reported metrics, are reproducible.
clf = MLPClassifier(random_state=1)
clf.fit(X_train, y_train)
pred = clf.predict(X_test)

print('Results for: MLP' + '\n')
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

# Record the accuracy so it can be compared with the other models later.
Accuracy = accuracy_score(y_test, pred)
print("Accuracy is "+ str(Accuracy))
scores.append(Accuracy)

**MLP (Multilayer Perceptron)** is a type of neural network made up of at least three layers: an input layer, one or more hidden layers, and an output layer.

![title](nn2.png)

In [None]:
# Summary table of model accuracies, best model first.
# `models` and `scores` must have the same length (one score per model).
table = (
    pd.DataFrame({'Model': models, 'Accuracy': scores})
    .sort_values(by='Accuracy', ascending=False)
)
table.head()