# **Wine Quality Report Using Multi-Layer Perceptron (MLP) Regressor**

####

##### The dataset that was chosen consists of various attiributes related to the components of wine quality.  The dataset was cleaned to consist of attributes which allow for linear regression anlysis.  These include attributes consisting of fixed acidity, citric acid, residual sugar, pH, sulpates, and alcohol content.  Previously we utilized ordinary least squares (OLS) and linear regression with gradient descent to gain insight into our dataset.  We decided to further expanded upon our analysis using various Multi-Layer Perceptron (MLP) models.  


#### Link to Dataset: https://www.kaggle.com/datasets/uciml/red-wine-quality-cortez-et-al-2009

In [92]:
from sklearn.naive_bayes import GaussianNB
import statsmodels.api as sm
from sklearn import metrics
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import Perceptron

import numpy as np
import pandas as pd

# Read in data
dataSet=pd.read_csv('red_wine_quality_r.csv')

#Defining input X and output y values
# df_x=dataSet.drop(['alcohol'],axis=1).drop(['r'],axis=1).values
# df_y=dataSet['alcohol'].values
# df_r = dataSet['r'].values
X = dataSet.iloc[:,:-2]
y = dataSet.iloc[:,5]

#add bias term to X
# X = np.c_[np.ones((X.shape[0], 1)), X]
# X[:5]
X = sm.add_constant(X)

# dataSet.head()
# print(df_y)
print("X:\n", X, '\n')
print("y:\n", y)
# print(df_r)
# print(df_x)

X:
       const  fixed acidity  citric acid  residual sugar    pH  sulphates
0       1.0            7.4         0.00             1.9  3.51       0.56
1       1.0            7.8         0.00             2.6  3.20       0.68
2       1.0            7.8         0.04             2.3  3.26       0.65
3       1.0           11.2         0.56             1.9  3.16       0.58
4       1.0            7.4         0.00             1.9  3.51       0.56
...     ...            ...          ...             ...   ...        ...
1594    1.0            6.2         0.08             2.0  3.45       0.58
1595    1.0            5.9         0.10             2.2  3.52       0.76
1596    1.0            6.3         0.13             2.3  3.42       0.75
1597    1.0            5.9         0.12             2.0  3.57       0.71
1598    1.0            6.0         0.47             3.6  3.39       0.66

[1599 rows x 6 columns] 

y:
 0        9.4
1        9.8
2        9.8
3        9.8
4        9.4
        ... 
1594    10.

### Splitting the dataset (80% training data/20% test data)

In [48]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X, y, test_size=0.2,random_state=42)

### Setting Up Ordinary Least Squares (OLS)

In [49]:
# w = parameter vector
w = np.matmul(np.matmul(np.linalg.inv(np.matmul(np.array(X).transpose(), np.array(X))), X.transpose()), y)
# print(np.array(w))

# get predictions for y for training: matrix of input * w
ols_y_train_predict = np.matmul(np.array(X_train), w)
# print(ols_y_train_predict)

# get predictions for y for testing: matrix of input * w
ols_y_predict = np.matmul(np.array(X_test), w.transpose())
# print(ols_y_predict)

# -------------- Not from Scratch, Ignore -------------------
# # get predictions for y for training
# olsModel = sm.OLS(y_train, X_train)
# olsResults = olsModel.fit()
# ols_y_train_predict = olsResults.predict(X_train)

# # get predictions for y for testing
# olsModel2 = sm.OLS(y_test, X_test)
# olsResults2 = olsModel2.fit()
# ols_y_predict = olsResults2.predict(X_test)

### Setting Up Linear Regression with Gradient Descent

In [50]:
# get predictions for y for training
linReg = SGDRegressor()
linReg.fit(X_train, y_train)
y_train_predict = linReg.predict(X_train)

# get predictions for y for testing
linReg2 = SGDRegressor()
linReg2.fit(X_test, y_test)
y_predict = linReg2.predict(X_test)

### Evaluation Metrics: (MSE, MAE, R2)

In [52]:
print("Evaluation Metrics Report: Ordinary Least Square (OLS)")
print("w = ", np.array(w))
print("Training MSE: ", mean_squared_error(y_train, ols_y_train_predict))
print("Training MAE: ", mean_absolute_error(y_train, y_train_predict))
print("Training R^2: ", r2_score(y_train, y_train_predict))
print("Testing MSE:  ", mean_squared_error(y_test, ols_y_predict))
print("Testing MAE:  ", mean_absolute_error(y_test, ols_y_predict))
print("Testing R^2:  ", r2_score(y_test, ols_y_predict))

print("\nEvaluation Metrics Report: Linear Regression with Gradient Descent")
print("w = ", linReg.coef_)
print("Intercept: ", linReg.intercept_)
print("Training MSE: ", mean_squared_error(y_train, y_train_predict))
print("Training MAE: ", mean_absolute_error(y_train, y_train_predict))
print("Training R^2: ", r2_score(y_train, y_train_predict))
print("w = ", linReg2.coef_)
print("Intercept: ", linReg.intercept_)
print("Testing MSE:  ", mean_squared_error(y_test, y_predict))
print("Testing MAE:  ", mean_absolute_error(y_test, y_predict))
print("Testing R^2:  ", r2_score(y_test, y_predict), '\n')

Evaluation Metrics Report: Ordinary Least Square (OLS)
w =  [ 1.44019954 -0.01995432  1.64208084  0.02530217  2.51294299  0.48462978]
Training MSE:  0.9684926273843617
Training MAE:  0.8180051908752247
Training R^2:  0.10018906335335054
Testing MSE:   1.1283089095740835
Testing MAE:   0.8523132631768681
Testing R^2:   0.09367546052180065

Evaluation Metrics Report: Linear Regression with Gradient Descent
w =  [0.62976828 0.06516849 0.54318199 0.02226263 2.44629949 0.60063795]
Intercept:  [0.63136004]
Training MSE:  0.9963743347413795
Training MAE:  0.8180051908752247
Training R^2:  0.10018906335335054
w =  [0.63965788 0.12620508 0.09632115 0.07878109 2.40430098 0.39839994]
Intercept:  [0.63136004]
Testing MSE:   1.3027226971441634
Testing MAE:   0.9751155781707073
Testing R^2:   -0.04642402318942174 



### Setting Up 3 Different Multi-Layer Perceptrons (MLP)

In [114]:
# Standard Regressor MLP
mlp_1 = MLPRegressor()

# MLP with altered hidden layers and activation function 'tanh'
#mlp_2 = MLPRegressor(max_iter=500, activation = 'relu')
mlp_2 = MLPRegressor(hidden_layer_sizes=(100,100,100), activation= 'tanh', max_iter=100)

#MLP changing hidden layers and activtion function 'logistic'
mlp_3 = MLPRegressor(hidden_layer_sizes=(10,10), activation= 'logistic', max_iter=1000)





### Training the Dataset 

In [115]:
mlp_1.fit(X_train,y_train)
mlp_2.fit(X_train,y_train)
mlp_3.fit(X_train,y_train)



### Evaluation Metrics 

In [116]:
print("Multi-Layer Perceptrton 1")
print("Training MAE: ", metrics.mean_absolute_error(y_train, mlp_1.predict(X_train)))
print("Training MSE: ", metrics.mean_squared_error(y_train, mlp_1.predict(X_train)))
print("Training R^2: ", metrics.r2_score(y_train, mlp_1.predict(X_train)))

print("Testing MAE: ", metrics.mean_absolute_error(y_test, mlp_1.predict(X_test)))
print("Testing MSE: ", metrics.mean_squared_error(y_test, mlp_1.predict(X_test)))
print("Testing R^2: ", metrics.r2_score(y_test, mlp_1.predict(X_test)), "\n" )


print("Multi-Layer Perceptrton 2")
print("Training MAE: ", metrics.mean_absolute_error(y_train, mlp_2.predict(X_train)))
print("Training MSE: ", metrics.mean_squared_error(y_train, mlp_2.predict(X_train)))
print("Training R^2: ", metrics.r2_score(y_train, mlp_2.predict(X_train)))

print("Testing MAE: ", metrics.mean_absolute_error(y_test, mlp_2.predict(X_test)))
print("Testing MSE: ", metrics.mean_squared_error(y_test, mlp_2.predict(X_test)))
print("Testing R^2: ", metrics.r2_score(y_test, mlp_2.predict(X_test)), "\n" )

print("Multi-Layer Perceptrton 3")
print("Training MAE: ", metrics.mean_absolute_error(y_train, mlp_3.predict(X_train)))
print("Training MSE: ", metrics.mean_squared_error(y_train, mlp_3.predict(X_train)))
print("Training R^2: ", metrics.r2_score(y_train, mlp_3.predict(X_train)))

print("Testing MAE: ", metrics.mean_absolute_error(y_test, mlp_3.predict(X_test)))
print("Testing MSE: ", metrics.mean_squared_error(y_test, mlp_3.predict(X_test)))
print("Testing R^2: ", metrics.r2_score(y_test, mlp_3.predict(X_test)))




Multi-Layer Perceptrton 1
Training MAE:  0.7572205975134473
Training MSE:  0.8738560127625402
Training R^2:  0.2108335492781812
Testing MAE:  0.8288288932731758
Testing MSE:  1.0734035449662165
Testing R^2:  0.1377787011067676 

Multi-Layer Perceptrton 2
Training MAE:  0.8017493305114668
Training MSE:  0.9538405735278874
Training R^2:  0.13860067451408076
Testing MAE:  0.8616918430095396
Testing MSE:  1.1249356901896244
Testing R^2:  0.09638503010796373 

Multi-Layer Perceptrton 3
Training MAE:  0.8632034389506875
Training MSE:  1.108667702837059
Training R^2:  -0.0012214178305183232
Testing MAE:  0.9120064490550451
Testing MSE:  1.2486636040044923
Testing R^2:  -0.0030005579675469463
