In [42]:
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from config import DB_USERNAME, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

# import warnings
# warnings.filterwarnings("ignore")

In [43]:
# Build the PostgreSQL connection URL from the settings imported from config.py
# NOTE(review): the values are interpolated as-is; SQLAlchemy expects URL-reserved
# characters (e.g. '@', ':', '/') in the username or password to be percent-encoded,
# so confirm the configured credentials contain none or are already encoded.
database_access = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"


In [44]:
# Build the SQLAlchemy engine for the PostgreSQL database from the connection URL above
engine = create_engine(database_access)


In [45]:
# SQL query that selects every column and row from "Wines"
# (the name is double-quoted so PostgreSQL keeps its capitalisation)
query = 'SELECT * FROM "Wines"'


In [46]:
# Run the query through the engine and load the result set into a pandas DataFrame
wines_df = pd.read_sql_query(query, engine)


In [47]:
# Preview the first rows of the DataFrame to check the load and the column names
wines_df.head()

Unnamed: 0,id,type,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,ph,sulphates,alcohol_wine,quality,quality_categorisation
0,4799,White,6.2,0.26,0.2,8.0,0.047,35.0,111.0,0.99445,3.11,0.42,10.4,6,1
1,1159,Red,10.2,0.41,0.43,2.2,0.11,11.0,37.0,0.99728,3.16,0.67,10.8,5,0
2,315,Red,7.1,0.35,0.29,2.5,0.096,20.0,53.0,0.9962,3.42,0.65,11.0,6,1
3,31,Red,6.9,0.685,0.0,2.5,0.105,22.0,37.0,0.9966,3.46,0.57,10.6,6,1
4,3010,White,6.7,0.25,0.31,1.35,0.061,30.5,218.0,0.99388,3.16,0.53,9.5,5,0


## LEWIS - Machine Learning Code 

In [48]:
# Columns that are not used by the ML model: the row identifier, the wine type
# and the raw quality score (quality_categorisation is used as the target instead)
unused_columns = ['id', 'type', 'quality']
cleaned_wines_df = wines_df.drop(columns=unused_columns)

# Preview the trimmed DataFrame
cleaned_wines_df.head()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,ph,sulphates,alcohol_wine,quality_categorisation
0,6.2,0.26,0.2,8.0,0.047,35.0,111.0,0.99445,3.11,0.42,10.4,1
1,10.2,0.41,0.43,2.2,0.11,11.0,37.0,0.99728,3.16,0.67,10.8,0
2,7.1,0.35,0.29,2.5,0.096,20.0,53.0,0.9962,3.42,0.65,11.0,1
3,6.9,0.685,0.0,2.5,0.105,22.0,37.0,0.9966,3.46,0.57,10.6,1
4,6.7,0.25,0.31,1.35,0.061,30.5,218.0,0.99388,3.16,0.53,9.5,0


In [49]:
# Separate the target label (y) from the feature columns (X)
target_column = "quality_categorisation"
y = cleaned_wines_df[target_column]
X = cleaned_wines_df.drop(columns=[target_column])


In [50]:
# Hold out 30% of the rows for testing; random_state pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# Learn the scaling parameters from the training features only, then apply the
# same transformation to both partitions so no test-set statistics leak into training
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [51]:
# Deep neural network for binary classification: three ReLU hidden layers
# (80 -> 30 -> 10 units), each with L2 weight regularisation and followed by
# 30% dropout, feeding a single sigmoid output unit.
keras_layers = tf.keras.layers
n_features = X_train_scaled.shape[1]

model = tf.keras.models.Sequential([
    # First hidden layer (also declares the width of the input)
    keras_layers.Dense(units=80, activation="relu", input_dim=n_features, kernel_regularizer=l2(0.001)),
    keras_layers.Dropout(0.3),
    # Second hidden layer
    keras_layers.Dense(units=30, activation="relu", kernel_regularizer=l2(0.001)),
    keras_layers.Dropout(0.3),
    # Third hidden layer
    keras_layers.Dense(units=10, activation="relu", kernel_regularizer=l2(0.001)),
    keras_layers.Dropout(0.3),
    # Output layer: one sigmoid unit giving the probability of the positive class
    keras_layers.Dense(units=1, activation="sigmoid"),
])

# Compile with binary cross-entropy loss, the Adam optimizer and accuracy tracking
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [52]:
#Train the model
model.fit(X_train, y_train, epochs=100, batch_size=10, validation_split=0.2)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x2557dbbbb08>

In [53]:
model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

61/61 - 0s - loss: 0.6643 - accuracy: 0.6205 - 384ms/epoch - 6ms/step
Loss: 0.6643070578575134, Accuracy: 0.620512843132019


## KAI - Correlation Code
##### (please use the [wines_df] DataFrame as it contains the 'quality' column that you will need to calculate the correlations)
