# Academic Project - Deep Learning

It is recommended to run the activity in Google Colab.

In [1]:
#We import the libraries we will need:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

The first activity consists of predicting the wine quality from the [wine quality dataset](https://archive.ics.uci.edu/dataset/186/wine+quality).

In [2]:
# We download the data with pandas.
df_red = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', sep=';')
df_white = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', sep=';')

df = pd.concat([df_red, df_white])

df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
feature_names = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides','free sulfur dioxide', 'total sulfur dioxide',
                 'density', 'pH', 'sulphates', 'alcohol']


# Separate features and target

y = df.pop('quality').values
X = df.copy().values

In [4]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

print('x_train, y_train shapes:', x_train.shape, y_train.shape)
print('x_test, y_test shapes:', x_test.shape, y_test.shape)
print('Some qualities: ', y_train[:5])

x_train, y_train shapes: (4872, 11) (4872,)
x_test, y_test shapes: (1625, 11) (1625,)
Some qualities:  [6 7 8 5 6]


In [5]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(x_train)
X_test_scaled = scaler.transform(x_test)

**Question 1:** Create a sequential model with 4 hidden layers, each containing more than 60 neurons, without regularization, and obtain the results.

In [6]:
#We use 64 neurons as it is a value considered efficient for neural network practices, but other values could be used.
#We use 'tanh' for the hidden layers and 'linear' for the output layer. 'Relu' could be used, but the test loss is higher (0.6313).
#We observe the results with a model summary.

model = tf.keras.models.Sequential()

input_dim=11
output_dim=1
model.add(layers.Dense(64, input_shape=(input_dim,), activation='tanh')) #1st layer
model.add(layers.Dense(64, activation='tanh')) #2nd layer
model.add(layers.Dense(64, activation='tanh')) #3rd layer
model.add(layers.Dense(64, activation='tanh')) #4th layer
model.add(layers.Dense(output_dim, activation='linear'))

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Model compilation.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

In [8]:
model.fit(x_train,
          y_train,
          epochs=200,
          batch_size=32,
          validation_split=0.2,
          verbose=1)

Epoch 1/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.0000e+00 - loss: 5.1933 - val_accuracy: 0.0000e+00 - val_loss: 0.7530
Epoch 2/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.0000e+00 - loss: 0.6953 - val_accuracy: 0.0000e+00 - val_loss: 0.7243
Epoch 3/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.0000e+00 - loss: 0.7279 - val_accuracy: 0.0000e+00 - val_loss: 0.6907
Epoch 4/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.0000e+00 - loss: 0.6673 - val_accuracy: 0.0000e+00 - val_loss: 0.6727
Epoch 5/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.0000e+00 - loss: 0.6539 - val_accuracy: 0.0000e+00 - val_loss: 0.7185
Epoch 6/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.0000e+00 - loss: 0.6550 - val_accuracy: 0.0

<keras.src.callbacks.history.History at 0x781604d2e550>

In [9]:
results = model.evaluate(x_test, y_test, verbose=1)
print('Test Loss: {}'.format(results))

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 0.5679
Test Loss: [0.5750027894973755, 0.0]


**Question 2**: Use the same model from the previous question but add at least two different regularization techniques. It is not necessary to reduce the test loss.

In [10]:
model = tf.keras.models.Sequential()

#1st regularization technique: Dropout applied to each layer with a rate of 0.5

input_dim=11
output_dim=1
model.add(layers.Dense(64, input_shape=(input_dim,), activation='tanh')) #1st layer
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='tanh')) #2nd layer
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='tanh')) #3rd layer
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='tanh')) #4th layer
model.add(layers.Dropout(0.5))
model.add(layers.Dense(output_dim, activation='linear'))

In [11]:
# Model compilation.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

In [12]:
batch_size=10 #2nd regularization technique: Reducing the batch_size to 10.

In [13]:
model.fit(x_train,
          y_train,
          epochs=200,
          batch_size=batch_size,
          validation_split=0.2,
          verbose=1)

Epoch 1/200
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.0000e+00 - loss: 6.3159 - val_accuracy: 0.0000e+00 - val_loss: 1.4795
Epoch 2/200
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 1.8746 - val_accuracy: 0.0000e+00 - val_loss: 1.1272
Epoch 3/200
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.0000e+00 - loss: 1.5095 - val_accuracy: 0.0000e+00 - val_loss: 0.8843
Epoch 4/200
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 1.3728 - val_accuracy: 0.0000e+00 - val_loss: 0.7834
Epoch 5/200
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 1.3073 - val_accuracy: 0.0000e+00 - val_loss: 0.7645
Epoch 6/200
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 1.2143 - val_accuracy: 0.00

<keras.src.callbacks.history.History at 0x7815ede77910>

In [14]:
results = model.evaluate(x_test, y_test, verbose=1)
print('Test Loss: {}'.format(results))

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.5821
Test Loss: [0.5909020304679871, 0.0]


**Question 3:** Use the same model from the previous question but add an early stopping callback. It is not necessary to reduce the test loss.

In [15]:
model = tf.keras.models.Sequential()

input_dim=11
output_dim=1
model.add(layers.Dense(64, input_shape=(input_dim,), activation='tanh')) #1st layer
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='tanh')) #2nd layer
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='tanh')) #3rd layer
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='tanh')) #4th layer
model.add(layers.Dropout(0.5))
model.add(layers.Dense(output_dim, activation='linear'))

In [16]:
# Model compilation.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

In [17]:
#To define the early stopping callback, we use a callback with a patience of 5.

es_callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1)

model.fit(x_train,
          y_train,
          epochs=200,
          batch_size=32,
          validation_split=0.2,
          verbose=1,
          callbacks=[es_callback])

Epoch 1/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 13.8379 - val_accuracy: 0.0000e+00 - val_loss: 3.4107
Epoch 2/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 2.3667 - val_accuracy: 0.0000e+00 - val_loss: 2.0807
Epoch 3/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.0000e+00 - loss: 2.1185 - val_accuracy: 0.0000e+00 - val_loss: 1.8841
Epoch 4/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 1.7673 - val_accuracy: 0.0000e+00 - val_loss: 1.4225
Epoch 5/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 1.7435 - val_accuracy: 0.0000e+00 - val_loss: 1.1723
Epoch 6/200
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 1.5733 - val_accuracy: 0.0

<keras.src.callbacks.history.History at 0x7815ed3f4590>

In [18]:
results = model.evaluate(x_test, y_test, verbose=1)
print('Test Loss: {}'.format(results))

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.7716
Test Loss: [0.7888813614845276, 0.0]
