In [201]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
from tensorflow.keras import regularizers

In [202]:
wine_ds = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

In [203]:
df = pd.read_csv(wine_ds, delimiter=";")

In [204]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [205]:
# split the data such as 60% as training set, 20% for each set the validation set and test set
train, validate, test = np.split(df.sample(frac=1, random_state=42), [int(.6*len(df)), int(.8*len(df))])

In [206]:
len(df) == len(train) + len(validate) + len(test)

True

In [207]:
train.sample().shape

(1, 12)

In [208]:
train_input, train_label = train.iloc[:,:-1], train.iloc[:,-1:]
validate_input, validate_label = validate.iloc[:,:-1], validate.iloc[:,-1:]
test_input, test_label = test.iloc[:,:-1], test.iloc[:,-1:]

In [209]:
train_input.shape[1] == 11, train_label.shape[1] == 1

(True, True)

In [210]:
train_input.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
count,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0
mean,8.345255,0.525647,0.270459,2.51147,0.087281,15.786236,46.198123,0.996746,3.307581,0.654661,10.383125
std,1.777409,0.181753,0.192162,1.421391,0.04751,10.42368,32.432648,0.001941,0.157601,0.164917,1.065611
min,4.6,0.12,0.0,0.9,0.012,1.0,6.0,0.99007,2.86,0.33,8.4
25%,7.1,0.39,0.1,1.9,0.07,7.0,22.0,0.99554,3.2,0.55,9.5
50%,7.9,0.52,0.26,2.2,0.079,13.0,37.0,0.99672,3.31,0.62,10.1
75%,9.3,0.63,0.42,2.6,0.09,21.0,61.0,0.99787,3.4,0.73,11.083333
max,15.6,1.58,0.79,15.5,0.467,72.0,278.0,1.00369,4.01,1.98,14.0


## Build Tensorflow Datasets [source](https://medium.com/when-i-work-data/converting-a-pandas-dataframe-into-a-tensorflow-dataset-752f3783c168)

In [211]:
train_tfds = (tf.data.Dataset.from_tensor_slices((
            tf.cast(train_input[train_input.columns].values, tf.float16),
            tf.cast(train_label.values, tf.uint8))))

validate_tfds = (tf.data.Dataset.from_tensor_slices((
            tf.cast(validate_input[validate_input.columns].values, tf.float16),
            tf.cast(validate_label.values, tf.uint8))))

test_tfds = (tf.data.Dataset.from_tensor_slices((
            tf.cast(test_input[test_input.columns].values, tf.float16),
            tf.cast(test_label.values, tf.uint8))))


In [212]:
for features_tensor, target_tensor in test_tfds.take(3):
    print(f'features:{features_tensor} target:{target_tensor}')

features:[ 8.     0.28   0.44   1.8    0.081 28.    68.     0.995  3.36   0.66
 11.2  ] target:[5]
features:[ 7.      0.5     0.14    1.8     0.078  10.     23.      0.9966  3.53
  0.61   10.4   ] target:[5]
features:[ 6.      0.5     0.      1.4     0.057  15.     26.      0.9946  3.36
  0.45    9.5   ] target:[5]


In [213]:
train_median = []
for features_tensor, target_tensor in train_tfds:
    train_median.append(target_tensor[0])

validate_median = []
for features_tensor, target_tensor in validate_tfds:
    validate_median.append(target_tensor[0])

test_median = []
for features_tensor, target_tensor in test_tfds:
    test_median.append(target_tensor[0])


In [214]:
np.median(test_median) == np.median(validate_median) == np.median(train_median)

True

In [215]:
thershold = np.median(test_median) 
def make_binary(target):
    if target >= thershold: 
        return tf.constant(1, dtype=tf.float16)
    return tf.constant(0, dtype=tf.float16)

In [216]:
# credits to group 8 ^.^
batch_size = 50
def preprocessing(ds):
    global batch_size
    print(batch_size)
    ds = ds.map(lambda feature, label: (feature, make_binary(label)))

    ds = ds.cache()

    # to make sure there is no structure within the data, if it was created like (0,0,0,1,1,1,2,2,2,3)
    ds = ds.shuffle(1000)
    # like packaging, I want my network to get many samples at once, comptuationally efficient
    ds = ds.batch(batch_size)
    # prepare many samples
    ds = ds.prefetch(100)
    return ds


In [217]:
train_ds = train_tfds.apply(preprocessing)
validate_ds = validate_tfds.apply(preprocessing)
test_ds = test_tfds.apply(preprocessing)

50
50
50


## Model and Layer of last week's assigment of group 8

In [218]:
# credits to group 8!
class DenseLayer(tf.keras.layers.Layer):
    def __init__(self, units, activation, kernel_regularizer = None):

        super(DenseLayer, self).__init__(kernel_regularizer)
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units), initializer='random_normal', trainable=True)
        self.b= self.add_weight(shape=(self.units,), initializer='random_normal', trainable=True)

    def call(self, inputs):
        """Compute forward pass through layer."""
        x = tf.matmul(inputs, self.w) + self.b
        x = self.activation(x)
        return x

In [222]:
class MyModel(tf.keras.Model):
    def __init__(self, loss_function, optimizer, kernel_regularizer= None):
        super(MyModel, self).__init__()

        self.hidden_layer1 = DenseLayer(50, tf.nn.sigmoid, kernel_regularizer= kernel_regularizer)
        self.hidden_layer2 = DenseLayer(50, tf.nn.sigmoid, kernel_regularizer= kernel_regularizer)
        self.output_layer = DenseLayer(1, tf.nn.sigmoid)

        self.loss_function = loss_function
        self.optimizer = optimizer

    
    def call(self, inputs):
        output_hidden_layer1 = self.hidden_layer1(inputs)
        output_hidden_layer2 = self.hidden_layer2(output_hidden_layer1)
        output_network = self.output_layer(output_hidden_layer2)

        return output_network
        
    def train(self, input, target):
        with tf.GradientTape() as tape:
            prediction = self(input)
            loss = self.loss_function(prediction, target)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        same_prediction = target == np.round(prediction, 0)
        accuracy = np.mean(same_prediction)
        return loss, accuracy

    def test(self, test_data):
        """Calculate the mean loss and accuracy of the model over all elements
        of test_data.

        :param test_data: model is evaulated for test_data
        :type test_data: tensorflow 'Dataset'
        :return: mean loss and mean accuracy for all datapoints
        :rtype: tuple of two floats
        """
        # aggregator lists for tracking the loss and accuracy
        test_accuracy_agg = []
        test_loss_agg = []
        # iterate over all input-target pairs in test_data
        for (input, target) in test_data:
            prediction = self(input)
            # print(type(prediction))
            loss = self.loss_function(target, prediction)
            same_prediction = target == np.round(prediction, 0)
            accuracy = np.mean(same_prediction)
            # add loss and accuracy to aggregators
            test_loss_agg.append(loss.numpy())
            test_accuracy_agg.append(np.mean(accuracy))
        # calculate mean loss and accuracy
        test_loss = tf.reduce_mean(test_loss_agg)
        test_accuracy = tf.reduce_mean(test_accuracy_agg)
        return test_loss, test_accuracy

## Training

In [232]:
# Initialize lists for tracking loss and accuracy
train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []
validate_losses = []
validate_accuracies = []
def initialise():
    global train_losses
    train_losses = []
    global train_accuracies
    train_accuracies = []
    global test_losses 
    test_losses = []
    global test_accuracies
    test_accuracies = []
    global validate_losses 
    validate_losses = []
    global validate_accuracies
    validate_accuracies = []

In [233]:
def train(optimizer, kernel_regularizer = None, sgd = False):    
    tf.keras.backend.clear_session()

    # Setting Hyperparameters
    EPOCHS = 10
    LEARNING_RATE = 0.1

    # Initialize the loss-function
    binary_cross__loss = tf.keras.losses.BinaryCrossentropy()
    # Initialize the optimizer
    if sgd == True:
        optimizer = optimizer(LEARNING_RATE, momentum = 0.9)
    else:
        optimizer = optimizer(LEARNING_RATE)
    # Initialize the model
    model = MyModel(binary_cross__loss, optimizer, kernel_regularizer)

    global train_losses 
    global train_accuracies
    global test_losses
    global test_accuracies
    global validate_losses
    global validate_accuracies

    # Testing models performance before training starts.
    # Test-Dataset
    test_loss, test_accuracy = model.test(test_ds)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)
    # Train-Dataset
    train_loss, train_accuracy = model.test(train_ds)
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)


    # Training for EPOCHS.
    for epoch in range(EPOCHS+1):
        print(f'Epoch {str(epoch)} starting with test-accuracy of {np.round(test_accuracies[-1],3)}')
        epoch_loss_agg = []
        epoch_accuracy_agg = []
        for input, target in train_ds:
            train_loss, train_accuracy = model.train(input, target)
            epoch_loss_agg.append(train_loss)
            epoch_accuracy_agg.append(train_accuracy)
            
        # track training loss and accuracy
        train_losses.append(tf.reduce_mean(epoch_loss_agg))
        train_accuracies.append(tf.reduce_mean(epoch_accuracy_agg))
        # track loss and accuracy for test-dataset
        test_loss, test_accuracy = model.test(test_ds)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

        # track loss and accuracy for validare-dataset
        validate_loss, validate_accuracy = model.test(validate_ds)
        validate_losses.append(validate_loss)
        validate_accuracies.append(validate_accuracy)

In [234]:
def visualize():
    fig, axs = plt.subplots(1, 2)
    fig.set_size_inches(20, 6)

    fig.suptitle('Training Progress for Genomics Bacteria Classification')
    axs[0].plot(train_losses, color='orange', label='train losses')
    axs[0].plot(test_losses, color='green', label='test losses')
    axs[0].plot(validate_losses, color='blue', label='test losses')

    axs[0].set(ylabel='Losses')
    axs[0].legend()
    axs[1].plot(train_accuracies, color='orange', label='train accuracies')
    axs[1].plot(test_accuracies, color='green', label='test accuracies')
    axs[1].plot(validate_accuracies, color='blue', label='test losses')

    axs[1].set(xlabel='Epochs', ylabel='Accuracies')
    axs[1].legend()

In [None]:
def visualize():
    fig, axs = plt.subplots(1, 2)
    fig.set_size_inches(20, 6)

    fig.suptitle('Training Progress for Genomics Bacteria Classification')
    axs[0].plot(train_losses, color='orange', label='train losses')
    axs[0].plot(test_losses, color='green', label='test losses')
    axs[0].plot(validate_losses, color='blue', label='test losses')

    axs[0].set(ylabel='Losses')
    axs[0].legend()
    axs[1].plot(train_accuracies, color='orange', label='train accuracies')
    axs[1].plot(test_accuracies, color='green', label='test accuracies')
    axs[1].plot(validate_accuracies, color='blue', label='test losses')

    axs[1].set(xlabel='Epochs', ylabel='Accuracies')
    axs[1].legend()

In [227]:
train(tf.keras.optimizers.SGD)

Epoch 0 starting with test-accuracy of 0.434
Epoch 1 starting with test-accuracy of 0.439
Epoch 2 starting with test-accuracy of 0.43
Epoch 3 starting with test-accuracy of 0.434
Epoch 4 starting with test-accuracy of 0.434
Epoch 5 starting with test-accuracy of 0.434
Epoch 6 starting with test-accuracy of 0.434
Epoch 7 starting with test-accuracy of 0.447
Epoch 8 starting with test-accuracy of 0.434
Epoch 9 starting with test-accuracy of 0.443
Epoch 10 starting with test-accuracy of 0.43


## SGD with Momentum

In [223]:
train(tf.keras.optimizers.SGD, sgd=True)

Epoch 0 starting with test-accuracy of 0.566


ValueError: too many values to unpack (expected 2)

## Using AdamOptimiser

In [None]:
train(tf.keras.optimizers.Adam)

Epoch 0 starting with test-accuracy of 0.417
Epoch 1 starting with test-accuracy of 0.439
Epoch 2 starting with test-accuracy of 0.447
Epoch 3 starting with test-accuracy of 0.443
Epoch 4 starting with test-accuracy of 0.439
Epoch 5 starting with test-accuracy of 0.451
Epoch 6 starting with test-accuracy of 0.426
Epoch 7 starting with test-accuracy of 0.456
Epoch 8 starting with test-accuracy of 0.451
Epoch 9 starting with test-accuracy of 0.421
Epoch 10 starting with test-accuracy of 0.43


## Using regularizer L1 and Adam

In [None]:
train(tf.keras.optimizers.Adam, kernel_regularizer= regularizers.l1(l1=0.01))

Epoch 0 starting with test-accuracy of 0.426
Epoch 1 starting with test-accuracy of 0.447
Epoch 2 starting with test-accuracy of 0.426
Epoch 3 starting with test-accuracy of 0.443
Epoch 4 starting with test-accuracy of 0.561
Epoch 5 starting with test-accuracy of 0.553
Epoch 6 starting with test-accuracy of 0.583
Epoch 7 starting with test-accuracy of 0.566
Epoch 8 starting with test-accuracy of 0.557
Epoch 9 starting with test-accuracy of 0.549
Epoch 10 starting with test-accuracy of 0.57


## Using regularizer L12 and Adam

In [None]:
train(tf.keras.optimizers.Adam, kernel_regularizer= regularizers.l2(l2=0.01))

Epoch 0 starting with test-accuracy of 0.451
Epoch 1 starting with test-accuracy of 0.557
Epoch 2 starting with test-accuracy of 0.518
Epoch 3 starting with test-accuracy of 0.552
Epoch 4 starting with test-accuracy of 0.543
Epoch 5 starting with test-accuracy of 0.538
Epoch 6 starting with test-accuracy of 0.514
Epoch 7 starting with test-accuracy of 0.531
Epoch 8 starting with test-accuracy of 0.511
Epoch 9 starting with test-accuracy of 0.537
Epoch 10 starting with test-accuracy of 0.477


## Using regularizer L1 and L2 + Adam

In [None]:
train(tf.keras.optimizers.Adam, kernel_regularizer= regularizers.l1_l2(l1=0.01, l2=0.01))

Epoch 0 starting with test-accuracy of 0.447
Epoch 1 starting with test-accuracy of 0.544
Epoch 2 starting with test-accuracy of 0.57
Epoch 3 starting with test-accuracy of 0.574
Epoch 4 starting with test-accuracy of 0.557
Epoch 5 starting with test-accuracy of 0.579
Epoch 6 starting with test-accuracy of 0.553
Epoch 7 starting with test-accuracy of 0.553
Epoch 8 starting with test-accuracy of 0.557
Epoch 9 starting with test-accuracy of 0.553
Epoch 10 starting with test-accuracy of 0.574
