**Simple Test Data Set**

Prepare the data. To this end, we create a simple test set with 3 classes. The feature vector is 4-dimensional.

class 1 will have features [1, 0, 0, 0] 
class 2 will have features [0, 1, 0, 0]
class 3 will have features [0, 0, 1, 0]

Optionally, noise can be added.

Students do not need to change anything here.

In [1]:
#@title utility function for creating test data
import numpy as np

# The function creates a simple test data set
# parameters:
# class_offsett: the offsett that the respective class feature should have
# num_samples_per_class: the number of samples generated for each class. The total number of samples will be three times as much
# add_noise: flag for adding noise to the data
# normalize: flag for normalizing the data in the range between 0 and 1 (recommended)
# add_additional_dimension: flag for making the data compatible with processing of the MNIST data set. When using this data in Tensorflow, the flag needs to be set to False
def _create_samples(class_offsett = 1, num_samples_per_class = 1000, add_noise = True, normalize = True, add_additional_dimension = True):
    # create matrices with 4 features to discern 3 classes
    # class 1 will have features [class_offsett+noise, 0+noise, 0+noise, 0+noise]
    # class 2 will have features [0+noise, class_offsett+noise, 0+noise, 0+noise
    # class 3 will have features [0+noise, 0+noise, class_offsett+noise, 0+noise]
    sigma = 0.5
    zero_offset = 0.01 # in order to avoid overflows when using divide
    if add_noise == True:
        class1_features = np.array(
            [class_offsett + sigma * np.random.randn(), zero_offset + sigma * np.random.randn(), zero_offset + sigma * np.random.randn(), zero_offset + sigma * np.random.randn()])
        class2_features = np.array(
            [zero_offset + sigma * np.random.randn(), class_offsett + sigma * np.random.randn(), zero_offset + sigma * np.random.randn(), zero_offset + sigma * np.random.randn()])
        class3_features = np.array(
            [zero_offset + sigma * np.random.randn(), zero_offset + sigma * np.random.randn(), class_offsett + sigma * np.random.randn(), zero_offset + sigma * np.random.randn()])

        for i in range(1, num_samples_per_class):
            class1_features = np.vstack([class1_features, np.array(
                [class_offsett + sigma * np.random.randn(), zero_offset + sigma * np.random.randn(), zero_offset + sigma * np.random.randn(), zero_offset + sigma * np.random.randn()])])
            class2_features = np.vstack([class2_features, np.array(
                [zero_offset + sigma * np.random.randn(), class_offsett + sigma * np.random.randn(), zero_offset + sigma * np.random.randn(), zero_offset + sigma * np.random.randn()])])
            class3_features = np.vstack([class3_features, np.array(
                [zero_offset + sigma * np.random.randn(), zero_offset + sigma * np.random.randn(), class_offsett + sigma * np.random.randn(), zero_offset + sigma * np.random.randn()])])
    else:
        class1_features = np.array(
            [class_offsett, zero_offset, zero_offset, zero_offset])
        class2_features = np.array(
            [zero_offset, class_offsett, zero_offset, zero_offset])
        class3_features = np.array(
            [zero_offset, zero_offset, class_offsett, zero_offset])

        for i in range(1, num_samples_per_class):
            class1_features = np.vstack([class1_features, np.array(
                [class_offsett, zero_offset, zero_offset, zero_offset])])
            class2_features = np.vstack([class2_features, np.array(
                [zero_offset, class_offsett, zero_offset, zero_offset])])
            class3_features = np.vstack([class3_features, np.array(
                [zero_offset, zero_offset, class_offsett, zero_offset])])

    features = np.vstack([class1_features, class2_features, class3_features])


    if normalize == True:
        fm = np.mean(features)
        fsd = np.std(features)
        for i in range (0, features.shape[0]-1):
            # min-max normalization: y = (x - min) / (max - min)
            features[i, :]  = (features[i, :] - np.amin(features[i, :])) / (np.amax(features[i, :]) - np.amin(features[i, :]))
            # standardization: y = (x-mean)/standard_deviation
           # features[i, :] = (features[i, :] - fm) / fsd



            #print(features[i, :])


    # add another dimension to be compatible with the format of flat_images
    if add_additional_dimension == True:
        features = features[:, :, np.newaxis]


    labels_class1 = np.repeat(0, num_samples_per_class)
    labels_class2 = np.repeat(1, num_samples_per_class)
    labels_class3 = np.repeat(2, num_samples_per_class)

    labels = np.concatenate((labels_class1, labels_class2, labels_class3))

    return features, labels
print("Done")

Done


**MNIST Data Set**

The MNIST data set is loaded. Students do not need to change anything here.

In [2]:
#@title utility functions for loading MNIST data
import numpy as np
import struct

def read_labels(file_name, count):
    # open file
    with open(file_name, mode='rb') as file:
        file_content = file.read()

    # read number to test and amount of labels containted in this file
    [magic_number, labels_count] = struct.unpack('>ii', file_content[0:8])

    # if the magic_number does not match, something went wrong
    if magic_number != 0x801:
        print("Magic Number does not match. (0x{:02X})".format(magic_number))
        return

    # create format string
    # this string defines the layout of file_content
    # '>' = big endian
    # 'B' = unsigned byte
    labels_format_string = '>{}B'.format(labels_count)

    # itnerpret file_content as specified by label_format_string
    labels = np.array(struct.unpack(labels_format_string, file_content[8:]))
    print("Labels loaded.")
    return labels[0:count]


def read_images(file_name, count):
    # open file
    with open(file_name, mode='rb') as file:
        file_content = file.read()

    # load number to test against, number of images, row count and column count
    [magic_number, images_count, rows,
     cols] = struct.unpack('>iiii', file_content[0:16])

    # if magic_number does not match, something went wrong
    if magic_number != 0x0803:
        print("Magic Number does not match. (0x{:02X})".format(magic_number))
        return

    if not count is None:
        images_count = count
    # create image array
    images = np.zeros((images_count, rows, cols), np.float64)
    # define format string of one image row (image in mnist files is presented row wise)
    row_format_string = '>{}B'.format(cols)
    # define start variable of actual content
    start = 16

    for image in range(0, images_count):
        for row in range(0, rows):
            end = start + cols  # define end of image row
            # load image row
            images[image, row] = np.array(
                struct.unpack(row_format_string, file_content[start:end])) / 255

            start = end  # set start to next image

        print("Images loaded: {}".format(image + 1), end='\r')

    print()
    return images

def _flat_images(images, normalize = True):
    flatted = np.zeros((np.size(images, 0), np.size(images, 1) * np.size(images, 2), 1))
    for idx in range(np.size(images, 0)):
        for row in range(np.size(images, 1)):
            for col in range(np.size(images, 2)):
                flatted[idx][row * np.size(images, 1) + col][0] = images[idx][row][col]

    return flatted
print("Done")

Done


In [3]:
num_mnist_train = 5000
num_mnist_test = 1000
print("Reading training labels")
labels_training  = read_labels('res/training/train-labels-idx1-ubyte', num_mnist_train)
print("Reading training images")
images_training  = read_images('res/training/train-images-idx3-ubyte', num_mnist_train)

print("Reading test labels")
labels_test = read_labels('res/testing/t10k-labels-idx1-ubyte', num_mnist_test)
print("Reading test images")
images_test = read_images('res/testing/t10k-images-idx3-ubyte', num_mnist_test)

print("Flattening training images")
flat_images_training = _flat_images(images_training)

print("Flattening test images")
flat_images_test = _flat_images(images_test)

print("Done")


Reading training labels
Labels loaded.
Reading training images


Images loaded: 1
Images loaded: 2
Images loaded: 3
Images loaded: 4
Images loaded: 5
Images loaded: 6
Images loaded: 7
Images loaded: 8
Images loaded: 9
Images loaded: 10
Images loaded: 11
Images loaded: 12
Images loaded: 13
Images loaded: 14
Images loaded: 15
Images loaded: 16
Images loaded: 17
Images loaded: 18
Images loaded: 19
Images loaded: 20
Images loaded: 21
Images loaded: 22
Images loaded: 23
Images loaded: 24
Images loaded: 25
Images loaded: 26
Images loaded: 27
Images loaded: 28
Images loaded: 29
Images loaded: 30
Images loaded: 31
Images loaded: 32
Images loaded: 33
Images loaded: 34
Images loaded: 35
Images loaded: 36
Images loaded: 37
Images loaded: 38
Images loaded: 39
Images loaded: 40
Images loaded: 41
Images loaded: 42
Images loaded: 43
Images loaded: 44
Images loaded: 45
Images loaded: 46
Images loaded: 47
Images loaded: 48
Images loaded: 49
Images loaded: 50
Images loaded: 51
Images loaded: 52
Images loaded: 53
Im

Images loaded: 1172
Images loaded: 1173
Images loaded: 1174
Images loaded: 1175
Images loaded: 1176
Images loaded: 1177
Images loaded: 1178
Images loaded: 1179
Images loaded: 1180
Images loaded: 1181
Images loaded: 1182
Images loaded: 1183
Images loaded: 1184
Images loaded: 1185
Images loaded: 1186
Images loaded: 1187
Images loaded: 1188
Images loaded: 1189
Images loaded: 1190
Images loaded: 1191
Images loaded: 1192
Images loaded: 1193
Images loaded: 1194
Images loaded: 1195
Images loaded: 1196
Images loaded: 1197
Images loaded: 1198
Images loaded: 1199
Images loaded: 1200
Images loaded: 1201
Images loaded: 1202
Images loaded: 1203
Images loaded: 1204
Images loaded: 1205
Images loaded: 1206
Images loaded: 1207
Images loaded: 1208
Images loaded: 1209
Images loaded: 1210
Images loaded: 1211
Images loaded: 1212
Images loaded: 1213
Images loaded: 1214
Images loaded: 1215
Images loaded: 1216
Images loaded: 1217
Images loaded: 1218
Images loaded

Images loaded: 1810
Images loaded: 1811
Images loaded: 1812
Images loaded: 1813
Images loaded: 1814
Images loaded: 1815
Images loaded: 1816
Images loaded: 1817
Images loaded: 1818
Images loaded: 1819
Images loaded: 1820
Images loaded: 1821
Images loaded: 1822
Images loaded: 1823
Images loaded: 1824
Images loaded: 1825
Images loaded: 1826
Images loaded: 1827
Images loaded: 1828
Images loaded: 1829
Images loaded: 1830
Images loaded: 1831
Images loaded: 1832
Images loaded: 1833
Images loaded: 1834
Images loaded: 1835
Images loaded: 1836
Images loaded: 1837
Images loaded: 1838
Images loaded: 1839
Images loaded: 1840
Images loaded: 1841
Images loaded: 1842
Images loaded: 1843
Images loaded: 1844
Images loaded: 1845
Images loaded: 1846
Images loaded: 1847
Images loaded: 1848
Images loaded: 1849
Images loaded: 1850
Images loaded: 1851
Images loaded: 1852
Images loaded: 1853
Images loaded: 1854
Images loaded: 1855
Images loaded: 1856
Images loaded

Images loaded: 2443
Images loaded: 2444
Images loaded: 2445
Images loaded: 2446
Images loaded: 2447
Images loaded: 2448
Images loaded: 2449
Images loaded: 2450
Images loaded: 2451
Images loaded: 2452
Images loaded: 2453
Images loaded: 2454
Images loaded: 2455
Images loaded: 2456
Images loaded: 2457
Images loaded: 2458
Images loaded: 2459
Images loaded: 2460
Images loaded: 2461
Images loaded: 2462
Images loaded: 2463
Images loaded: 2464
Images loaded: 2465
Images loaded: 2466
Images loaded: 2467
Images loaded: 2468
Images loaded: 2469
Images loaded: 2470
Images loaded: 2471
Images loaded: 2472
Images loaded: 2473
Images loaded: 2474
Images loaded: 2475
Images loaded: 2476
Images loaded: 2477
Images loaded: 2478
Images loaded: 2479
Images loaded: 2480
Images loaded: 2481
Images loaded: 2482
Images loaded: 2483
Images loaded: 2484
Images loaded: 2485
Images loaded: 2486
Images loaded: 2487
Images loaded: 2488
Images loaded: 2489
Images loaded

Images loaded: 3077
Images loaded: 3078
Images loaded: 3079
Images loaded: 3080
Images loaded: 3081
Images loaded: 3082
Images loaded: 3083
Images loaded: 3084
Images loaded: 3085
Images loaded: 3086
Images loaded: 3087
Images loaded: 3088
Images loaded: 3089
Images loaded: 3090
Images loaded: 3091
Images loaded: 3092
Images loaded: 3093
Images loaded: 3094
Images loaded: 3095
Images loaded: 3096
Images loaded: 3097
Images loaded: 3098
Images loaded: 3099
Images loaded: 3100
Images loaded: 3101
Images loaded: 3102
Images loaded: 3103
Images loaded: 3104
Images loaded: 3105
Images loaded: 3106
Images loaded: 3107
Images loaded: 3108
Images loaded: 3109
Images loaded: 3110
Images loaded: 3111
Images loaded: 3112
Images loaded: 3113
Images loaded: 3114
Images loaded: 3115
Images loaded: 3116
Images loaded: 3117
Images loaded: 3118
Images loaded: 3119
Images loaded: 3120
Images loaded: 3121
Images loaded: 3122
Images loaded: 3123
Images loaded

Images loaded: 3700
Images loaded: 3701
Images loaded: 3702
Images loaded: 3703
Images loaded: 3704
Images loaded: 3705
Images loaded: 3706
Images loaded: 3707
Images loaded: 3708
Images loaded: 3709
Images loaded: 3710
Images loaded: 3711
Images loaded: 3712
Images loaded: 3713
Images loaded: 3714
Images loaded: 3715
Images loaded: 3716
Images loaded: 3717
Images loaded: 3718
Images loaded: 3719
Images loaded: 3720
Images loaded: 3721
Images loaded: 3722
Images loaded: 3723
Images loaded: 3724
Images loaded: 3725
Images loaded: 3726
Images loaded: 3727
Images loaded: 3728
Images loaded: 3729
Images loaded: 3730
Images loaded: 3731
Images loaded: 3732
Images loaded: 3733
Images loaded: 3734
Images loaded: 3735
Images loaded: 3736
Images loaded: 3737
Images loaded: 3738
Images loaded: 3739
Images loaded: 3740
Images loaded: 3741
Images loaded: 3742
Images loaded: 3743
Images loaded: 3744
Images loaded: 3745
Images loaded: 3746
Images loaded

Images loaded: 4327
Images loaded: 4328
Images loaded: 4329
Images loaded: 4330
Images loaded: 4331
Images loaded: 4332
Images loaded: 4333
Images loaded: 4334
Images loaded: 4335
Images loaded: 4336
Images loaded: 4337
Images loaded: 4338
Images loaded: 4339
Images loaded: 4340
Images loaded: 4341
Images loaded: 4342
Images loaded: 4343
Images loaded: 4344
Images loaded: 4345
Images loaded: 4346
Images loaded: 4347
Images loaded: 4348
Images loaded: 4349
Images loaded: 4350
Images loaded: 4351
Images loaded: 4352
Images loaded: 4353
Images loaded: 4354
Images loaded: 4355
Images loaded: 4356
Images loaded: 4357
Images loaded: 4358
Images loaded: 4359
Images loaded: 4360
Images loaded: 4361
Images loaded: 4362
Images loaded: 4363
Images loaded: 4364
Images loaded: 4365
Images loaded: 4366
Images loaded: 4367
Images loaded: 4368
Images loaded: 4369
Images loaded: 4370
Images loaded: 4371
Images loaded: 4372
Images loaded: 4373
Images loaded

Images loaded: 4947
Images loaded: 4948
Images loaded: 4949
Images loaded: 4950
Images loaded: 4951
Images loaded: 4952
Images loaded: 4953
Images loaded: 4954
Images loaded: 4955
Images loaded: 4956
Images loaded: 4957
Images loaded: 4958
Images loaded: 4959
Images loaded: 4960
Images loaded: 4961
Images loaded: 4962
Images loaded: 4963
Images loaded: 4964
Images loaded: 4965
Images loaded: 4966
Images loaded: 4967
Images loaded: 4968
Images loaded: 4969
Images loaded: 4970
Images loaded: 4971
Images loaded: 4972
Images loaded: 4973
Images loaded: 4974
Images loaded: 4975
Images loaded: 4976
Images loaded: 4977
Images loaded: 4978
Images loaded: 4979
Images loaded: 4980
Images loaded: 4981
Images loaded: 4982
Images loaded: 4983
Images loaded: 4984
Images loaded: 4985
Images loaded: 4986
Images loaded: 4987
Images loaded: 4988
Images loaded: 4989
Images loaded: 4990
Images loaded: 4991
Images loaded: 4992
Images loaded: 4993
Images loaded

Images loaded: 1
Images loaded: 2
Images loaded: 3
Images loaded: 4
Images loaded: 5
Images loaded: 6
Images loaded: 7
Images loaded: 8
Images loaded: 9
Images loaded: 10
Images loaded: 11
Images loaded: 12
Images loaded: 13
Images loaded: 14
Images loaded: 15
Images loaded: 16
Images loaded: 17
Images loaded: 18
Images loaded: 19
Images loaded: 20
Images loaded: 21
Images loaded: 22
Images loaded: 23
Images loaded: 24
Images loaded: 25
Images loaded: 26
Images loaded: 27
Images loaded: 28
Images loaded: 29
Images loaded: 30
Images loaded: 31
Images loaded: 32
Images loaded: 33
Images loaded: 34
Images loaded: 35
Images loaded: 36
Images loaded: 37
Images loaded: 38
Images loaded: 39
Images loaded: 40
Images loaded: 41
Images loaded: 42
Images loaded: 43
Images loaded: 44
Images loaded: 45
Images loaded: 46
Images loaded: 47
Images loaded: 48
Images loaded: 49
Images loaded: 50
Images loaded: 51
Images loaded: 52
Images loaded: 53
Im

Flattening test images
Done


**Loss Functions**

Students need to define the cross_entropy loss (in function `cross_entropy_loss(y, a)`) and its derivative (in function `d_cross_entropy_loss(y, a)`). See comments in the code for hints. The results, when using cross-entropy loss should be similar to the results achieved when using log loss. Please be aware, that in the derivative of the log loss function, the input vector is not checked for having zero elements. However, this should be done for the derivative of the cross entropy function.


In [4]:
#@title cross_entropy_loss(y, a)
def cross_entropy_loss(y, a):
    ### STUDENT TODO START ###
    return np.multiply(y, -np.log2(a))
    ### STUDENT TODO END ###
print ("Done")

Done


In [5]:
#@title d_cross_entropy_loss(y, a)
def d_cross_entropy_loss(y, a):
    ### STUDENT TODO START ###
    # we need to handle the case when the output a == 0
    if np.all(a):  # returns true if no  element is zero
        return -y / (a*np.log(2))
    else:
        # we need to identify the zero elements. For those the result of the division is set to zero instead of NAN 
        res = y
        for i in range(0, y.size):
            if a[i] == 0:
                res[i] = 0
            else:
                res[i] = -y[i] / (a[i]*np.log(2))
        return res
    ### STUDENT TODO END ###
print("Done")    

Done


In [6]:

# y is the one hot encoded label vector
# a is the output vector of the network
def log_loss(y, a):
    return -(y * np.log(a) + (1 - y) * np.log(1 - a))

# y is the one hot encoded label vector
# a is the output vector of the network
def d_log_loss(y, a):
    a[a > 0.99] = 0.99
    return (a - y) / (a * (1 - a))
print("Done")        

Done


In [7]:
# here, the available loss functions are defined
loss_functions = {
    'cross_entropy': [cross_entropy_loss, d_cross_entropy_loss],
    'log': [log_loss, d_log_loss],
}
print("Done")    

Done


**Activation Functions**

Students need to define the softmax function (``softmax(x)``) and its derivative (``d_softmax(x)``) as well as the ReLu function (``relu(x)``) and its derivative (``d_relu(x)``).
For testing purposes the sigmoid function and its derivative are defined and can be used in a simple neural network along the log loss function.






# Solution Activation Function

In [8]:
#@title softmax, relu and derivatives
# x is the vector of input values (sum of weights * output of previous layer)
def softmax(x):
    ### STUDENT TODO START ###
    ex = np.exp(x)
    sumex = ex.sum()
    if sumex == 0:
        return 0
    else:
        return ex / sumex
    ### STUDENT TODO END ###

def d_softmax(x):
    ### STUDENT TODO START ###
    ex = np.exp(x)
    sumex = ex.sum()
    sumex_sq = sumex*sumex
    derivative = np.zeros((x.size, x.size))
    if sumex != 0:
        for i in range(x.shape[0]):
            for j in range(x.shape[0]):
                if i == j:
                    derivative[i][j] += ex[i] / sumex * (1 - ex[i] / sumex)
                else:
                    derivative[i][j] += (-ex[i] * ex[j]) / sumex_sq
    return derivative
    ### STUDENT TODO END ###

def relu(x):   
    p=x
    for i in range(0, len(x)):
        p[i]=max(0, x[i])
    return p   
  

print("Done")   


def d_relu(x):
  
    p = x
    for i in range(0, len(x)):
        if x[i] > 0:
            p[i] = 1
        else:
            p[i] = 0
    return p

Done


In [9]:
# x is the vector of input values (sum of weights * output of previous layer)
def sigmoid(x):
    return 1/(1 + np.exp(-x))


# x is the vector of input values (sum of weights * output of previous layer)
def d_sigmoid(x):
    return (1 - sigmoid(x)) * sigmoid(x)


print("Done")        

Done


In [10]:
activation_functions = {
    'softmax': [softmax, d_softmax],
    'relu': [relu, d_relu],
    'sigmoid': [sigmoid, d_sigmoid],
}
print("Done") 

Done


**Layers**

Here, a single (hidden or output) layer of a neural network is setup.

# Solution Layers

In [11]:
#@title Layer
class Layer(object):

    def __init__(self, inputs: int, neurons: int, activation: str):
        sigma = 2 / inputs; # he initalization
        self.W = sigma * np.random.randn(neurons, inputs)
        
        self.b = sigma * np.random.randn(neurons, 1)
        self.activation = activation
        act = activation_functions.get(activation)
       
        self.act = act[0]
        self.d_act = act[1]

        self.initialW = self.W
        self.initialb = self.b

    def __repr__(self):
        return "\nLayer:(neurons: {}\n inputs: {}\n activation function: {}\n initial weights: {}\nfinal weights: {}\n initial biases: {}\nfinal biases: {})".format(np.size(self.W, 0), np.size(self.W, 1), self.activation, self.initialW, self.W, self.initialb, self.b)

    def __str__(self):
        return self.__repr__()

    def __len__(self):
        return np.size(self.W, 0)

    # Feedforward pass.
    # The method takes output values of a previous layer (or the input values of the network) and applies
    # the transfer function (sum of the product between weights and input values).
    # It then applies the activation function (softmax, ReLu, Sigmoid, ...).
    def feedforward(self, A_prev):
        self.A_prev = A_prev
        self.Z = np.dot(self.W, self.A_prev) + self.b
        # Normalize the input from the previous layer to avoid buffer overflow in the exponential function.
        self.Z = (self.Z - np.mean(self.Z)) / np.std(self.Z)
        self.A = self.act(self.Z)
        return self.A


        # Backpropagation.
    # This method takes in an error (dA) and a learning rate (learning_rate).
    # In an output layer, dA is the average loss between the network output and the expected values (labels)
    # In a hidden  layer, dA is the sum of the product between the weights (self.W) of that layer and the the loss (dA) of the next layer (closer to the output).
    # After calculating and applying the weight adjustments it propagates the error back to the previous layer.
    def backprop(self, dA, learning_rate):
        # drdo is the partial derivative of the loss function r with respect to the activation function o (dr/do).
        drdo = dA  # dr/do : mx1

        # dods is the partial derivative of the activation function with respect to the transfer function.
        dods = self.d_act(self.Z)                #do/ds : mxm for Softmax or mx1 for ReLu.

        # dods_drdo is the product of dods and drdo
        dods_drdo = 0

        ### STUDENT TODO START ###
        # Multiply the derivative of the activation function w.r.t the transfer function
        # with the derivative of the loss function w.r.t. the activation function.

        # You need to discern between vector- and matrix-valued input
        # Hint: use np.multiply and np.dot respectively
        if dods.shape == drdo.shape: # ReLu, etc. which return a vector as output
            dods_drdo = dods_drdo
        else:                      # Softmax, which returns a matrix as output
            dods_drdo =  dods_drdo
        ### STUDENT TODO END ###



        ### STUDENT TODO START ###
        # Calculate  the change of the weights dW as
        # To this end, you need to sum the product between
        # the output of the previous layer (self.A_prev) and the joint product of dods and drdo.
        # Hint: use np.dot for efficient computation.

        dW = np.dot(dods_drdo, self.A_prev.T)
        # db is the change of the bias terms
        db = 1/dA.shape[0] * dods_drdo #  mx1
        ### STUDENT TODO END ###

        # Normalize the weights to prevent them from getting too large.
        dW = 1/dA.shape[0] * dW     # o * do/ds * dr/do : mx1 * 1xn = mxn

        

        # updating weights and bias terms
        self.W = self.W - learning_rate * dW
        self.b = self.b - learning_rate * db

        # Normalize both weights and biases in order to avoid them to get too large,
        # which could lead to a buffer overflow in softmax in the next feedforward pass.
        self.W = (self.W - np.mean(self.W)) / np.std(self.W)
        self.b = (self.b - np.mean(self.b)) / np.std(self.b)

        # Flag for printing the internal values like the intermediate weights, calculated during backpropagation
        _printValues = False
        if _printValues:
            print("dods: ", dods)
            print("drdo: ", drdo)
            print("dods_drdo: ", dods_drdo)
            print("activation:", self.activation)
            print("dW:", dW)
            print("db:", db)

        ### STUDENT TODO START ###
        # Calculate the error dA_prev  which is passed to the previous layer
        # (the neighboring layer closer to the input of the network.
        # Compute dA_prev as the sum of the product between the weights (self.W)
        # and our product of the partial derivatives dods and drdo

        dA_prev = 0
        # dA_prev = # ...
        dA_prev = dA_prev  # dr/do <- W * do/ds * dr/do

        ### STUDENT TODO END ###
        # dA_prev gets then backpropagated to the previous layer
        return dA_prev
print("Done")
 

Done


Setup the Neural Network Model

In [12]:
import random

class Model(object):

    def __init__(self, layers, loss, learning_rate, regL1, regL2):
        self.layers = []
        for idx in range(0, len(layers)):
            self.layers.append(Layer(layers[idx][0], layers[idx][1], layers[idx][2]))

        self.loss_name = loss
        loss_arr = loss_functions.get(loss)
        self.loss = loss_arr[0]
        self.d_loss = loss_arr[1]

        self.learning_rate = learning_rate
        self.regL1 = regL1
        self.regL2 = regL2


    def __repr__(self):
        return "Layers: {}\nLoss function: {}\n".format(self.layers, self.loss_name)
       # return "Layer: {}\nLoss function: {}\nOptimizer:{}".format(self.layers, self.loss_name, self.optimizer_name)

    
    def feedforward(self, X):
        A = X
        for layer in self.layers:
            A = layer.feedforward(A)

        return A


    def _next_batch(self, X, y, batch_size):
        for i in np.arange(0, X.shape[0], batch_size):
            # yield a tuple of the current batched data and labels
            
            start = int(random.uniform(0, X.shape[0] - batch_size))
            if i + batch_size >= X.shape[0]-1:
                yield (X[start:X.shape[0]], y[start:X.shape[0]])
            else:
                yield (X[start:start + batch_size], y[start:start + batch_size])


    def _backprop(self, dA):
        for layer in reversed(self.layers):
            dA = layer.backprop(dA, self.learning_rate)


    def train(self, X, y, epochs, batch_size):       
        for epoch in range(epochs):
            print("epoch " + str(epoch+1) + " out of " + str(epochs))
            batch = 0
            for (batch_X, batch_y) in self._next_batch(X, y, batch_size):

                curr_batch_size = batch_X.shape[0]
                dA = np.zeros((len(self.layers[-1]), 1))


                weight_sum : np.float64 = 0
                weight_sum_sq : np.float64 = 0
                for layer in self.layers:
                    weight_sum += np.sum(np.abs(layer.W))
                    weight_sum_sq += np.sum(layer.W**2) ## overflow can occur

                loss = 0
                for i in range(curr_batch_size):
                    c_X = batch_X[i]
                    
                    c_y = np.zeros((len(self.layers[-1]), 1))
                    c_y[batch_y[i]][0] = 1
                    
                    A = self.feedforward(c_X)
                    loss = self.d_loss(c_y, A)
                    dA += loss

                    #print(epoch + 1, ":", batch + 1, "-", i + 1, "          ", end='\r')
                
                
                dA = dA / curr_batch_size
                dA +=  + self.regL1 * weight_sum + self.regL2 * weight_sum_sq
                self._backprop(dA)
                batch = batch + 1
        print("")

    def evaluate(self, X, y):
        curr_batch_size = X.shape[0]
        sum_loss=0
        correct=0
        for i in range(curr_batch_size):
            c_X = X[i]
            c_y = np.zeros((len(self.layers[-1]), 1))
            c_y[y[i]][0] = 1
            A = self.feedforward(c_X)
            loss = self.loss(c_y, A)
            #loss = self.d_loss(c_y, A)
            sum_loss+=np.linalg.norm(loss)
                
            if y[i]==A.argmax():
                correct+=1
        print("AVG LOSS:",sum_loss/curr_batch_size, "  ACCURACY:", correct/curr_batch_size*100,"%.")
print("Done")


Done


Main function. Students should should validate the performance of their implemantiation by running this functions. After implementing backpropagation, the accuracy should already around 80% for both the simple and the MNIST data set. After implementing softmax, relu and cross entropy loss (and their derivatives), the structure of the neural network needs to be adopted in `nn_mnist = Model(...`. The struture for the simple data set is already given and only needs to be uncommented.

In [13]:
from os import read
import sys
import numpy as np

batch_size = 1 # you can increase the batch size to increase training. However, this will decrease accuracy
epochs = 10
use_mnist = False # first evaluate your implementation with the sample data. Afterwards, switch to MNIST

if use_mnist == True:
  print("Training and Evaluating MNIST")   
  print("setting up model")

  ### TODO STUDENT START ###
  # try out different network structures: only softmax, relu + softmax, ...

  ### TODO STUDENT END ###
  nn_mnist = Model([(784, 10, 'softmax')], 'cross_entropy', 0.1, 0.0001, 0.00001)
  print("before training the model looks like this:")
  print(nn_mnist)
  print("evaluating model with training data before training")
  nn_mnist.evaluate(flat_images_training, labels_training)
  print("evaluating model with test data before training")
  nn_mnist.evaluate(flat_images_test, labels_test)

  print("training model")
  nn_mnist.train(flat_images_training, labels_training, epochs, batch_size)
  print("after training the model looks like this:")
  print(nn_simple)
  print("evaluating model with training data")
  nn_mnist.evaluate(flat_images_training, labels_training)
  print("evaluating model with test data")
  nn_mnist.evaluate(flat_images_test, labels_test)
else:
  print("training and evaluating sample data")   
  num_samples_per_class_training = 300
  num_samples_per_class_test = 500 
  with_noise = False
  training_features, training_labels = _create_samples(1, num_samples_per_class_training, with_noise, True, True)
  test_features, test_labels = _create_samples(1, num_samples_per_class_test, with_noise, True, True)
  print("features:")
  print(training_features)
  print("labels:")
  print(training_labels)
  print("setting up model")
  nn_simple = Model([
     (4, 4, 'relu'),
     (4, 4, 'softmax')],     
    'cross_entropy',       
     0.1, 0.0001, 0.00001)
  print("before training the model looks like this:")
  print(nn_simple)
  print("evaluating model with training features before training")
  nn_simple.evaluate(training_features, training_labels)
  print("evaluating model with test features before training")
  nn_simple.evaluate(test_features, test_labels)
  print("training model")
  nn_simple.train(training_features, training_labels, epochs, batch_size)
  print("after training the model looks like this:")
  print(nn_simple)
  print("evaluating model with training features")
  nn_simple.evaluate(training_features, training_labels)
  print("evaluating model with test features")
  nn_simple.evaluate(test_features, test_labels)



training and evaluating sample data
features:
[[[1.  ]
  [0.  ]
  [0.  ]
  [0.  ]]

 [[1.  ]
  [0.  ]
  [0.  ]
  [0.  ]]

 [[1.  ]
  [0.  ]
  [0.  ]
  [0.  ]]

 ...

 [[0.  ]
  [0.  ]
  [1.  ]
  [0.  ]]

 [[0.  ]
  [0.  ]
  [1.  ]
  [0.  ]]

 [[0.01]
  [0.01]
  [1.  ]
  [0.01]]]
labels:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

UnboundLocalError: local variable 'dW' referenced before assignment