In [1]:
import numpy as np
import tensorflow as tf

2023-06-03 09:55:13.797578: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-03 09:55:13.822121: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-03 09:55:13.964304: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-03 09:55:13.964935: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
class DenseLayer(tf.keras.layers.Layer):
    """Fully connected layer computing ``W @ x + b``.

    ``myWeights`` has shape ``(output_size, input_size)`` and ``bias`` has
    shape ``(output_size, 1)``; both are initialised from a standard normal
    distribution via ``np.random.randn`` and stored as float32 variables.
    """

    def __init__(self, input_size, output_size):
        """Create the weight matrix and bias column for the given sizes."""
        super(DenseLayer, self).__init__()
        self.myWeights = tf.Variable(np.random.randn(output_size, input_size), dtype=tf.float32)
        self.bias = tf.Variable(np.random.randn(output_size, 1), dtype=tf.float32)

    def call(self, inputs):
        """Return ``myWeights @ inputs + bias``.

        The input is remembered on the instance so that ``backpropagation``
        can build the weight gradient from it.
        Assumes ``inputs`` is a single ``(input_size, 1)`` column vector --
        TODO confirm against callers.
        """
        self.myInput = inputs
        return tf.matmul(self.myWeights, self.myInput) + self.bias

    def backpropagation(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the gradient w.r.t. the input.

        Args:
            output_gradient: gradient of the loss with respect to this layer's
                output (same shape as the value returned by ``call``).
            learning_rate: step size used for the weight and bias updates.

        Returns:
            The gradient of the loss with respect to the input most recently
            passed to ``call``.
        """
        weights_gradient = tf.matmul(output_gradient, tf.transpose(self.myInput))
        # The input gradient must use the weights that produced the forward
        # pass, so compute it BEFORE the in-place update below.
        input_gradient = tf.matmul(tf.transpose(self.myWeights), output_gradient)
        self.myWeights.assign_sub(learning_rate * weights_gradient)
        self.bias.assign_sub(learning_rate * output_gradient)
        return input_gradient

In [57]:
class Activation(tf.keras.layers.Layer):
    """Layer that applies a function on the way forward and its derivative on the way back.

    ``activation`` is called on the layer input in ``call``;
    ``activation_prime`` is called on that same stored input in
    ``backpropagation`` (both are presumably element-wise -- verify for any
    new activation passed in).
    """

    def __init__(self, activation, activation_prime):
        """Store the forward function and its derivative."""
        super().__init__()
        self.activation_prime = activation_prime
        self.activation = activation

    def call(self, inputs):
        """Remember ``inputs`` for the backward pass and return ``activation(inputs)``."""
        self.my_input = inputs
        activated = self.activation(self.my_input)
        return activated

    def backpropagation(self, output_gradient, learning_rate):
        """Return ``output_gradient`` scaled by the derivative at the stored input.

        ``learning_rate`` is accepted for signature parity with other layers
        and is not used here.
        """
        local_slope = self.activation_prime(self.my_input)
        return tf.multiply(output_gradient, local_slope)


In [58]:
def mse(y_true, y_pred):
    """Return the mean of the squared differences between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    squared_residual = tf.pow(residual, 2)
    return tf.reduce_mean(squared_residual)

def mse_prime(y_true, y_pred):
    """Return the derivative of the mean squared error with respect to ``y_pred``.

    For ``mean((y_true - y_pred) ** 2)`` over ``N`` elements this is
    ``2 * (y_pred - y_true) / N``.
    """
    difference = y_pred - y_true
    # tf.size yields an int32 tensor by default; dividing a float tensor by it
    # fails with a dtype mismatch, so cast the element count to match.
    num_elements = tf.cast(tf.size(y_true), difference.dtype)
    return 2 * difference / num_elements



In [59]:
class Sigmoid(Activation):
    """Activation layer using the logistic sigmoid and its derivative ``s * (1 - s)``."""

    def __init__(self):
        """Wire the sigmoid function and its derivative into ``Activation``."""

        def _logistic(values):
            return tf.math.sigmoid(values)

        def _logistic_slope(values):
            # Derivative expressed through the sigmoid's own output.
            activated = _logistic(values)
            return activated * (1 - activated)

        super().__init__(_logistic, _logistic_slope)


class ReLU(Activation):
    """Activation layer using ``tf.nn.relu`` with a 0/1 step function as its derivative."""

    def __init__(self):
        """Wire the rectifier and its derivative into ``Activation``."""

        def _rectify(values):
            return tf.nn.relu(values)

        def _rectify_slope(values):
            # 1.0 where the input is strictly positive, 0.0 elsewhere (float32).
            is_positive = values > 0
            return tf.cast(is_positive, tf.float32)

        super().__init__(_rectify, _rectify_slope)

In [64]:
class DenseNetwork:
    """Static helpers for running and training a network given as a list of layers."""

    @staticmethod
    def predict(network, inputs):
        """Pass ``inputs`` through each layer of ``network`` in order and return the result."""
        output = inputs
        for layer in network:
            output = layer(output)
        return output

    @staticmethod
    def train(network, loss, loss_prime, x_train, y_train, epochs=1000, learning_rate=0.01, verbose=True):
        """Train ``network`` with per-sample gradient descent.

        Args:
            network: iterable of layers; each must be callable and expose
                ``trainable_variables``.
            loss: callable ``loss(y_true, y_pred)`` returning the value to minimise.
            loss_prime: accepted for interface compatibility; it is not used
                because gradients are obtained from ``tf.GradientTape``.
            x_train: training inputs, iterated sample by sample.
            y_train: training targets, paired with ``x_train`` via ``zip``.
            epochs: number of passes over the training data.
            learning_rate: step size applied to every gradient.
            verbose: when true, print the mean sample loss after each epoch.

        Raises:
            ValueError: if ``x_train`` contains no samples.
        """
        num_samples = len(x_train)
        if num_samples == 0:
            # Without this the per-epoch average below would divide by zero.
            raise ValueError("x_train must contain at least one sample")

        for e in range(epochs):
            error = 0
            for x, y in zip(x_train, y_train):
                x = tf.convert_to_tensor(x)
                y = tf.convert_to_tensor(y)

                with tf.GradientTape() as tape:
                    output = DenseNetwork.predict(network, x)
                    sample_loss = loss(y, output)
                # Keep the running total outside the tape: only this sample's
                # loss is differentiated, the total is purely for reporting.
                error += sample_loss

                variables = [var for layer in network for var in layer.trainable_variables]
                grads = tape.gradient(sample_loss, variables)
                for var, grad in zip(variables, grads):
                    if grad is None:
                        # Variable did not take part in the forward pass.
                        continue
                    var.assign_sub(learning_rate * grad)

            error /= num_samples
            if verbose:
                print(f"{e + 1}/{epochs}, error={error}")

In [65]:
# XOR truth table: four samples, each input reshaped to a (2, 1) column vector
# and each target to a (1, 1) column so they match DenseLayer's matmul layout.
X = tf.constant([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=tf.float32)
X = tf.reshape(X, (4, 2, 1))
Y = tf.constant([[0], [1], [1], [0]], dtype=tf.float32)
Y = tf.reshape(Y, (4, 1, 1))

# DenseLayer draws its initial weights from np.random.randn; seeding here makes
# re-running this cell start from the same weights and reproduce the same log.
np.random.seed(42)

network = [
    DenseLayer(2, 3),
    ReLU(),
    DenseLayer(3, 1),
    Sigmoid()
]
dense_network = DenseNetwork()
dense_network.train(network, mse, mse_prime, X, Y, epochs=100, learning_rate=0.1)

1/100, error=0.26362863183021545
2/100, error=0.25958144664764404
3/100, error=0.2557365596294403
4/100, error=0.2520930767059326
5/100, error=0.24864739179611206
6/100, error=0.24539339542388916
7/100, error=0.24232307076454163
8/100, error=0.23942692577838898
9/100, error=0.2366943359375
10/100, error=0.23411419987678528
11/100, error=0.23167484998703003
12/100, error=0.229364812374115
13/100, error=0.2271726131439209
14/100, error=0.22508743405342102
15/100, error=0.22309866547584534
16/100, error=0.22119645774364471
17/100, error=0.2193717062473297
18/100, error=0.2176159918308258
19/100, error=0.2159215211868286
20/100, error=0.21428121626377106
21/100, error=0.2126888781785965
22/100, error=0.2111387699842453
23/100, error=0.20962592959403992
24/100, error=0.20814594626426697
25/100, error=0.2066948115825653
26/100, error=0.20526927709579468
27/100, error=0.20402514934539795
28/100, error=0.20281732082366943
29/100, error=0.20242418348789215
30/100, error=0.2009626030921936
31/10