# Tugas Besar 1 IF3270 Pembelajaran Mesin <br /> Feedforward Neural Network

## Kelompok 39

- Dzaky Satrio Nugroho - 13522059
- Julian Caleb Simandjuntak - 13522099
- Rafiki Prawhira Harianto - 13522065

In [73]:
# Import dulu
import numpy as np


In [74]:
# Fungsi Aktivasi 

class ActivationFunction:
    
    # Fungsi linear
    @staticmethod
    def linear(x: np.ndarray) -> np.ndarray:
        return x

    # Fungsi ReLU
    @staticmethod
    def relu(x: np.ndarray) -> np.ndarray:
        return np.maximum(0, x)

    # Fungsi Sigmoid
    @staticmethod
    def sigmoid(x: np.ndarray) -> np.ndarray:
        return 1 / (1 + np.exp(-x))

    # Fungsi Hyperbolic Tangent
    @staticmethod
    def tanh(x: np.ndarray) -> np.ndarray:
        return np.tanh(x)

    # Fungsi Softmax
    @staticmethod
    def softmax(x: np.ndarray) -> np.ndarray:
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)

    # Fungsi Leaky ReLU
    @staticmethod
    def leaky_relu(x: np.ndarray, alpha=0.1) -> np.ndarray:
        return np.maximum(alpha*x, x)

    # Fungsi Swish
    @staticmethod
    def swish(x: np.ndarray) -> np.ndarray:
        return x * ActivationFunction.sigmoid(x)

In [75]:
# Fungsi Loss

class LossFunction:
    
    # Mean Squared Error
    @staticmethod
    def mse(y_pred: np.ndarray, y_true: np.ndarray) -> float:
        mse = np.sum((y_true - y_pred) ** 2) / len(y_true)
        return mse

    # Binary Cross-Entropy
    @staticmethod
    def bce(y_pred: np.ndarray, y_true: np.ndarray) -> float:
        bce = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)).mean()
        return bce

    # Categorical Cross-Entropy
    @staticmethod
    def cce(y_pred: np.ndarray, y_true: np.ndarray) -> float:
        cce = -1 / len(y_true) * np.sum(np.sum(y_true * np.log(y_pred)))
        return cce
    
    def mse_derivative(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        return 2 * (y_pred - y_true) / len(y_true) # times dy_pred/dw 
    
    def bce_derivative(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        return (y_pred - y_true) / (y_pred * (1 - y_pred) * len(y_true)) # times dy_pred/dw 
    
    def cce_derivative(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
        return (y_true / (y_pred * len(y_true))) # times dy_pred/dw 
    
    # @staticmethod
    # def calculate_loss(loss_type: str, y_pred: np.ndarray, y_true: np.ndarray) -> float:
    #     if loss_type == 'mse':
    #         return LossFunction.mse(y_pred, y_true)
    #     elif loss_type == 'bce':
    #         return LossFunction.bce(y_pred, y_true)
    #     elif loss_type == 'cce':
    #         return LossFunction.cce(y_pred, y_true)
    #     else:
    #         raise ValueError(f"Jenis loss tidak dikenal.")

In [76]:
import numpy as np

"""
Inisialisasi 1 layer bobot dengan parameter wajib shape yang merupakan tuple berisi ukuran matrix bobot
Contoh: 
shape=(3, 4) berarti:
- Untuk layer dengan 3 neuron awal dan layer dengan 4 neuron berikutnya
- Menghasilkan matrix bobot dengan 4 kolom berdasarkan bias + neuron layer awal dikali 4 kolom berdasarkan neuron layer berikutnya
"""
class WeightInitializer:    
    @staticmethod
    def zeros(shape):
        w = np.zeros((shape[1], shape[0]))
        b = np.zeros((shape[1], 1))
        return np.hstack((b, w))

    @staticmethod
    def uniform(shape, lower_bound=-0.1, upper_bound=0.1, seed=None):
        if seed is not None:
            np.random.seed(seed)
        w = np.random.uniform(lower_bound, upper_bound, (shape[1], shape[0]))
        b = np.random.uniform(lower_bound, upper_bound, (shape[1], 1))
        return np.hstack((b, w))

    @staticmethod
    def normal(shape, mean=0.0, variance=1.0, seed=None):
        if seed is not None:
            np.random.seed(seed)
        std_dev = np.sqrt(variance)  # Konversi variance ke standard deviation
        w = np.random.normal(mean, std_dev, (shape[1], shape[0]))
        b = np.random.normal(mean, std_dev, (shape[1], 1))
        return np.hstack((b, w))
    
    # @staticmethod
    # def initialize_weights(initialization_type: str, shape, bias=1, lower_bound=-0.1, upper_bound=0.1, mean=0.0, variance=1.0, seed=None):
    #     if initialization_type == 'zeros':
    #         return WeightInitializer.zeros(shape, bias=bias)
    #     elif initialization_type == 'uniform':
    #         return WeightInitializer.uniform(shape, bias=bias, lower_bound=lower_bound, upper_bound=upper_bound, seed=seed)
    #     elif initialization_type == 'normal':
    #         return WeightInitializer.normal(shape, bias=bias, mean=mean, variance=variance, seed=seed)
    #     else:
    #         raise ValueError(f"Jenis inisialisasi '{initialization_type}' tidak dikenal.")
    
# Contoh penggunaan
# zero_weights = WeightInitializer.zeros((3,4))
# uniform_weights = WeightInitializer.uniform((3,4))
# normal_weights = WeightInitializer.normal((3,4))
# print(zero_weights)
# print(uniform_weights)
# print(normal_weights)
# output:
# [[0. 0. 0. 0.]
#  [0. 0. 0. 0.]
#  [0. 0. 0. 0.]
#  [0. 0. 0. 0.]]
# [[-0.00770413  0.05834501  0.00577898  0.01360891]
#  [ 0.05610584  0.08511933 -0.08579279 -0.08257414]
#  [-0.07634511 -0.09595632  0.06652397  0.05563135]
#  [ 0.0279842   0.07400243  0.09572367  0.05983171]]
# [[-0.88778575 -2.55298982  0.6536186   0.8644362 ]
#  [-1.98079647 -0.74216502  2.26975462 -1.45436567]
#  [-0.34791215  0.04575852 -0.18718385  1.53277921]
#  [ 0.15634897  1.46935877  0.15494743  0.37816252]]

In [77]:
# Turunan Fungsi Aktivasi

class ActivationFunctionDerivative:

    # Fungsi Linear
    @staticmethod
    def linear(x: np.ndarray) -> np.ndarray:
        return np.ones_like(x)
    
    # Fungsi RelU
    @staticmethod
    def relu(x: np.ndarray) -> np.ndarray:
        return np.where(x > 0, 1, 0)
    
    # Fungsi Sigmoid
    @staticmethod
    def sigmoid(x: np.ndarray) -> np.ndarray:
        sigmoidx = ActivationFunction.sigmoid(x)
        return sigmoidx * (1 - sigmoidx)
    
    # Fungsi Hyperbolic Tangent
    @staticmethod
    def tanh(x: np.ndarray) -> np.ndarray:
        return (2 / (2 * np.sinh(x))) ** 2

    # Fungsi Softmax
    @staticmethod
    def softmax(x: np.ndarray) -> np.ndarray:
        softmaxx = ActivationFunction.softmax(x)
        n = x.size
        matrix = []
        for i in range(1,n+1):
            row = []
            for j in range(1,n+1):
                row.append(softmaxx[i-1] * ((i == j) - softmaxx[j-1]))
            matrix.append(row)

        return np.array(matrix)

    # Fungsi Leaky ReLU
    @staticmethod
    def leaky_relu(x: np.ndarray, alpha=0.1) -> np.ndarray:
        return np.where(x > 0, 1, alpha)
    
    # Fungsi Swish
    @staticmethod
    def swish(x: np.ndarray) -> np.ndarray:
        sigmoidx = ActivationFunction.sigmoid(x)
        return sigmoidx * (1 + x - x * sigmoidx)

In [78]:
# Mencoba membuat FFNN 

# Yang menjadi ketentuan parameter FFNN:
# - Jumlah layer
# - Jumlah neuron tiap layer
# - Fungsi aktivasi tiap layer
# - Fungsi loss dari model
# - Metode inisialisasi bobot

# Method FFNN:
# - Inisialisasi bobot
# - Menyimpan bobot
# - Menyimpan gradien bobot
# - Menampilkan model struktur jaringan, bobot, dan gradien
# - Menampilkan distribusi bobot
# - Menampilkan distribusi gradien bobot
# - Save and load
# - Forward propagation
# - Backward propagation
# - Weight update dengan gradient descent

# Parameter pelatihan FFNN:
# - Batch size
# - Learning rate
# - Jumlah epoch
# - Verbose


class FFNN:
    def __init__(self, layers, activations=["sigmoid", "sigmoid"], loss="mse", initialization="uniform", seed=0, batch_size=1, learning_rate=0.01, epochs=10, verbose=1, weights=None):
        # Parameter-parameter
        # Menerima jumlah neuron dari setiap layer (sekaligus jumlah layernya) termasuk input dan output
        self.layers = layers # Contoh: [1, 2, 3]
        # Menerima fungsi aktivasi tiap layer
        self.activations = activations # Contoh: ["sigmoid", "relu"]
        # Menerima fungsi loss
        self.loss = loss # Contoh: "mse"
        # Menerima metode inisialisasi bobot
        self.initialization = initialization # Contoh: "zeros"
        self.seed = seed # Jika bobot bukan zeros, menerima seeding
        self.batch_size = batch_size # Jumlah data yang diproses dalam satu iterasi
        self.learning_rate = learning_rate
        self.epochs = epochs # Jumlah iterasi
        self.verbose = verbose # 1 berarti menampilkan progress bar beserta kondisi training loss dan validation loss saat itu, jika 0 tidak usah
        
        # Inisialisasi bias dan bobot, beserta gradiennya
        if self.initialization == 'custom':
            self.weights = weights
        else:
            self.weights = []
        self.gradients_w = []

   
        for i in range(1, len(self.layers)):
            in_size, out_size = self.layers[i - 1], self.layers[i]
            if self.initialization == 'zeros':
                w = WeightInitializer.zeros((in_size, out_size))
            elif self.initialization == 'uniform':
                w = WeightInitializer.uniform((in_size, out_size), seed=self.seed)
            elif self.initialization == 'normal':
                w = WeightInitializer.normal((in_size, out_size), seed=self.seed)
            elif self.initialization == 'custom':
                continue
            else:
                raise ValueError("Metode inisialisasi tidak valid.")
            
            self.weights.append(w)

            
    # Saatnya forward propagation
    def forward_propagation(self, input_data):
        values = np.array(input_data)
        value_matrix = [values]

        for i in range(len(self.weights)):
            values = np.insert(values, 0, 1)  # Add bias term
            new_values = ActivationFunction.sigmoid(np.dot(self.weights[i], values)) # Matrix dot multiplication antar weights di layer i dan values
            values = new_values
            value_matrix.append(values)
            
        return value_matrix
    
    def backward_propagation(self, target_output, value_matrix):
        target_output = np.array(target_output)
        gradients = [np.zeros_like(w) for w in self.weights]

        # Compute output layer error
        output_values = value_matrix[-1]
        if self.loss == "mse":
            delta = LossFunction.mse_derivative(y_pred=output_values, y_true=target_output) * ActivationFunctionDerivative.sigmoid(output_values)
        elif self.loss == "bce":
            delta = LossFunction.bce_derivative(y_pred=output_values, y_true=target_output) * ActivationFunctionDerivative.sigmoid(output_values)
        elif self.loss == "cce":
            delta = LossFunction.cce_derivative(y_pred=output_values, y_true=target_output) * ActivationFunctionDerivative.sigmoid(output_values)
        else:
            raise NotImplementedError("Loss function not implemented")

        # Backpropagate through layers
        # TODO: need to debug and test this part to make sure the gradients are correct
        for i in reversed(range(len(self.weights))):
            delta = delta.reshape(-1, 1)
            prev_values = np.insert(value_matrix[i], 0, 1).reshape(1, -1)

            # Membuat shape delta (neurons, 1) dan prev_values (1, inputs)
            gradients[i] = np.dot(delta.reshape(-1, 1), prev_values.reshape(1, -1))

            if i > 0:
                delta = np.dot(self.weights[i].T[1:], delta).flatten() * ActivationFunctionDerivative.sigmoid(value_matrix[i])

        self.gradients_w = gradients
        return gradients
    
    def update_weights(self):
        for i in range(len(self.weights)):
            if self.verbose:
                print(f"Layer {i}")
                print(self.weights[i])
                print(self.gradients_w[i])
            
            # += or -=? Assume its -= until tested
            self.weights[i] -= self.learning_rate * self.gradients_w[i]

    def train(self, X, y):
        X, y = np.array(X), np.array(y)
        for epoch in range(self.epochs):
            if self.verbose:
                print(f"Epoch {epoch}")
            total_loss = 0
            for i in range(len(X)):
                value_matrix = self.forward_propagation(X[i])
                self.backward_propagation(y[i], value_matrix)
                self.update_weights()

                # Compute loss (MSE)
                if self.loss == "mse":
                    loss = LossFunction.mse(y_pred=value_matrix[-1], y_true=y[i])
                elif self.loss == "bce":
                    loss = LossFunction.bce(y_pred=value_matrix[-1], y_true=y[i])
                elif self.loss == "cce":
                    loss = LossFunction.cce(y_pred=value_matrix[-1], y_true=y[i])
                total_loss += loss

            avg_loss = total_loss / len(X)
            if self.verbose and epoch % 1 == 0:
                print(f"Epoch {epoch}, Loss: {avg_loss:.5f}")

    def predict(self, X):
        X = np.array(X)
        return [self.forward_propagation(x)[-1] for x in X]

    # Untuk debugging
    def debug(self):
        return self.weights[0]
        
        
        

In [79]:
# Contoh: XOR problem
# X = [[0, 0], [0, 1], [1,0], [1, 1]]
# y = [[0], [1], [1], [0]]

# ffnn = FFNN(layers=[2, 2, 1], initialization="zeros", learning_rate=0.05, epochs=1, verbose=1)
# ffnn.train(X, y)

# prediction = ffnn.predict([[0,1]])
# print("Prediction:", prediction)

X = [[0.5, 0.1]]
y = [[0.01, 0.99]]

weight = np.array([
    [
        [0.35, 0.15, 0.25],
        [0.35, 0.2, 0.3]
    ],
    [
        [0.6, 0.4, 0.5],
        [0.6, 0.45, 0.55]
    ]
])

ffnn = FFNN(layers=[2, 2, 2], activations=["sigmoid", "sigmoid"], loss="mse", initialization
="custom", weights=weight, learning_rate=0.5, epochs=1, verbose=1)
ffnn.train(X, y)
print(ffnn.debug())

Epoch 0
Layer 0
[[0.35 0.15 0.25]
 [0.35 0.2  0.3 ]]
[[0.00999832 0.00499916 0.00099983]
 [0.01260612 0.00630306 0.00126061]]
Layer 1
[[0.6  0.4  0.5 ]
 [0.6  0.45 0.55]]
[[ 0.16284609  0.09944021  0.10059783]
 [-0.04733267 -0.02890319 -0.02923966]]
Epoch 0, Loss: 0.30527
[[0.34500084 0.14750042 0.24950008]
 [0.34369694 0.19684847 0.29936969]]
