In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

from src.tensor import Tensor
from src.activation_function import Linear, ReLU, Sigmoid, Softmax
from src.loss_function import MeanSquaredError
from src.layer import Dense
from src.model import FFNN
from src.weight_initializer import GlorotUniformInitializer

### **Tensor Test Case**

In [2]:
# Basic operations: element-wise arithmetic between two Tensors
a = Tensor(np.array([1, 2]))
b = Tensor(np.array([3, 4]))

# Evaluated left to right: sum, difference, product, quotient
c, d, e, f = a + b, a - b, a * b, a / b

# Show the operands first, then each derived tensor
for node in (a, b, c, d, e, f):
    print(node)

Value: [1. 2.], Gradient: [0. 0.], Op: None
Value: [3. 4.], Gradient: [0. 0.], Op: None
Value: [4. 6.], Gradient: [0. 0.], Op: +
Value: [-2. -2.], Gradient: [0. 0.], Op: +
Value: [3. 8.], Gradient: [0. 0.], Op: *
Value: [0.33333333 0.5       ], Gradient: [0. 0.], Op: *


In [3]:
# Activation function and loss function
a = Tensor(np.array([1, 2]))

# Apply each activation to the same input tensor
b, c = (a.compute_activation(activation) for activation in (Linear, ReLU))

# Each loss gets its own freshly built target tensor
d, e = (
    activated.compute_loss(Tensor(np.array([3, 4])), MeanSquaredError)
    for activated in (b, c)
)

for node in (a, b, c, d, e):
    print(node)

Value: [1. 2.], Gradient: [0. 0.], Op: None
Value: [1. 2.], Gradient: [0. 0.], Op: Linear
Value: [1. 2.], Gradient: [0. 0.], Op: ReLU
Value: [4.], Gradient: [0.], Op: MeanSquaredError
Value: [4.], Gradient: [0.], Op: MeanSquaredError


In [4]:
# Automatic differentiation
a = Tensor(np.array([1, 2]))
b = Tensor(np.array([3, 4]))
c, d = a + b, a - b
e = c * d

# Two activation branches share the node e; each branch gets its own loss
f, g = (e.compute_activation(activation) for activation in (Linear, ReLU))
h, i = (
    branch.compute_loss(np.array([1, 1]), MeanSquaredError)
    for branch in (f, g)
)

graph_nodes = (a, b, c, d, e, f, g, h, i)

print("---------- Before backpropagation ----------")
for node in graph_nodes:
    print(node)

# NOTE(review): in the recorded output e.grad is [-18, -26] and d is [-2, -2],
# so one pass gives c.grad = e.grad * d = [36, 52]. The printed c.grad is
# [72, 104] even though the ReLU branch contributes zero to e, which suggests
# the second backward() re-propagates gradients already accumulated on shared
# nodes. Confirm against src.tensor before relying on these numbers.
h.backward()
i.backward()

print("\n---------- After backpropagation ----------")
for node in graph_nodes:
    print(node)

---------- Before backpropagation ----------
Value: [1. 2.], Gradient: [0. 0.], Op: None
Value: [3. 4.], Gradient: [0. 0.], Op: None
Value: [4. 6.], Gradient: [0. 0.], Op: +
Value: [-2. -2.], Gradient: [0. 0.], Op: +
Value: [ -8. -12.], Gradient: [0. 0.], Op: *
Value: [ -8. -12.], Gradient: [0. 0.], Op: Linear
Value: [0. 0.], Gradient: [0. 0.], Op: ReLU
Value: [125.], Gradient: [0.], Op: MeanSquaredError
Value: [1.], Gradient: [0.], Op: MeanSquaredError

---------- After backpropagation ----------
Value: [1. 2.], Gradient: [-108. -312.], Op: None
Value: [3. 4.], Gradient: [108. 156.], Op: None
Value: [4. 6.], Gradient: [ 72. 104.], Op: +
Value: [-2. -2.], Gradient: [-144. -312.], Op: +
Value: [ -8. -12.], Gradient: [-18. -26.], Op: *
Value: [ -8. -12.], Gradient: [-18. -26.], Op: Linear
Value: [0. 0.], Gradient: [-2. -2.], Op: ReLU
Value: [125.], Gradient: [1.], Op: MeanSquaredError
Value: [1.], Gradient: [1.], Op: MeanSquaredError


In [5]:
## Simulation of one layer with two neurons (h1 and h2)

# Initial values: x[0] is the constant 1 paired with each neuron's bias weight
x = Tensor(np.array([1, 2, 3]), tensor_type="input")
y = np.array([16, 14])                                          # y_true
wh1 = Tensor(np.array([2, 3, 4]), tensor_type="weight")         # neuron h1, wh1[0] = b1 (bias)
wh2 = Tensor(np.array([3, 4, 5]), tensor_type="weight")         # neuron h2, wh2[0] = b2 (bias)

# Net input: element-wise weight * input, then summed per neuron
wh1_x, wh2_x = wh1 * x, wh2 * x
net1, net2 = wh1_x.sum(), wh2_x.sum()

# Neuron outputs
o1, o2 = (net.compute_activation(ReLU) for net in (net1, net2))

# Loss over the concatenated outputs
output = o1.concat([o2])
loss = output.compute_loss(y, MeanSquaredError)

watched = (wh1, wh2, wh1_x, wh2_x, net1, net2, o1, o2, output, loss)

print("---------- Before backpropagation ----------")
for node in watched:
    print(node)

# Initiate automated differentiation
loss.backward()

print("\n---------- After backpropagation ----------")
for node in watched:
    print(node)


---------- Before backpropagation ----------
Value: [2. 3. 4.], Gradient: [0. 0. 0.], Op: None, Type: weight
Value: [3. 4. 5.], Gradient: [0. 0. 0.], Op: None, Type: weight
Value: [ 2.  6. 12.], Gradient: [0. 0. 0.], Op: *
Value: [ 3.  8. 15.], Gradient: [0. 0. 0.], Op: *
Value: [20.], Gradient: [0.], Op: sum
Value: [26.], Gradient: [0.], Op: sum
Value: [20.], Gradient: [0.], Op: ReLU
Value: [26.], Gradient: [0.], Op: ReLU
Value: [20. 26.], Gradient: [0. 0.], Op: concat
Value: [80.], Gradient: [0.], Op: MeanSquaredError

---------- After backpropagation ----------
Value: [2. 3. 4.], Gradient: [ 8. 16. 24.], Op: None, Type: weight
Value: [3. 4. 5.], Gradient: [24. 48. 72.], Op: None, Type: weight
Value: [ 2.  6. 12.], Gradient: [8. 8. 8.], Op: *
Value: [ 3.  8. 15.], Gradient: [24. 24. 24.], Op: *
Value: [20.], Gradient: [8.], Op: sum
Value: [26.], Gradient: [24.], Op: sum
Value: [20.], Gradient: [8.], Op: ReLU
Value: [26.], Gradient: [24.], Op: ReLU
Value: [20. 26.], Gradient: [ 8. 24.

In [6]:
## Simulation of 3-layered (excluding input layer) network with n = [3, 5, 4] number of neurons


def forward_dense_relu(neuron_weights, layer_input):
    """Run one fully connected ReLU layer built from per-neuron weight tensors.

    Each stage is evaluated for every neuron before the next stage starts
    (products, then sums, then activations), and the activations are finally
    concatenated into one output tensor.

    Args:
        neuron_weights: List with one weight Tensor per neuron.
        layer_input: Input Tensor with the same length as each weight Tensor.

    Returns:
        Tuple ``(products, nets, activations, output)``; the first three are
        per-neuron lists and ``output`` is the concatenated activation Tensor.
    """
    products = [weights * layer_input for weights in neuron_weights]
    nets = [product.sum() for product in products]
    activations = [net.compute_activation(ReLU) for net in nets]
    output = activations[0].concat(activations[1:])
    return products, nets, activations, output


# Initial values
x = Tensor(np.array([1, 2, 3]), tensor_type="input")
y = np.array([50, 64, 62, 55])

# One row per neuron; each row is one entry longer than the previous layer's
# width because the layer input gets a leading x0 element.
layer_weight_values = [
    [[2, 3, 4], [3, 4, 5], [4, 5, 6]],
    [[2, 3, 4, 5], [3, 4, 5, 6], [4, 5, 6, 7], [5, 6, 7, 8], [6, 7, 8, 9]],
    [
        [2, 3, 4, 5, 6, 7],
        [3, 4, 5, 6, 7, 8],
        [4, 5, 6, 7, 8, 9],
        [5, 6, 7, 8, 9, 10],
    ],
]
layer_weights = [
    [Tensor(np.array(row), tensor_type="weight") for row in rows]
    for rows in layer_weight_values
]

# Forward pass, keeping every intermediate tensor so it can be printed later
layer_traces = []   # per layer: (layer_input, products, nets, activations, output)
layer_input = x
for neuron_weights in layer_weights:
    if layer_traces:
        previous_output = layer_traces[-1][-1]
        # NOTE(review): the recorded output prints the layer-1 output and the
        # layer-2 input identically ([1. 20. 26. 32.], Op: concat), so add_x0()
        # appears to modify the tensor in place. It also shows o1/o2/o3
        # receiving elements 0..2 of that 4-element gradient, although element 0
        # belongs to the prepended x0 slot, i.e. the layer-1 gradients look
        # shifted by one position. Confirm concat/add_x0 backward in src.tensor.
        layer_input = previous_output.add_x0()
    products, nets, activations, layer_output = forward_dense_relu(neuron_weights, layer_input)
    layer_traces.append((layer_input, products, nets, activations, layer_output))

loss = layer_traces[-1][-1].compute_loss(y, MeanSquaredError)

# Backpropagation
loss.backward()

for layer_number, trace in enumerate(layer_traces, start=1):
    traced_input, products, nets, activations, layer_output = trace
    separator = "" if layer_number == 1 else "\n"
    print(f"{separator}============ Layer {layer_number} ============")
    if layer_number > 1:
        # Layer 1 consumes x directly, so only later layers print their input
        print(traced_input)
    for node in [*products, *nets, *activations, layer_output]:
        print(node)
print(loss)

Value: [ 2.  6. 12.], Gradient: [16291060. 16291060. 16291060.], Op: *
Value: [ 3.  8. 15.], Gradient: [20034510. 20034510. 20034510.], Op: *
Value: [ 4. 10. 18.], Gradient: [23777960. 23777960. 23777960.], Op: *
Value: [20.], Gradient: [16291060.], Op: sum
Value: [26.], Gradient: [20034510.], Op: sum
Value: [32.], Gradient: [23777960.], Op: sum
Value: [20.], Gradient: [16291060.], Op: ReLU
Value: [26.], Gradient: [20034510.], Op: ReLU
Value: [32.], Gradient: [23777960.], Op: ReLU
Value: [ 1. 20. 26. 32.], Gradient: [16291060. 20034510. 23777960. 27521410.], Op: concat

Value: [ 1. 20. 26. 32.], Gradient: [16291060. 20034510. 23777960. 27521410.], Op: concat
Value: [  2.  60. 104. 160.], Gradient: [485238. 485238. 485238. 485238.], Op: *
Value: [  3.  80. 130. 192.], Gradient: [616964. 616964. 616964. 616964.], Op: *
Value: [  4. 100. 156. 224.], Gradient: [748690. 748690. 748690. 748690.], Op: *
Value: [  5. 120. 182. 256.], Gradient: [880416. 880416. 880416. 880416.], Op: *
Value: [ 

### **Layer Test Case**

In [7]:
## Simulation of one-layered network with n = 5 number of neurons

x = Tensor(np.array([1, 2, 3, 4]))      # input, x[0] is always 1
y = np.array([5, 0, 3, 1, 2])           # correct class / y_true

# Output layer with 5 neurons. input_size=3 excludes x0: the recorded weights
# have 4 entries per neuron.
layer = Dense(
    neuron_size=5,
    activation="linear",
    kernel_initializer="glorot_uniform",
    input_size=3,
)

# Forward propagation
y_pred = layer.forward(x)
loss = y_pred.compute_loss(y, MeanSquaredError)
for label, value in (("y_pred:", y_pred), ("Loss:", loss)):
    print(label, value)

print("\n---------- Before backpropagation ----------")
print("Weights:", layer.weights)

# Backpropagation
# NOTE(review): the recorded loss is the mean of the squared errors, but the
# recorded weight gradients equal 2 * (y_pred - y) * x with no 1/n factor.
# Confirm whether MeanSquaredError's gradient is meant to omit the averaging.
loss.backward()
print("\n---------- After backpropagation ----------")
print("Weights:", layer.weights)

y_pred: Value: [-2.28833063 -0.91082992  3.24960389 -0.03329379  0.7034286 ], Gradient: [0. 0. 0. 0. 0.], Op: concat
Loss: Value: [11.35209401], Gradient: [0.], Op: MeanSquaredError

---------- Before backpropagation ----------
Weights: [Value: [ 0.21898912  0.48338857 -0.18376022 -0.73070405], Gradient: [0. 0. 0. 0.], Op: None, Type: weight, Value: [-0.33975298 -0.17490727  0.43756951 -0.38349273], Gradient: [0. 0. 0. 0.], Op: None, Type: weight, Value: [ 0.380372   -0.14919553  0.30283812  0.56477715], Gradient: [0. 0. 0. 0.], Op: None, Type: weight, Value: [ 0.78799295 -0.44396825  0.22573212 -0.15263665], Gradient: [0. 0. 0. 0.], Op: None, Type: weight, Value: [ 0.64913492 -0.0870846  -0.42925467  0.37905672], Gradient: [0. 0. 0. 0.], Op: None, Type: weight]

---------- After backpropagation ----------
Weights: [Value: [ 0.21898912  0.48338857 -0.18376022 -0.73070405], Gradient: [-14.57666126 -29.15332252 -43.72998377 -58.30664503], Op: None, Type: weight, Value: [-0.33975298 -0.17

### **FFNN Test Case**

In [8]:
## Simulation of one-layered network with n = 5 number of neurons


def print_layer_weights(network):
    """Print the weights list of every layer in ``network.layers``, in order."""
    for dense_layer in network.layers:
        print(dense_layer.weights)


# Three raw features, no explicit x0 here
x = Tensor(np.array([2, 3, 4]))
y = np.array([5, 0, 3, 1, 2])

# Build the FFNN model
single_dense = Dense(
    neuron_size=5,
    activation="linear",
    kernel_initializer="glorot_uniform",
    input_size=3,
)
model = FFNN([single_dense])

# Compile the model
model.compile(optimizer="sgd", loss="mean_squared_error")

# Predict output (without fitting)
y_pred = model.forward(x)
print("y_pred:", y_pred)
print("\n---------- Before backpropagation ----------")
print_layer_weights(model)

# Initiate backward propagation
model.backward(y)
print("\n---------- After backpropagation ----------")
print_layer_weights(model)

y_pred: [ 1.08280691 -3.9456623  -2.89536147  0.1779078   0.1844783 ]

---------- Before backpropagation ----------
[Value: [-0.14196564 -0.22094583  0.68587401 -0.09773946], Gradient: [0. 0. 0. 0.], Op: None, Type: weight, Value: [-0.55022965 -0.50279027 -0.23835301 -0.41869827], Gradient: [0. 0. 0. 0.], Op: None, Type: weight, Value: [ 0.34080087 -0.47776755  0.30501036 -0.79891458], Gradient: [0. 0. 0. 0.], Op: None, Type: weight, Value: [-0.33538684  0.01071245 -0.60909573  0.57978923], Gradient: [0. 0. 0. 0.], Op: None, Type: weight, Value: [-0.09143406 -0.04448454  0.73113813 -0.45713324], Gradient: [0. 0. 0. 0.], Op: None, Type: weight]

---------- After backpropagation ----------
[Value: [-0.14196564 -0.22094583  0.68587401 -0.09773946], Gradient: [ -7.83438618 -15.66877236 -23.50315855 -31.33754473], Op: None, Type: weight, Value: [-0.55022965 -0.50279027 -0.23835301 -0.41869827], Gradient: [ -7.8913246  -15.78264921 -23.67397381 -31.56529841], Op: None, Type: weight, Value: [

In [9]:
def generate_linearly_separable_data(num_samples, num_features=2, weight=None, bias=1, random_seed=None):
    """Generate a random binary dataset whose classes are split by a hyperplane.

    Features are drawn uniformly from [0, 20). A sample is labelled 1 when
    ``dot(X[:, :-1], weight) + X[:, -1] > bias + 10`` and 0 otherwise, so the
    last feature always has an implicit coefficient of 1.

    Args:
        num_samples: Number of rows to generate.
        num_features: Number of feature columns; must be at least 2.
        weight: Coefficients for the first ``num_features - 1`` features. Any
            array-like with exactly that many elements is accepted (a scalar
            when ``num_features == 2``, a list, a row or a column vector).
            When None, the coefficients are drawn uniformly from [-5, 5).
        bias: Offset added to the fixed threshold of 10.
        random_seed: When given, the data comes from a private generator seeded
            with this value, which yields the same numbers as calling
            ``np.random.seed(random_seed)`` first but leaves NumPy's global
            random state untouched. When None, the global state is used.

    Returns:
        Tuple ``(X, y)``: ``X`` is a float array of shape
        ``(num_samples, num_features)`` and ``y`` is an int array of 0/1 labels
        of shape ``(num_samples, 1)``.

    Raises:
        ValueError: If ``num_features < 2`` or ``weight`` does not contain
            exactly ``num_features - 1`` values.
    """
    if num_features < 2:
        raise ValueError("num_features must be at least 2 for meaningful separation.")

    # A private RandomState replays the legacy seeded stream without reseeding
    # the process-wide generator that other code may depend on.
    rng = np.random if random_seed is None else np.random.RandomState(random_seed)

    X = rng.rand(num_samples, num_features) * 20

    if weight is None:
        weight = rng.uniform(-5, 5, size=(num_features - 1))
    else:
        # Flatten so scalars and column vectors cannot broadcast the boundary
        # into an (n, n) matrix and silently yield n * n labels.
        weight = np.asarray(weight, dtype=float).reshape(-1)
        if weight.size != num_features - 1:
            raise ValueError(
                f"weight must contain {num_features - 1} value(s), got {weight.size}."
            )

    decision_boundary = np.dot(X[:, :-1], weight) + X[:, -1]
    y = np.array(decision_boundary > (bias + 10), dtype=int).reshape(-1, 1)

    return X, y


In [10]:
## Simulation of three-layered network with n = [5, 7, 1] number of neurons, with fitting phase

# Seed NumPy's global generator so the synthetic dataset is the same on every
# run. This also pins any initializer that draws from np.random
# (TODO confirm in src.weight_initializer).
np.random.seed(42)

# Generate dataset
NUM_SAMPLES = 40
X, y = generate_linearly_separable_data(num_samples=NUM_SAMPLES, num_features=4)

# Split training data (70%), validation data (15%), and test data (15%)
TRAIN_END = 28      # 70% of 40 samples
VAL_END = 34        # next 15% (6 samples); the remaining 6 are the test set
X_train, y_train = X[:TRAIN_END], y[:TRAIN_END]
X_val, y_val = X[TRAIN_END:VAL_END], y[TRAIN_END:VAL_END]
X_test, y_test = X[VAL_END:], y[VAL_END:]

# Build the FFNN model
model = FFNN([
    Dense(neuron_size=5, activation="sigmoid", kernel_initializer="glorot_uniform", input_size=4),
    Dense(neuron_size=7, activation="sigmoid", kernel_initializer="glorot_uniform"),
    Dense(neuron_size=1, activation="sigmoid", kernel_initializer="glorot_uniform")
])

model.compile(optimizer="sgd", loss="mean_squared_error")

model.fit(X_train, y_train, epochs=50, batch_size=20, validation_data=(X_val, y_val))

y_pred = model.predict(X_test)
print(y_pred)

Epoch 1, Validation Loss: 0.05747100228003801
Epoch 2, Validation Loss: 0.05204575526911812
Epoch 3, Validation Loss: 0.04745240307360599
Epoch 4, Validation Loss: 0.04352482676954859
Epoch 5, Validation Loss: 0.040136375523001276
Epoch 6, Validation Loss: 0.03718927436076861
Epoch 7, Validation Loss: 0.03460713248391065
Epoch 8, Validation Loss: 0.032329587458457536
Epoch 9, Validation Loss: 0.030308432683792203
Epoch 10, Validation Loss: 0.028504785140673103
Epoch 11, Validation Loss: 0.02688699052562249
Epoch 12, Validation Loss: 0.02542905675091744
Epoch 13, Validation Loss: 0.02410947009651354
Epoch 14, Validation Loss: 0.02291029137530343
Epoch 15, Validation Loss: 0.021816459077815476
Epoch 16, Validation Loss: 0.020815247019429214
Epoch 17, Validation Loss: 0.019895838429354384
Epoch 18, Validation Loss: 0.019048988623792045
Epoch 19, Validation Loss: 0.0182667556890689
Epoch 20, Validation Loss: 0.017542283842223894
Epoch 21, Validation Loss: 0.016869627939453645
Epoch 22, Val

### **mnist_784 Test Case**

In [11]:
# Download (or load from scikit-learn's local cache) the MNIST digits as plain
# NumPy arrays. fetch_openml is imported in the notebook's top import cell.
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

In [12]:
# Preprocessing: scale pixel intensities from 0..255 down to 0..1.
# The guard makes the cell safe to re-run: once scaled, the maximum is at most
# 1.0 and a second execution leaves X unchanged instead of dividing again.
PIXEL_MAX = 255.0
if X.max() > 1.0:
    X = X / PIXEL_MAX

In [13]:
# One-hot encode the labels.
# Only encode while y is still a 1-D label vector: re-running this cell on an
# already encoded 2-D y would otherwise index np.eye with the one-hot matrix
# and silently produce a 3-D array.
if y.ndim == 1:
    # Convert to integers
    class_ids = y.astype(int)

    # One-hot encode using np.eye(): row k of the identity matrix encodes class k
    num_classes = np.max(class_ids) + 1
    y = np.eye(num_classes)[class_ids]

In [14]:
# Stratified 70% / 10% / 20% train / validation / test split.
# train_test_split is imported in the notebook's top import cell.
TEST_FRACTION = 0.2             # share of the full dataset held out for testing
VAL_FRACTION_OF_REST = 0.125    # 12.5% of the remaining 80% = 10% of the full dataset
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=VAL_FRACTION_OF_REST, random_state=SPLIT_SEED, stratify=y_train
)

print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Validation set: {X_val.shape}, {y_val.shape}")
print(f"Test set: {X_test.shape}, {y_test.shape}")

Training set: (49000, 784), (49000, 10)
Validation set: (7000, 784), (7000, 10)
Test set: (14000, 784), (14000, 10)


In [15]:
# Small subsets keep this smoke test fast; the loss values are not
# representative of training on the full dataset.
TRAIN_SUBSET = 120
VAL_SUBSET = 20

model = FFNN([
    Dense(neuron_size=256, activation="relu", kernel_initializer="glorot_uniform", input_size=784),
    Dense(neuron_size=128, activation="relu", kernel_initializer="glorot_uniform"),
    Dense(neuron_size=10, activation="softmax", kernel_initializer="glorot_uniform")
])

model.compile(optimizer="sgd", loss="categorical_crossentropy")

# Keyword arguments match the fit() call used for the synthetic dataset and
# make it explicit which 20 is the epoch count and which is the batch size.
model.fit(
    X_train[:TRAIN_SUBSET],
    y_train[:TRAIN_SUBSET],
    epochs=20,
    batch_size=20,
    validation_data=(X_val[:VAL_SUBSET], y_val[:VAL_SUBSET]),
)

Epoch 1, Validation Loss: 0.2010598098106946
Epoch 2, Validation Loss: 0.1827466372219712
Epoch 3, Validation Loss: 0.1486053066817694
Epoch 4, Validation Loss: 0.07663933945462023
Epoch 5, Validation Loss: 0.061013123063173164
Epoch 6, Validation Loss: 0.046962566726295875
Epoch 7, Validation Loss: 0.04861137161869134
Epoch 8, Validation Loss: 0.035842108554138497
Epoch 9, Validation Loss: 0.03838128986536981
Epoch 10, Validation Loss: 0.025256349296199382
Epoch 11, Validation Loss: 0.024474114569042554
Epoch 12, Validation Loss: 0.025830198551818055
Epoch 13, Validation Loss: 0.025622058823238425
Epoch 14, Validation Loss: 0.02739135317970347
Epoch 15, Validation Loss: 0.027064144541394067
Epoch 16, Validation Loss: 0.024548986360234674
Epoch 17, Validation Loss: 0.02319822655472346
Epoch 18, Validation Loss: 0.022672644187608736
Epoch 19, Validation Loss: 0.022307857267606434
Epoch 20, Validation Loss: 0.022037734855405867


In [16]:
# Evaluate on the first 20 held-out test samples only
X_eval, y_eval = X_test[:20], y_test[:20]
loss, metric = model.evaluate(X_eval, y_eval)

Loss: 0.0716, Accuracy: 0.8000
