## 1

In [15]:
import numpy as np

# Training inputs: four 2-D observations, one per row.
X = np.array([
    [0.7, -0.3],
    [0.4, 0.5],
    [-0.2, 0.8],
    [-0.4, 0.3],
])
# Target value for each observation, in the same order as the rows of X.
Y = np.array([0.8, 0.6, 0.3, 0.3])

# Radial basis function
def radial_basis_function(x, c):
    """Return exp(-0.5 * ||x - c||^2) for a point ``x`` and a center ``c``.

    The response is 1.0 when ``x`` equals ``c`` and decays towards 0 as the
    Euclidean distance between them grows.
    """
    distance = np.linalg.norm(x - c)
    return np.exp(-0.5 * distance ** 2)

# RBF centers, one 2-D center per row.
C = np.array([[0, 0], [1, -1], [-1, 1]])

# Compute the design matrix: entry (i, j) is the RBF response of
# observation i to center j.
Phi = np.array([[radial_basis_function(x, c) for c in C] for x in X])

# Ridge regression closed-form solution with lambda (λ) = 0.1
lambda_val = 0.1
# Identity used for the ridge penalty. Its size is one entry per RBF center
# plus one for the bias column that is prepended to Phi before solving, so it
# is derived from C instead of being hard-coded.
I = np.eye(C.shape[0] + 1)

In [16]:
# Add a column of 1s for the bias term
Phi_with_bias = np.column_stack((np.ones(Phi.shape[0]), Phi))
print("Transformed Values: \n", Phi_with_bias)

# Gram matrix of the bias-augmented design matrix: Phi^T . Phi
x_time_x = np.dot(Phi_with_bias.T, Phi_with_bias)
print("X * X transpose: \n", x_time_x)

# Regularised Gram matrix: Phi^T . Phi + lambda * I
plus_lambda = x_time_x + lambda_val * I
print("X*X^T + lambda * I \n", np.round(plus_lambda, 5))

inverted_matrix = np.linalg.inv(plus_lambda)
print("Inverted matrix \n", np.round(inverted_matrix, 5))

# (Phi^T . Phi + lambda * I)^-1 . Phi^T
dot = np.dot(inverted_matrix, Phi_with_bias.T)
print("Inverted Matrix * X^T: \n", np.round(dot, 5))

# Ridge regression coefficients. They are kept at full precision and rounded
# only for display, so later cells that reuse `w` do not inherit rounding error.
w = np.dot(dot, Y)
print("Ridge regression coefficients: \n", np.round(w, 5))

Transformed Values: 
 [[1.         0.74826357 0.74826357 0.10126646]
 [1.         0.81464732 0.27117254 0.33121088]
 [1.         0.71177032 0.09632764 0.71177032]
 [1.         0.8824969  0.16121764 0.65376979]]
X * X transpose: 
 [[4.         3.15717811 1.27698138 1.79801745]
 [3.15717811 2.50896639 0.99164557 1.42916086]
 [1.27698138 0.99164557 0.66870305 0.33955168]
 [1.79801745 1.42916086 0.33955168 1.05398747]]
X*X^T + lambda * I 
 [[4.1     3.15718 1.27698 1.79802]
 [3.15718 2.60897 0.99165 1.42916]
 [1.27698 0.99165 0.7687  0.33955]
 [1.79802 1.42916 0.33955 1.15399]]
Inverted matrix 
 [[ 4.54826 -3.77682 -1.86117 -1.86155]
 [-3.77682  5.98285 -0.88543 -1.26432]
 [-1.86117 -0.88543  4.33276  2.72156]
 [-1.86155 -1.26432  2.72156  4.53204]]
Inverted Matrix * X^T: 
 [[ 0.14105  0.35022  0.35575 -0.30185]
 [-0.09064  0.43823 -0.50361  0.5337 ]
 [ 0.99394 -0.50615 -0.1369  -0.16477]
 [-0.31222 -0.65246  0.72647  0.42436]]
Ridge regression coefficients: 
 [ 0.33914  0.19945  0.40096 -

### (b)

In [17]:
# Evaluate the learned model on the bias-augmented design matrix
y_pred = Phi_with_bias.dot(w)

print("Output predictions: \n", np.round(y_pred, 5))

# Root mean squared error between the predictions and the targets
rmse = np.sqrt(np.mean(np.square(y_pred - Y)))

print("RMSE: \n", round(rmse,5) )

Output predictions: 
 [0.75843 0.51231 0.30904 0.38628]
RMSE: 
 0.06508


## 2

In [18]:
import numpy as np

# Given initial parameters: layer k of the network uses weights Wk and bias bk.
W1 = np.array(
    [[1, 1, 1, 1],
     [1, 1, 2, 1],
     [1, 1, 1, 1]],
    dtype=np.float64,
)
b1 = np.array([1, 1, 1], dtype=np.float64)

W2 = np.array(
    [[1, 4, 1],
     [1, 1, 1]],
    dtype=np.float64,
)
b2 = np.array([1, 1], dtype=np.float64)

W3 = np.array(
    [[1, 1],
     [3, 1],
     [1, 1]],
    dtype=np.float64,
)
b3 = np.array([1, 1, 1], dtype=np.float64)

# Activation function
def activation(x):
    """Apply the shifted, scaled hyperbolic tangent tanh(0.5 * x - 2) element-wise."""
    shifted = 0.5 * x - 2
    return np.tanh(shifted)

# Learning rate (step size) for the gradient-descent updates
eta = 0.1

In [19]:
# First training observation and its target vector
x1 = np.array([1, 1, 1, 1])
target1 = np.array([0, 1, 0])  # Target for class B

# Lists holding every training observation and, at the same index, its target
observations = [x1]
targets = [target1]

In [20]:
# Second training observation and its target vector
x2 = np.array([1, 0, 0, -1])
target2 = np.array([1, 0, 0])  # Target for class A

# Register the pair so it sits at the same index in both lists
observations.append(x2)
targets.append(target2)

In [21]:
# Accumulators for the per-observation parameter updates: one list per weight
# matrix and one per bias vector.
w1_lst, w2_lst, w3_lst = [], [], []
b1_lst, b2_lst, b3_lst = [], [], []

In [22]:
# Start from empty accumulators so that re-running this cell does not stack a
# second copy of the per-observation updates on top of the ones already stored.
for collected in (w1_lst, w2_lst, w3_lst, b1_lst, b2_lst, b3_lst):
    collected.clear()

# One forward and one backward pass per training pair. The updates are only
# collected here; they are applied to the parameters in a later cell.
for observation, target in zip(observations, targets):
    # Forward pass: for each layer print W.x, then the pre-activation
    # W.x + b, then the activated output (kept in z1, z2, z3).
    weighted1 = np.dot(W1, observation)
    print(weighted1)
    pre1 = weighted1 + b1
    print("Z[1] = ", pre1)
    z1 = activation(pre1)
    print("X[1] = ", np.round(z1, 5))
    print("\n")
    weighted2 = np.dot(W2, z1)
    print(weighted2)
    pre2 = weighted2 + b2
    print("Z[2] = ", pre2)
    z2 = activation(pre2)
    print("X[2] = ", np.round(z2, 5))
    print("\n")
    weighted3 = np.dot(W3, z2)
    print(weighted3)
    pre3 = weighted3 + b3
    print("Z[3] = ", pre3)
    z3 = activation(pre3)
    print("X[3] = ", np.round(z3, 5))

    # Backward pass. The activation tanh(0.5 * x - 2) has derivative
    # 0.5 * (1 - tanh^2), written below as 0.5 - 0.5 * z ** 2 in terms of the
    # layer output z.

    # Output layer: (output - target) times the activation derivative.
    delta3_part1 = np.subtract(z3, target)
    print("Delta 3, parte 1 = \n", np.round(delta3_part1, 5))
    delta3_part2 = (0.5 - 0.5 * z3 ** 2)
    print("Delta 3, parte 2 = \n", np.round(delta3_part2, 5))
    delta3 = delta3_part1 * delta3_part2
    print("Delta 3 =\n", np.round(delta3, 5))

    # Second layer: delta3 propagated back through W3, times the derivative.
    delta2_parte1 = np.dot(W3.T, delta3)
    print("Delta 2, parte 1 = \n", np.round(delta2_parte1, 5))

    delta2_part2 = (0.5 - 0.5 * z2 ** 2)
    print("Delta 2, parte 2 = \n", np.round(delta2_part2, 5))

    delta2 = delta2_parte1 * delta2_part2
    print("Delta 2 =\n", delta2)

    # First layer: delta2 propagated back through W2, times the derivative.
    delta1_parte1 = np.dot(W2.T, delta2)
    print("Delta 1, parte 1 = \n", np.round(delta1_parte1, 5))

    delta1_part2 = (0.5 - 0.5 * z1 ** 2)
    print("Delta 1, parte 1 = \n".replace("parte 1", "parte 2"), np.round(delta1_part2, 5))

    delta1 = delta1_parte1 * delta1_part2
    print("Delta 1 =\n", delta1)

    # Parameter steps: -eta * (delta outer layer-input) for the weights and
    # -eta * delta for the biases.
    delta_w1 = -eta * np.outer(delta1, observation)
    print("Delta W1: \n", delta_w1)
    w1_lst.append(delta_w1)

    delta_b1 = -eta * delta1
    print("Delta b1: \n", delta_b1)
    b1_lst.append(delta_b1)

    delta_w2 = -eta * np.outer(delta2, z1)
    print("Delta W2: \n", delta_w2)
    w2_lst.append(delta_w2)

    delta_b2 = -eta * delta2
    print("Delta b2: \n", delta_b2)
    b2_lst.append(delta_b2)

    delta_w3 = -eta * np.outer(delta3, z2)
    print("Delta W3: \n", delta_w3)
    w3_lst.append(delta_w3)

    delta_b3 = -eta * delta3
    print("Delta b3: \n", delta_b3)
    b3_lst.append(delta_b3)

[4. 5. 4.]
Z[1] =  [5. 6. 5.]
X[1] =  [0.46212 0.76159 0.46212]


[3.97061094 1.68582847]
Z[2] =  [4.97061094 2.68582847]
X[2] =  [ 0.45048 -0.57642]


[-0.12593822  0.77502679 -0.12593822]
Z[3] =  [0.87406178 1.77502679 0.87406178]
X[3] =  [-0.9159  -0.80494 -0.9159 ]
Delta 3, parte 1 = 
 [-0.9159  -1.80494 -0.9159 ]
Delta 3, parte 2 = 
 [0.08056 0.17604 0.08056]
Delta 3 =
 [-0.07379 -0.31773 -0.07379]
Delta 2, parte 1 = 
 [-1.10078 -0.46531]
Delta 2, parte 2 = 
 [0.39853 0.33387]
Delta 2 =
 [-0.43869681 -0.15535308]
Delta 1, parte 1 = 
 [-0.59405 -1.91014 -0.59405]
Delta 1, parte 2 = 
 [0.39322 0.20999 0.39322]
Delta 1 =
 [-0.23359459 -0.40110496 -0.23359459]
Delta W1: 
 [[0.02335946 0.02335946 0.02335946 0.02335946]
 [0.0401105  0.0401105  0.0401105  0.0401105 ]
 [0.02335946 0.02335946 0.02335946 0.02335946]]
Delta b1: 
 [0.02335946 0.0401105  0.02335946]
Delta W2: 
 [[0.02027293 0.03341089 0.02027293]
 [0.00717913 0.0118316  0.00717913]]
Delta b2: 
 [0.04386968 0.01553531]
Delta W3

In [23]:
# Batch gradient descent update: add the sum of the collected per-observation
# steps to each parameter array in place.
for param, steps in (
    (W1, w1_lst),
    (b1, b1_lst),
    (W2, w2_lst),
    (b2, b2_lst),
    (W3, w3_lst),
    (b3, b3_lst),
):
    param += np.sum(steps, axis=0, dtype=np.float64)

In [24]:
# Show every updated weight matrix and bias vector, rounded to 5 decimals
for label, value in (
    ("Updated W1:\n", W1),
    ("Updated b1:\n", b1),
    ("Updated W2:\n", W2),
    ("Updated b2:\n", b2),
    ("Updated W3:\n", W3),
    ("Updated b3:\n", b3),
):
    print(label, np.round(value, 5))

Updated W1:
 [[1.02336 1.02336 1.02336 1.02336]
 [1.04011 1.04011 2.04011 1.04011]
 [1.02336 1.02336 1.02336 1.02336]]
Updated b1:
 [1.02336 1.04011 1.02336]
Updated W2:
 [[1.02027 4.03341 1.02027]
 [1.00715 1.01181 1.00715]]
Updated b2:
 [1.04387 1.01556]
Updated W3:
 [[1.00067 0.9931 ]
 [3.01413 0.9815 ]
 [1.002   0.99443]]
Updated b3:
 [1.01004 1.03196 1.0087 ]
