# 1

In [187]:
import numpy as np

# Raw observations: two input features (column1, column2) and the target (column3).
column1 = [0.7, 0.4, -0.2, -0.4]
column2 = [-0.3, 0.5, 0.8, 0.3]
column3 = [0.8, 0.6, 0.3, 0.3]

# Design matrix with one observation per row, and the matching target vector.
X = np.array(list(zip(column1, column2)))
y = np.array(column3)

X

array([[ 0.7, -0.3],
       [ 0.4,  0.5],
       [-0.2,  0.8],
       [-0.4,  0.3]])

### a)

In [188]:
# Regularization strength (λ) of the ridge regression fitted in the cells below.
alpha = 0.1

# Centers of the three radial basis functions, one (x1, x2) pair per center.
centers = [(0, 0), (1, -1), (-1, 1)]

# Squared Euclidean distance between every observation (rows of X) and every
# center in a single broadcasted step: (n, 1, 2) - (1, k, 2) -> (n, k, 2).
# Summing the squared differences directly avoids the sqrt-then-square round
# trip of np.linalg.norm(...) ** 2 and the per-element Python loop.
offsets = X[:, np.newaxis, :] - np.array(centers, dtype=float)[np.newaxis, :, :]
squared_distances = np.sum(offsets ** 2, axis=-1)

# Gaussian RBF features exp(-||x - c||^2 / 2): one row per observation,
# one column per center.
X_transformed = np.exp(-squared_distances / 2)

# Prepend a column of 1s so the first learned weight acts as the bias term.
bias_column = np.ones((X_transformed.shape[0], 1))
X_transformed = np.hstack((bias_column, X_transformed))

print(X_transformed)

[[1.         0.74826357 0.74826357 0.10126646]
 [1.         0.81464732 0.27117254 0.33121088]
 [1.         0.71177032 0.09632764 0.71177032]
 [1.         0.8824969  0.16121764 0.65376979]]


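Learn the ridge regression ($\ell_2$ regularization) using the closed-form solution $w = (\Phi^T \Phi + \lambda I)^{-1} \Phi^T y$, where $\Phi$ is the transformed design matrix `X_transformed`.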

In [189]:
# Gram matrix ΦᵀΦ of the transformed design matrix (bias column included).
X_transformed.T @ X_transformed

array([[4.        , 3.15717811, 1.27698138, 1.79801745],
       [3.15717811, 2.50896639, 0.99164557, 1.42916086],
       [1.27698138, 0.99164557, 0.66870305, 0.33955168],
       [1.79801745, 1.42916086, 0.33955168, 1.05398747]])

In [190]:
# Regularized Gram matrix ΦᵀΦ + λI; alpha is added to all 4 diagonal entries,
# so the bias weight is penalized as well.
X_transformed.T @ X_transformed + alpha * np.identity(4)

array([[4.1       , 3.15717811, 1.27698138, 1.79801745],
       [3.15717811, 2.60896639, 0.99164557, 1.42916086],
       [1.27698138, 0.99164557, 0.76870305, 0.33955168],
       [1.79801745, 1.42916086, 0.33955168, 1.15398747]])

In [191]:
# (ΦᵀΦ + λI)⁻¹. With alpha > 0 the matrix is symmetric positive definite, so
# the pseudo-inverse computed here coincides with the ordinary inverse.
inv = np.linalg.pinv(X_transformed.T @ X_transformed + alpha * np.identity(4))
inv


array([[ 4.54826202, -3.77681832, -1.86116983, -1.86155421],
       [-3.77681832,  5.98284561, -0.88542926, -1.26432443],
       [-1.86116983, -0.88542926,  4.33275508,  2.72155678],
       [-1.86155421, -1.26432443,  2.72155678,  4.53204296]])

In [192]:
# Ridge solution operator (ΦᵀΦ + λI)⁻¹Φᵀ. NOTE: despite the variable name,
# with alpha > 0 this is the regularized analogue of the Moore-Penrose
# pseudo-inverse of X_transformed, not the pseudo-inverse itself.
moore_penrose = inv @ X_transformed.T
moore_penrose

array([[ 0.14104789,  0.35022196,  0.3557537 , -0.30184975],
       [-0.09064104,  0.43822869, -0.50360629,  0.53370047],
       [ 0.99394091, -0.506149  , -0.13690469, -0.16477025],
       [-0.31221638, -0.65245932,  0.726472  ,  0.42435912]])

In [193]:
# Ridge weights w = (ΦᵀΦ + λI)⁻¹Φᵀy; the first entry multiplies the bias column.
W_ridge = moore_penrose @ y
W_ridge

array([ 0.33914267,  0.19945264,  0.40096085, -0.29599936])

### b)

In [195]:
# Predictions of the ridge model on the same observations it was fitted on.
y_pred = X_transformed.dot(W_ridge)

# Root mean squared error between the targets and those predictions.
rmse = np.sqrt(((y - y_pred) ** 2).mean())

print("RMSE:", rmse)

RMSE: 0.06508238153393446


# 2

In [229]:
# Step size used by the gradient-descent updates at the end of the notebook.
learning_rate = 0.1

# Layer 1: 4 inputs -> 3 units.
W1 = np.array([
    [1, 1, 1, 1],
    [1, 1, 2, 1],
    [1, 1, 1, 1],
])
b1 = np.ones((3, 1), dtype=int)

# Layer 2: 3 units -> 2 units.
W2 = np.array([
    [1, 4, 1],
    [1, 1, 1],
])
b2 = np.ones((2, 1), dtype=int)

# Layer 3 (output): 2 units -> 3 units.
W3 = np.array([
    [1, 1],
    [3, 1],
    [1, 1],
])
b3 = np.ones((3, 1), dtype=int)

# The two training inputs, stored as column vectors.
X0_1 = np.ones((4, 1), dtype=int)
X0_2 = np.array([1, 0, 0, -1]).reshape(-1, 1)

# Target column vectors: t1 is used with X0_1 and t2 with X0_2.
# The original notes tag them "B" and "A" (presumably the class labels).
t1 = np.array([-1, 1, -1]).reshape(-1, 1)  # B
t2 = np.array([1, -1, -1]).reshape(-1, 1)  # A

def activation_function(x):
    """Return tanh(0.5 * x - 2), applied element-wise when x is an array."""
    shifted = 0.5 * x - 2
    return np.tanh(shifted)

## Forward Propagation

#### Para o input X0_1 (x1 do enunciado) — o sufixo _1 identifica este input

In [197]:
# Layer-1 pre-activation for input X0_1: W1·x + b1.
Z1_1 = W1 @ X0_1 + b1
Z1_1

array([[5],
       [6],
       [5]])

In [198]:
# Layer-1 output for input X0_1: activation applied element-wise to Z1_1.
X1_1 = activation_function(Z1_1)
X1_1

array([[0.46211716],
       [0.76159416],
       [0.46211716]])

In [199]:
# Layer-2 pre-activation for input X0_1: W2·X1_1 + b2.
Z2_1 = W2 @ X1_1 + b2
Z2_1

array([[4.97061094],
       [2.68582847]])

In [200]:
# Layer-2 output for input X0_1: activation applied element-wise to Z2_1.
X2_1 = activation_function(Z2_1)
X2_1

array([[ 0.45048251],
       [-0.57642073]])

In [201]:
# Layer-3 (output layer) pre-activation for input X0_1: W3·X2_1 + b3.
Z3_1 = W3 @ X2_1 + b3
Z3_1

array([[0.87406178],
       [1.77502679],
       [0.87406178]])

In [202]:
# Network output for input X0_1: activation applied element-wise to Z3_1.
X3_1 = activation_function(Z3_1)
X3_1

array([[-0.91590016],
       [-0.80493961],
       [-0.91590016]])

#### Para o input X0_2 (x2 do enunciado) — o sufixo _2 identifica este input

In [203]:
# Layer-1 pre-activation for input X0_2: W1·x + b1.
Z1_2 = W1 @ X0_2 + b1
Z1_2

array([[1],
       [1],
       [1]])

In [204]:
# Layer-1 output for input X0_2: activation applied element-wise to Z1_2.
X1_2 = activation_function(Z1_2)
X1_2

array([[-0.90514825],
       [-0.90514825],
       [-0.90514825]])

In [205]:
# Layer-2 pre-activation for input X0_2: W2·X1_2 + b2.
Z2_2 = W2 @ X1_2 + b2
Z2_2

array([[-4.43088952],
       [-1.71544476]])

In [206]:
# Layer-2 output for input X0_2: activation applied element-wise to Z2_2.
X2_2 = activation_function(Z2_2)
X2_2

array([[-0.99956404],
       [-0.99343227]])

In [207]:
# Layer-3 (output layer) pre-activation for input X0_2: W3·X2_2 + b3.
Z3_2 = W3 @ X2_2 + b3
Z3_2

array([[-0.99299631],
       [-2.99212439],
       [-0.99299631]])

In [208]:
# Network output for input X0_2: activation applied element-wise to Z3_2.
X3_2 = activation_function(Z3_2)
X3_2

array([[-0.98652085],
       [-0.9981635 ],
       [-0.98652085]])

## Backpropagation

#### Derivative of the activation function

In [209]:
def derived_actication_function(x):
    """Return 0.5 / cosh(0.5 * x - 2)**2, element-wise when x is an array.

    This is the derivative with respect to x of tanh(0.5 * x - 2): the
    sech^2 term comes from tanh and the factor 0.5 from the chain rule.
    """
    shifted = 0.5 * x - 2
    sech_squared = 1 / (np.cosh(shifted) ** 2)
    return sech_squared * 0.5

Cálculo de deltas

In [210]:
# Output-layer delta for input X0_1: (output - target) ∘ f'(Z3).
delta3_1 = (X3_1 - t1) * derived_actication_function(Z3_1)
delta3_1

array([[ 0.00677537],
       [-0.31773455],
       [ 0.00677537]])

In [211]:
# Output-layer delta for input X0_2: (output - target) ∘ f'(Z3).
delta3_2 = (X3_2 - t2) * derived_actication_function(Z3_2)
delta3_2

array([[-2.65961421e-02],
       [ 3.36962051e-06],
       [ 1.80462886e-04]])

In [212]:
# Layer-2 delta for input X0_1: back-propagate delta3 through W3, then scale by f'(Z2).
delta2_1 = (W3.T @ delta3_1) * derived_actication_function(Z2_1)
delta2_1

array([[-0.37448246],
       [-0.10155772]])

In [213]:
# Layer-2 delta for input X0_2: back-propagate delta3 through W3, then scale by f'(Z2).
delta2_2 = (W3.T @ delta3_2) * derived_actication_function(Z2_2)
delta2_2

array([[-1.15092599e-05],
       [-1.72899298e-04]])

In [214]:
# Layer-1 delta for input X0_1: back-propagate delta2 through W2, then scale by f'(Z1).
delta1_1 = (W2.T @ delta2_1) * derived_actication_function(Z1_1)
delta1_1

array([[-0.18719036],
       [-0.33587187],
       [-0.18719036]])

In [215]:
# Layer-1 delta for input X0_2: back-propagate delta2 through W2, then scale by f'(Z1).
delta1_2 = (W2.T @ delta2_2) * derived_actication_function(Z1_2)
delta1_2

array([[-1.66619254e-05],
       [-1.97816249e-05],
       [-1.66619254e-05]])

#### Updates dos valores

##### Nível 3

In [216]:
# Gradient of the error w.r.t. W3: sum over both inputs of the outer product
# between the layer-3 delta and the layer-2 output.
dE_dW3 = np.outer(delta3_1, X2_1) + np.outer(delta3_2, X2_2)
dE_dW3

array([[ 0.02963673,  0.022516  ],
       [-0.14313723,  0.18314544],
       [ 0.0028718 , -0.00408474]])

In [217]:
# Gradient-descent step on W3.
W3_new = W3 - learning_rate * dE_dW3
W3_new

array([[0.99703633, 0.9977484 ],
       [3.01431372, 0.98168546],
       [0.99971282, 1.00040847]])

In [218]:
# Gradient of the error w.r.t. b3: the layer-3 deltas summed over both inputs.
dE_db3 = delta3_1 + delta3_2
dE_db3

array([[-0.01982077],
       [-0.31773118],
       [ 0.00695584]])

In [219]:
# Gradient-descent step on b3.
b3_new = b3 - learning_rate * dE_db3
b3_new

array([[1.00198208],
       [1.03177312],
       [0.99930442]])

##### Nível 2

In [220]:
# Gradient of the error w.r.t. W2: sum over both inputs of the outer product
# between the layer-2 delta and the layer-1 output.
dE_dW2 = np.outer(delta2_1, X1_1) + np.outer(delta2_2, X1_2)
dE_dW2

array([[-0.17304435, -0.28519324, -0.17304435],
       [-0.04677506, -0.07718926, -0.04677506]])

In [221]:
# Gradient-descent step on W2.
W2_new = W2 - learning_rate * dE_dW2
W2_new

array([[1.01730444, 4.02851932, 1.01730444],
       [1.00467751, 1.00771893, 1.00467751]])

In [222]:
# Gradient of the error w.r.t. b2: the layer-2 deltas summed over both inputs.
dE_db2 = delta2_1 + delta2_2
dE_db2

array([[-0.37449397],
       [-0.10173062]])

In [223]:
# Gradient-descent step on b2.
b2_new = b2 - learning_rate * dE_db2
b2_new

array([[1.0374494 ],
       [1.01017306]])

##### Nível 1

In [224]:
# Gradient of the error w.r.t. W1: sum over both inputs of the outer product
# between the layer-1 delta and the network input.
dE_dW1 = np.outer(delta1_1, X0_1) + np.outer(delta1_2, X0_2)
dE_dW1

array([[-0.18720702, -0.18719036, -0.18719036, -0.1871737 ],
       [-0.33589165, -0.33587187, -0.33587187, -0.33585209],
       [-0.18720702, -0.18719036, -0.18719036, -0.1871737 ]])

In [225]:
# Gradient-descent step on W1.
W1_new = W1 - learning_rate * dE_dW1
W1_new

array([[1.0187207 , 1.01871904, 1.01871904, 1.01871737],
       [1.03358917, 1.03358719, 2.03358719, 1.03358521],
       [1.0187207 , 1.01871904, 1.01871904, 1.01871737]])

In [226]:
# Gradient of the error w.r.t. b1: the layer-1 deltas summed over both inputs.
dE_db1 = delta1_1 + delta1_2
dE_db1

array([[-0.18720702],
       [-0.33589165],
       [-0.18720702]])

In [227]:
# Gradient-descent step on b1.
b1_new = b1 - learning_rate * dE_db1
b1_new

array([[1.0187207 ],
       [1.03358917],
       [1.0187207 ]])