In [1]:
# Imports definition
import pandas as pd
from numpy.linalg import norm as vector_norm
import numpy as np
import math
from sklearn.linear_model import LinearRegression

In [2]:
# Raw training samples: one list per column, eight observations each.
# "y0" is a constant bias term added for the regression; it is not part of
# the original data table.
train_var = {
    "y0": [1.0] * 8,
    "y1": [1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0],
    "y2": [1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0],
    "y3": [0.0, 5.0, 4.0, 3.0, 7.0, 1.0, 2.0, 9.0],
    "output": [1.0, 3.0, 2.0, 0.0, 6.0, 4.0, 5.0, 7.0],
}

# Raw test samples, same column layout as the training set (two observations).
test_var = {
    "y0": [1.0] * 2,
    "y1": [2.0, 0.0],
    "y2": [0.0, 2.0],
    "y3": [0.0, 1.0],
    "output": [2.0, 4.0],
}

# Dict keys become column names; list positions become the row index.
df_train = pd.DataFrame(train_var)
df_test = pd.DataFrame(test_var)

In [3]:
def apply_norm_basis(df, feature_cols=("y1", "y2", "y3")):
    """Replace the feature columns with powers of each row's Euclidean norm.

    For every row, the norm of the vector formed by ``feature_cols`` is
    computed, and the j-th feature column (1-based) is overwritten with
    ``norm ** j``. With the default columns this yields
    y1 = ||y||, y2 = ||y||^2, y3 = ||y||^3.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns listed in ``feature_cols``.
    feature_cols : sequence of str
        Columns that form the input vector and receive the transformed values.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and index; ``df`` itself is left
        untouched.
    """
    cols = list(feature_cols)

    # One norm per row, computed from the original values before any column
    # is overwritten.
    norm = vector_norm(df[cols].to_numpy(dtype=float), axis=1)

    # Writing to the rows yielded by DataFrame.iterrows() is not guaranteed to
    # reach the frame (the row may be a copy), so the new columns are built
    # explicitly and assigned in one step.
    return df.assign(
        **{col: norm ** power for power, col in enumerate(cols, start=1)}
    )


# Apply the same basis functions to both the training and the test set.
df_train = apply_norm_basis(df_train)
df_test = apply_norm_basis(df_test)

print(df_train)

    y0        y1    y2          y3  output
0  1.0  1.414214   2.0    2.828427     1.0
1  1.0  5.196152  27.0  140.296115     3.0
2  1.0  4.472136  20.0   89.442719     2.0
3  1.0  3.741657  14.0   52.383203     0.0
4  1.0  7.280110  53.0  385.845824     6.0
5  1.0  1.732051   3.0    5.196152     4.0
6  1.0  2.828427   8.0   22.627417     5.0
7  1.0  9.219544  85.0  783.661279     7.0


In [4]:
# Split each frame into a design matrix (every column except "output")
# and a target vector (the "output" column), both as NumPy arrays.
X_train = df_train.drop(columns="output").to_numpy()
X_test = df_test.drop(columns="output").to_numpy()

y_train = df_train["output"].to_numpy()
y_test = df_test["output"].to_numpy()

In [5]:
# Fit an ordinary linear regression on the transformed features. A plain
# linear model is enough here because the polynomial terms were already
# produced by the basis-function step.
# NOTE(review): X_train still carries the constant "y0" column while
# LinearRegression is created with its default settings — confirm that the
# extra bias column alongside the model's own intercept is intended.
reg = LinearRegression()
reg.fit(X_train, y_train)

# Predict the test targets and report the root-mean-square error.
predictions = reg.predict(X_test)
print(f"predictions: {predictions}")

residuals = np.subtract(y_test, predictions)
error = math.sqrt(np.mean(np.square(residuals)))
print(error)

predictions: [2.45360697 2.35072931]
1.2095150071840008


In [6]:
# Closed-form least squares, printed step by step:
#     w = (Xt.X)^-1 . Xt . Z
print("X: \n", X_train)
print("Z: \n", y_train)

# Transpose of the design matrix
xt = X_train.T
print("Xt: \n", xt)

# Gram matrix Xt.X
m1 = xt @ X_train
print("Xt.X: \n", m1)

# Explicit inverse of the Gram matrix
inverse = np.linalg.inv(m1)
print(f"(Xt.X)^-1: \n", inverse)

# (Xt.X)^-1 . Xt
m2 = inverse @ xt
print(f"(Xt.X)^-1.Xt: \n", m2)

# Weight vector
w = m2 @ y_train
print(f"(Xt.X)^-1.Xt.Z: \n", w)

# Predict the two test samples as the dot product of the weights with each row
print(X_test[0])
prod_1, prod_2 = np.dot(w, X_test[0]), np.dot(w, X_test[1])
print(f"prod1: {prod_1} | prod2: {prod_2}")


X: 
 [[  1.           1.41421356   2.           2.82842712]
 [  1.           5.19615242  27.         140.29611541]
 [  1.           4.47213595  20.          89.4427191 ]
 [  1.           3.74165739  14.          52.38320341]
 [  1.           7.28010989  53.         385.84582413]
 [  1.           1.73205081   3.           5.19615242]
 [  1.           2.82842712   8.          22.627417  ]
 [  1.           9.21954446  85.         783.66127887]]
Z: 
 [1. 3. 2. 0. 6. 4. 5. 7.]
Xt: 
 [[  1.           1.           1.           1.           1.
    1.           1.           1.        ]
 [  1.41421356   5.19615242   4.47213595   3.74165739   7.28010989
    1.73205081   2.82842712   9.21954446]
 [  2.          27.          20.          14.          53.
    3.           8.          85.        ]
 [  2.82842712 140.29611541  89.4427191   52.38320341 385.84582413
    5.19615242  22.627417   783.66127887]]
Xt.X: 
 [[8.00000000e+00 3.58842916e+01 2.12000000e+02 1.48228114e+03]
 [3.58842916e+01 2.120000