<a href="https://colab.research.google.com/github/Mark12481632/Intro_ML/blob/main/Intro_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Imports located here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [71]:
# Source CSV for the linear-regression example.
url = "https://raw.githubusercontent.com/Mark12481632/Intro_ML/refs/heads/main/datasets/lin_reg_dataset_1.csv"

# Four feature columns followed by the regression target.
column_names = ['attr_1', 'attr_2', "attr_3", "attr_4", "result"]

# Load the file, then relabel its columns appropriately.
# set_axis raises if the file does not have exactly len(column_names) columns.
df = pd.read_csv(url).set_axis(column_names, axis=1)

In [72]:
# Separate the feature columns from the regression target.
X = df[['attr_1', 'attr_2', "attr_3", "attr_4"]]
y = df["result"]

# Hold out 20% of the rows for evaluation. random_state pins the shuffle so
# the split -- and therefore the metrics printed below -- are identical on
# every Restart & Run All instead of changing from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit ordinary least squares on the training rows only.
model = LinearRegression()
model.fit(X_train, y_train)

# Score on the held-out rows.
predictions = model.predict(X_test)

mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print("MSE:", mse)
print("R² score:", r2)

MSE: 96.36438521152955
R² score: 0.9891123673256726


In [73]:
# Line up the held-out targets against the model's predictions and
# show the first few rows.
actual_values = y_test.to_numpy()
comparison = pd.DataFrame(
    {
        "Actual": actual_values,
        "Predicted": predictions,
    }
)
print(comparison.head())

       Actual   Predicted
0  -71.507715  -82.643661
1    3.727841   -1.099662
2  -50.513493  -69.315145
3 -123.858728 -112.646463
4  -24.699426  -37.978642


In [75]:
# Least squares via the SVD, as a cross-check on the sklearn model above.
# A leading column of ones is prepended so that the first entry of beta
# plays the role of the intercept.
X_train_aug = np.c_[np.ones(X_train.shape[0]), X_train]
X_test_aug = np.c_[np.ones(X_test.shape[0]), X_test]

# Reduced SVD of the augmented training matrix:
#  U: left singular vectors
#  singular_values: singular values (as 1D array)
#  VT: transpose of right singular vectors
U, singular_values, VT = np.linalg.svd(X_train_aug, full_matrices=False)
# Diagonal-matrix form of the singular values, kept under the name S.
S = np.diag(singular_values)

# With full_matrices=False the three factors multiply back directly:
# X_reconstructed = U @ S @ VT

# Calc beta using these SVD values: beta = V @ S^-1 @ U^T @ y.
# S is diagonal, so applying S^-1 is an element-wise division by the
# singular values; no explicit matrix inverse is needed.
# NOTE: a zero singular value (rank-deficient X) would yield inf/nan here.
beta = VT.T @ ((U.T @ y_train.to_numpy()) / singular_values)
print(beta)

predictions_svd = X_test_aug @ beta

# The SVD solution should match sklearn's fit up to floating-point noise;
# fail loudly if it does not.
np.testing.assert_allclose(predictions_svd, predictions, rtol=1e-8, atol=1e-8)

# Show the SVD-based predictions (not the sklearn ones) next to the targets.
pd.DataFrame({"Actual": y_test.values, "Predicted": predictions_svd})

[ 0.4988758  59.37336842  2.50601202 31.90779211 83.10631424]


array([-1.13686838e-13, -1.63424829e-13, -1.42108547e-14, -3.26849658e-13,
       -1.84741111e-13, -2.84217094e-14,  2.55795385e-13,  0.00000000e+00,
       -1.13686838e-13, -5.68434189e-14, -2.84217094e-13,  1.27897692e-13,
       -7.10542736e-14, -2.84217094e-13, -2.84217094e-14, -1.13686838e-13,
       -1.42108547e-14,  1.42108547e-14,  6.39488462e-14,  1.13686838e-13,
        2.34479103e-13, -2.84217094e-14, -2.13162821e-14, -4.26325641e-14,
       -1.13686838e-13,  2.41584530e-13,  1.20792265e-13, -2.13162821e-13,
        0.00000000e+00,  1.13686838e-13,  1.56319402e-13, -1.13686838e-13,
        1.70530257e-13])