# Gradient Descent Exercises

In [327]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

---
## E1.0) Simulate Dataset

- set random seed to 42
- (1000,2) samples from $X \sim \mathcal{U}(0,1)$ , i.e. 1000 rows, 2 columns. 
- 1000 samples from $\epsilon \sim \mathcal{N}(0,1)$
- $y = 3x_1 + 5x_2 + 3 + \epsilon$ , where $x_i$ is column $i$ of $X$

Finally add a column of ones for the intercept to $X$.

In [328]:
def simulate_dataset(samples, features, beta_values):
    np.random.seed(42) # random seed for reproducability

    # generating random X values based on samples and features
    X = np.random.uniform(0, 1, (samples, features))

    # column vector of ones for intercept
    ones = np.ones(len(X))

     # adding column of ones to feature matrix
    X = np.c_[ones, X]

    # generating random noise values
    epsilon = np.random.normal(0, 1, samples)

    # calculating y based on X dot multiplied with beta vector (note beta_0 multiplied by ones from the added column)
    # adding epsilon for some random variance
    y = np.dot(X, beta_values) + epsilon
    
    return X, y

In [329]:
X, y = simulate_dataset(samples = 1000, features = 2, beta_values = [3, 3, 5])

print(f"X:\nShape: {X.shape}\n{X[:3]}\n")
print(f"y:\nShape: {y.shape}\n{y[:3]}")

X:
Shape: (1000, 3)
[[1.         0.37454012 0.95071431]
 [1.         0.73199394 0.59865848]
 [1.         0.15601864 0.15599452]]

y:
Shape: (1000,)
[7.9992093  7.36239389 4.02154963]


In [330]:
X.shape[1], 1

(3, 1)

In [331]:
np.random.randn(X.shape[1], 1)

array([[ 1.87096518],
       [ 0.38961418],
       [-0.86829273]])

---
## Train | Test Split

In [332]:
def train_test_split(X, y, train_fraction = 0.3, replace = False):
    
    # amount of indices to pick for training data
    n = int(len(X) * train_fraction)

    # n random indices between 0 and len(X) to use as training data
    indices = np.random.choice(len(X), size = n, replace = replace)

    # picking out indices as train, and all other indices as test
    X_train = X[indices]
    X_test = X[np.in1d(np.arange(len(X)), indices, invert = True)]

    # NOTE: reshaping y-vectors to (..., 1) instead of (..., ) to be able to use in coming calculations
    y_train = y[indices].reshape(-1, 1)
    y_test = y[np.in1d(np.arange(len(y)), indices, invert = True)].reshape(-1, 1)

    return X_train, X_test, y_train, y_test

In [333]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

# checking that dimensions look correct
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((300, 3), (700, 3), (300, 1), (700, 1))

---
## E1.1 A) Gradient Descent

In [336]:
def gradient_descent(X, y, learning_rate = 0.1, epochs = 500):
    m = len(X)

    # array of random values with equal amount of rows as X has columns (X.shape[1]), and 1 column
    # for example X (1000, 3) -> theta (3, 1)
    theta = np.random.randn(X.shape[1], 1)

    lista = [[] for _ in range(len(X.T))]

    # looping over epochs
    for i in range(epochs):


        # calculating gradient
        gradient = 2 / m * X.T @ (X @ theta - y)

        # updating theta based on learning rate and gradient
        theta -= learning_rate * gradient

        if i % 5 == 0:
            for i, _ in enumerate(theta):
                lista[i].append(theta[i])
        
    return theta, lista

In [335]:
epochs = 500

# getting theta estimations based on training data
theta_hat, theta_list = gradient_descent(X_train, y_train, epochs = epochs)

theta_hat

AttributeError: 'numpy.ndarray' object has no attribute 'value'

In [None]:
len(theta_list[2]), theta_list[0][:3]

(100, [array([3.13843901]), array([3.13843901]), array([3.13843901])])

### Plotting

In [None]:
sns.lineplot(x = range(1, 500, 5), y = theta_list[0].values)

AttributeError: 'list' object has no attribute 'values'

In [None]:
# defining prediction function
predict = lambda x, theta: np.dot(x, theta)

# predicting y-values based on X_test
y_pred = predict(X_test, theta_hat)

y_pred.shape, y_pred[:3]

((700, 1),
 array([[8.83920082],
        [8.22565364],
        [4.3592411 ]]))

In [None]:
# defining functions to calculate MAE, MSE, RMSE
mean_absolute_error = lambda y_test, y_pred: np.sum(np.abs(y_test - y_pred)) / len(y_test)
mean_squared_error = lambda y_test, y_pred: np.sum((y_test - y_pred) ** 2) / len(y_test)
root_mean_squared_error = lambda y_test, y_pred: np.sqrt(mean_squared_error(y_test, y_pred))

print(f"MAE: {mean_absolute_error(y_test, y_pred)}")
print(f"MSE: {mean_squared_error(y_test, y_pred)}")
print(f"RMSE: {root_mean_squared_error(y_test, y_pred)}")

MAE: 0.7980729703106827
MSE: 1.0154914305799596
RMSE: 1.0077159473680863
