In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from numba import jit
from tqdm.auto import tqdm

# Use the first CUDA GPU when one is present; otherwise fall back to the CPU so
# the notebook still runs end-to-end on machines without a GPU.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
torch.set_default_device(DEVICE)

In [None]:
# Read the dataset (path is relative to the notebook's working directory) and
# preview the first rows to sanity-check the columns.
df = pd.read_csv(filepath_or_buffer='Student_Performance.csv')
df.head(n=5)

In [None]:
def z_score_normalize(a):
    """Standardise each column of ``a`` to zero mean and unit variance.

    Statistics are computed along axis 0 (per column for a 2-D array).

    Args:
        a: Array-like of numeric values.

    Returns:
        Array of the same shape holding ``(a - mean) / std``. Columns whose
        standard deviation is exactly zero (constant columns) come out as all
        zeros instead of NaN/inf.
    """
    a = np.asarray(a)
    mean = np.mean(a, axis=0)
    std = np.std(a, axis=0)
    # A constant column has std == 0; dividing by 1 instead leaves the already
    # zero-centred values at 0 rather than producing 0/0 = NaN.
    safe_std = np.where(std == 0, 1.0, std)
    return (a - mean) / safe_std

In [None]:
# Standardise every column, then separate the feature columns (all but the
# last) from the target (last column).
df_np = z_score_normalize(df.to_numpy())

X, y = df_np[:, :-1], df_np[:, -1]

In [None]:
# One scatter panel per feature column against the target (last column).
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(20, 10))
ax = ax.flatten()

for i, col in enumerate(df.columns[:-1]):
    ax[i].scatter(X[:, i], y, s=7)
    ax[i].set(xlabel=col, ylabel=df.columns[-1])

# The last cell of the 2x3 grid is removed.
fig.delaxes(ax[-1])

In [None]:
# Shuffle row indices with a fixed seed so the train/test split is the same on
# every Restart & Run All.
rng = np.random.default_rng(42)
msk = rng.permutation(X.shape[0])
# Number of training rows (80 %). Kept as a plain int because it is only used
# as a slice bound; a tensor here would add nothing.
m = round(0.8 * X.shape[0])
n = X.shape[1]

# torch.as_tensor avoids copying when it can; with a non-CPU default device the
# data presumably still has to be copied onto that device.
X = torch.as_tensor(X).to(dtype=torch.float32)
y = torch.as_tensor(y).to(dtype=torch.float32)

# First m shuffled rows form the training set, the remainder the test set.
X_train = X[msk[:m]]
y_train = y[msk[:m]]

X_test = X[msk[m:]]
y_test = y[msk[m:]]

In [None]:
@torch.jit.script
def compute_gradient(X, y, w, b):
    """Gradient of the halved mean-squared-error cost of a linear model.

    The prediction is ``w @ X^T + b``; ``X.shape[0]`` is used as the number of
    samples.

    Returns:
        ``(dj_dw, dj_db)``: the gradient with respect to ``w`` and to ``b``.
    """
    n_samples = X.shape[0]
    inv_m = 1 / n_samples

    prediction = torch.matmul(w, torch.transpose(X, 0, 1)) + b
    residual = prediction - y

    grad_w = inv_m * torch.matmul(residual, X)
    grad_b = inv_m * residual.sum()

    return grad_w, grad_b

In [None]:
@torch.jit.script
def compute_cost(X, y, w, b):
    """Halved mean-squared-error cost of the linear model ``w @ X^T + b``.

    Returns:
        ``sum((y_hat - y) ** 2) / (2 * m)`` with ``m = X.shape[0]``.
    """
    n_samples = X.shape[0]
    scale = 1 / (2 * n_samples)

    prediction = torch.matmul(w, torch.transpose(X, 0, 1)) + b
    residual = prediction - y

    return scale * residual.pow(2).sum()

In [None]:
def gradient_descent(X, y, w, b, num_iters, alpha):
    """Fit linear-model parameters with batch gradient descent.

    Args:
        X: Features, forwarded unchanged to ``compute_gradient`` and
            ``compute_cost``.
        y: Targets, forwarded likewise.
        w: Initial weight tensor. Not modified; a copy is updated.
        b: Initial bias tensor. Not modified; a copy is updated.
        num_iters: Number of update steps to run.
        alpha: Learning rate applied to both gradients.

    Returns:
        ``(w, b, J_hist)``: the updated parameters and a list with the cost
        after every step, one entry per iteration, as returned by
        ``compute_cost``.
    """
    # The updates below are in place, so work on copies to keep the caller's
    # initial parameters intact.
    w = w.clone()
    b = b.clone()
    J_hist = []

    # Log about ten times per run. max(1, ...) prevents a modulo-by-zero when
    # num_iters is smaller than 10.
    log_every = max(1, num_iters // 10)

    for i in tqdm(range(num_iters)):
        dj_dw, dj_db = compute_gradient(X, y, w, b)
        w -= alpha * dj_dw
        b -= alpha * dj_db

        J_hist.append(compute_cost(X, y, w, b))
        if i % log_every == 0:
            print(f'Iteration = {i:6}, Cost = {J_hist[-1]:.8f}')

    return w, b, J_hist

In [None]:
# Hyperparameters for the training run.
num_iters = 1_000_000
alpha = 1e-3

# Start from all-zero weights (one per feature) and a zero scalar bias.
w = torch.zeros(n)
b = torch.zeros((), dtype=torch.float32)

w, b, J = gradient_descent(X_train, y_train, w, b, num_iters=num_iters, alpha=alpha)

In [None]:
# The cost history is a list of 0-d tensors on the default device. matplotlib
# needs host data, so stack them and move the result to the CPU in one transfer.
cost_history = torch.stack(list(J)).detach().cpu().numpy() if len(J) else np.empty(0)

fig, ax = plt.subplots()
ax.plot(np.arange(0, len(cost_history)), cost_history)
ax.set_xlabel('Iteration')
ax.set_ylabel('Cost')
ax.set_title('Training cost per iteration');

In [None]:
# Predict on the held-out rows with the same model used in training
# (w @ X^T + b, so the bias is included), then move the result to host NumPy
# so the plotting and metric cells below can consume it.
y_pred = (w @ X_test.transpose(0, 1) + b).detach().cpu().numpy()

In [None]:
def _to_numpy(values):
    """Return ``values`` as a host NumPy array (torch tensors are moved to the CPU first)."""
    if isinstance(values, torch.Tensor):
        return values.detach().cpu().numpy()
    return np.asarray(values)


# matplotlib cannot read tensors that live on a GPU, so plot host copies.
X_test_np = _to_numpy(X_test)
y_test_np = _to_numpy(y_test)
y_pred_np = _to_numpy(y_pred)

fig, ax = plt.subplots(2, 3, figsize=(20, 10))
ax = ax.flatten()

# One panel per feature: actual targets with the predictions drawn on top.
for i, col in enumerate(df.columns[:-1]):
    ax[i].set_xlabel(col)
    ax[i].set_ylabel(df.columns[-1])
    ax[i].scatter(X_test_np[:, i], y_test_np, s=7, label='actual')
    ax[i].scatter(X_test_np[:, i], y_pred_np, s=4, label='predicted')
    ax[i].legend()

# The last cell of the 2x3 grid is removed.
fig.delaxes(ax[-1])

In [None]:
def r2_score(y_test, y_pred):
    """Coefficient of determination, ``R^2 = 1 - RSS / TSS``.

    Args:
        y_test: True target values (array-like or torch tensor).
        y_pred: Predicted values with a matching shape (array-like or tensor).

    Returns:
        The R^2 score. ``TSS`` is the spread of the true targets around their
        own mean, so a model that always predicts that mean scores 0.
    """
    def _as_array(values):
        # Torch tensors (possibly on a GPU) must be moved to the host before
        # NumPy can operate on them.
        if isinstance(values, torch.Tensor):
            values = values.detach().cpu().numpy()
        return np.asarray(values)

    y_test = _as_array(y_test)
    y_pred = _as_array(y_pred)

    RSS = np.sum((y_test - y_pred) ** 2)
    # The baseline is the mean of the observed targets, not of the predictions.
    TSS = np.sum((y_test - y_test.mean()) ** 2)
    return 1 - RSS / TSS
    
# Report the test-set R^2 to four decimal places.
r2 = r2_score(y_test, y_pred)
print(f'r2_score = {r2:.4f}')

In [None]:
# Scratch check of Tensor.transpose: swapping dims 0 and 1 of a 2x2 matrix.
a = torch.tensor([[1, 2], [3, 4]]).transpose(0, 1)
a