# Linear Regression Warmup

In [None]:
import torch
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Y is just a linear combination of X
x = torch.rand((100, 10))
coefficients = torch.rand(10)
y = torch.mv(x, coefficients)

weights = torch.rand(10)
regularization = .01
learning_rate = .001

for epoch in range(10):
  print(f'EPOCH: {epoch} --------------------------------------------------------')
  losses = []
  for i in range(len(x)):
    pred = torch.dot(weights, x[i].T)
    loss = (y[i] - pred)**2
    loss_grad = 2 * (pred - y[i])

    gradient = (loss_grad * x[i]) + (regularization * weights)
    weights = weights - learning_rate * gradient
    losses.append(loss)
  print(np.mean(losses))

EPOCH: 0 --------------------------------------------------------
0.567188
EPOCH: 1 --------------------------------------------------------
0.270656
EPOCH: 2 --------------------------------------------------------
0.1575074
EPOCH: 3 --------------------------------------------------------
0.11319754
EPOCH: 4 --------------------------------------------------------
0.094761156
EPOCH: 5 --------------------------------------------------------
0.08608443
EPOCH: 6 --------------------------------------------------------
0.08112461
EPOCH: 7 --------------------------------------------------------
0.07761602
EPOCH: 8 --------------------------------------------------------
0.074708775
EPOCH: 9 --------------------------------------------------------
0.07208331


# Hard Coded Neural Network

In [None]:
# Generate non linear training data for neural networks
import torch
import numpy as np
from sklearn.model_selection import train_test_split

x = torch.rand((10000, 10))

linear_coef = np.random.rand(10)
quad_coef = np.random.rand(10)
exp_coef = np.random.rand(10)
sin_coef = np.random.rand(10)
y = (
    np.dot(linear_coef, x.T) +
    np.dot(quad_coef, x.T**2) +
    np.dot(exp_coef, np.exp(x.T)) +
    np.dot(sin_coef, np.sin(x.T))
)
noise = np.random.normal(0, 0.1, len(x))
y += noise
y = torch.tensor(y, dtype=torch.float).unsqueeze(1)
y -= y.min()
y /= y.max()

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33)

In [None]:
class NeuralNetwork:
  def __init__(self):
    self.layer1 = torch.rand((10, 10))
    self.layer2 = torch.rand((10, 20))
    self.layer3 = torch.rand((20, 1))

  def sigmoid(self, x):
      return 1 / (1 + np.exp(-x))

  def sigmoid_derivative(self, x):
      return x * (1 - x)

  def forward(self, x):
    layer1_out = self.sigmoid(x @ self.layer1)
    layer2_out = self.sigmoid(layer1_out @ self.layer2)
    pred = layer2_out @ self.layer3
    return pred

  def train(self, x, y):
    # Minibatch gradient descent
    losses = []
    batch_size = 100
    for i in range(0, len(x), batch_size):
      x_batch, y_batch = x[i:i+batch_size], y[i:i+batch_size]

      layer1_out = self.sigmoid(x_batch @ self.layer1) #(N, 10) @ (10, 10) = (N, 10)
      layer2_out = self.sigmoid(layer1_out @ self.layer2) #(N, 10) @ (10, 20) = (N, 20)
      pred = layer2_out @ self.layer3 #(N, 20) @ (20, 1) = (N, 1)

      loss = (y_batch - pred)**2 # N, 1
      loss_grad = 2 * (pred - y_batch)/len(y_batch) # N, 1

      grad_layer3 = self.sigmoid_derivative(layer2_out).T @ loss_grad + regularization * self.layer3 # (20, N) @ (N, 1) + (20, 1) = (20, 1)
      loss_layer2 = loss_grad @ self.layer3.T  # (N, 1) @ (1, 20) = (N, 20)
      grad_layer2 = self.sigmoid_derivative(layer1_out).T @ loss_layer2 + regularization * self.layer2 # 10, N @ N, 20 = 10, 20
      loss_layer1 = loss_layer2 @ self.layer2.T  # (N, 20) @ (20, 10) = (N, 10)
      grad_layer1 = x_batch.T @ loss_layer1 + regularization * self.layer1 # 10, N @ N, 10 = 10, 10

      self.layer3 = self.layer3 - learning_rate * grad_layer3
      self.layer2 = self.layer2 - learning_rate * grad_layer2
      self.layer1 = self.layer1 - learning_rate * grad_layer1

      losses.append(loss)

    return losses

model = NeuralNetwork()
regularization = .01
learning_rate = .001

for epoch in range(10):
  print(f'EPOCH: {epoch}')

  train_losses = model.train(x_train, y_train)
  y_pred = model.forward(x_test)
  test_losses = (y_test - y_pred)**2

  print(f'Train loss: {np.mean(train_losses)}, test loss: {torch.mean(test_losses)} \n')

EPOCH: 0
Train loss: 39.1106071472168, test loss: 13.47390079498291 

EPOCH: 1
Train loss: 9.59207534790039, test loss: 6.5740485191345215 

EPOCH: 2
Train loss: 4.807140827178955, test loss: 3.342585802078247 

EPOCH: 3
Train loss: 2.4535675048828125, test loss: 1.7106475830078125 

EPOCH: 4
Train loss: 1.2598650455474854, test loss: 0.8809639811515808 

EPOCH: 5
Train loss: 0.652201771736145, test loss: 0.45865124464035034 

EPOCH: 6
Train loss: 0.34260228276252747, test loss: 0.24367833137512207 

EPOCH: 7
Train loss: 0.18483026325702667, test loss: 0.13429103791713715 

EPOCH: 8
Train loss: 0.1044313982129097, test loss: 0.07866998016834259 

EPOCH: 9
Train loss: 0.06346756964921951, test loss: 0.050418466329574585 



# General Solution (any combination of layers)

In [453]:
class NeuralNetwork:
  def __init__(self, layers, loss_func, loss_func_deriv, batch_size=100):
    self.layers = layers
    self.loss_func = loss_func
    self.loss_func_deriv = loss_func_deriv
    self.batch_size = batch_size

  def forward(self, x):
    out = x
    for layer in self.layers:
      out = layer.forward(out)
    return out

  def train(self, x, y):
    losses = []
    for i in range(0, len(x), self.batch_size):
      x_batch, y_batch = x[i:i+self.batch_size], y[i:i+self.batch_size]

      y_pred = self.forward(x_batch)
      loss = self.loss_func(y_batch, y_pred)
      losses.append(torch.mean(loss).squeeze())

      loss_grad = self.loss_func_deriv(y_batch, y_pred)
      for layer in reversed(self.layers):
        loss_grad = layer.backpropagate(loss_grad)

    return losses

class FF_Layer:
  def __init__(self, inp_shape, out_shape):
    self.weights = torch.rand((inp_shape, out_shape)) - 0.5
    self.bias = torch.rand((1, out_shape))

  def forward(self, x):
    self.input = x
    self.output = self.input @ self.weights + self.bias # (n, inp_shape) @ (inp_shape, out_shape) = (n, out_shape)
    return self.output

  def backpropagate(self, loss):
    gradient = self.input.T @ loss + regularization * self.weights # (inp_shape, n) @ (n, out_shape) = (inp_shape, out_shape)
    self.weights -= learning_rate * gradient
    self.bias -= learning_rate * (torch.mean(loss, axis=0) * self.bias)
    return loss @ self.weights.T # (n, out_shape) @ (out_shape, inp_shape) = (n, inp_shape)

class Sigmoid_Activation:
  def forward(self, x):
    self.input = x
    self.output = 1 / (1 + np.exp(-x))
    return self.output

  def backpropagate(self, loss):
    return loss * (self.output * (1 - self.output))

class Relu_Activation:
  def forward(self, x):
    self.input = x
    self.output = torch.maximum(x, torch.tensor(0.0))
    return self.output

  def backpropagate(self, loss):
    return loss * (self.output > 0).float()

def mean_squared_error(y_true, y_pred):
  return (y_true - y_pred)**2

def mean_squared_error_deriv(y_true, y_pred):
  return 2 * (y_pred - y_true)/len(y_true)

model = NeuralNetwork([
    FF_Layer(10, 20),
    Sigmoid_Activation(),
    FF_Layer(20, 50),
    Relu_Activation(),
    FF_Layer(50, 10),
    Relu_Activation(),
    FF_Layer(10, 1),
], mean_squared_error, mean_squared_error_deriv)

regularization = .01
learning_rate = .0001

for epoch in range(10):
  print(f'EPOCH: {epoch}')

  train_losses = model.train(x_train, y_train)
  y_pred = model.forward(x_test)
  test_losses = (y_test - y_pred)**2

  print(f'Train loss: {np.mean(train_losses)}, test loss: {torch.mean(test_losses)} \n')

EPOCH: 0
Train loss: 0.3687610328197479, test loss: 0.23215410113334656 

EPOCH: 1
Train loss: 0.1660536229610443, test loss: 0.11483999341726303 

EPOCH: 2
Train loss: 0.08878140151500702, test loss: 0.06913238018751144 

EPOCH: 3
Train loss: 0.058044079691171646, test loss: 0.05080677941441536 

EPOCH: 4
Train loss: 0.045444831252098083, test loss: 0.04326576367020607 

EPOCH: 5
Train loss: 0.04011203348636627, test loss: 0.04006524011492729 

EPOCH: 6
Train loss: 0.03775382414460182, test loss: 0.038642123341560364 

EPOCH: 7
Train loss: 0.036650799214839935, test loss: 0.037969764322042465 

EPOCH: 8
Train loss: 0.036090053617954254, test loss: 0.0376136414706707 

EPOCH: 9
Train loss: 0.035769056528806686, test loss: 0.03739610314369202 

