## Training data

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [6]:
# create a dataset of 100 examples with single features and labels 
# we use 3 parameters for this

w1=3
w0=4
n=100

X=10*np.random.rand(n,)
# y=4+3x+noise  randomly added noise
y= w0+w1*X+np.random.rand(n,)

In [7]:
print(X.shape, y.shape)

(100,) (100,)


In [8]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.2,random_state=42)

In [9]:
print(xtrain.shape,xtest.shape,ytrain.shape,ytest.shape)

(80,) (20,) (80,) (20,)


In [10]:
def add_dummy_features(x):
  return np.column_stack((np.ones(x.shape[0]),x))

In [11]:
add_dummy_features(X)

array([[1.        , 1.1561058 ],
       [1.        , 1.18527197],
       [1.        , 2.93063029],
       [1.        , 0.79696649],
       [1.        , 3.79729219],
       [1.        , 5.32895556],
       [1.        , 8.57232406],
       [1.        , 9.04544922],
       [1.        , 4.5312963 ],
       [1.        , 3.0272807 ],
       [1.        , 0.28886686],
       [1.        , 0.61745859],
       [1.        , 3.45721065],
       [1.        , 5.82963402],
       [1.        , 5.56109722],
       [1.        , 0.82976541],
       [1.        , 7.25049167],
       [1.        , 9.47109381],
       [1.        , 1.42511016],
       [1.        , 2.75961854],
       [1.        , 2.67011288],
       [1.        , 0.91525879],
       [1.        , 2.45537195],
       [1.        , 2.51867317],
       [1.        , 4.3294874 ],
       [1.        , 4.98909122],
       [1.        , 7.41808839],
       [1.        , 6.95976958],
       [1.        , 3.38168885],
       [1.        , 4.65052752],
       [1.

In [12]:
import unittest

class TestAddDummyFeature(unittest.TestCase):
  def test_add_dummy_feature(self):
    train_matrix=np.array([[3,2,5],[9,4,7]])
    train_matrix_with_dummy_feature=add_dummy_features(train_matrix)

    self.assertEqual(train_matrix_with_dummy_feature.shape,(2,4))

    np.testing.assert_array_equal(
        train_matrix_with_dummy_feature,
        np.array([[1,3,2,5],[1,9,4,7]])
    )

unittest.main(argv=[''],defaultTest='TestAddDummyFeature', verbosity=2,exit=False) 

test_add_dummy_feature (__main__.TestAddDummyFeature) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.003s

OK


<unittest.main.TestProgram at 0x7fb3f44192d0>

In [13]:
add_dummy_features(np.array([[3,2],[5,4]]))

array([[1., 3., 2.],
       [1., 5., 4.]])

## Model

In [14]:
def predict(X,w):
  return X@w

In [15]:
import unittest

class TestPredict(unittest.TestCase):
  def test_predict(self):
    train_matrix=np.array([[1,3,2,5],[1,9,4,7]])
    weight_vector=np.array([1,1,1,1])
    expected_label_vector=np.array([11,21])

    predicted_label_vector=predict(train_matrix, weight_vector)

    self.assertEqual(predicted_label_vector.shape,(2,))

    np.testing.assert_array_equal(
        expected_label_vector, predicted_label_vector
    )

unittest.main(argv=[''],defaultTest='TestPredict', verbosity=2,exit=False) 

test_predict (__main__.TestPredict) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.003s

OK


<unittest.main.TestProgram at 0x7fb3f4419250>

In [19]:
w=np.random.rand(4,)
w

array([0.78366549, 0.31745818, 0.73365041, 0.52066454])

In [20]:
xtrain=np.array([[1,3,2,5],[1,9,4,7]])
y_hat=predict(xtrain,w)

In [21]:
y_hat

array([ 5.80666357, 10.22004256])

In [22]:
def non_vectorized_predict(X,w):
  y=[]
  for i in range(0,X.shape[0]):
    y_hat_i=0
    for j in range(0, X.shape[1]):
      y_hat_i+=X[i][j]*w[j]
    y.append(y_hat_i)
  return np.array(y)

## Loss

In [23]:
def loss(X,y,w):
  e= predict(X,w)-y
  return (1/2)*(np.transpose(e)@e)

## Optimization

In [24]:
def normal_equation(X,y):
  return np.linalg.pinv(X)@y

In [25]:
def calculate_gradient(X,y,w):
  return np.transpose(X)@(predict(X,y)-y)

In [26]:
def update_weights(w,grad,lr):
  return w-lr*grad

In [27]:
def gradient_descent(X:np.ndarray, y:np.ndarray, lr:float, num_epochs:int):
  w_all=[]
  err_all=[]

  w=np.zeros((X.shape[1]))

  print()
  for i in np.arange(0,num_epochs):
    w_all.append(w)
    err_all.append(loss(X,y,w))
    dJdW=calculate_gradient(X,y,w)
    if(i%100)==0:
      print('iter num: ',i,'loss: ',err_all[-1])
    w=update_weights(w,dJdW,lr)
  return w, err_all,w_all

## Optimization and evaluation

In [29]:
# mbgd
t0,t1=200,100000
def learning_schedule(t):
  return t0/(t+t1)

In [30]:
def mini_batch(X,y,num_iter,minibatch_size):
  w_all=[]
  err_all=[]
  w=np.zeros(X.shape[1])
  t=0
  for epochs in range(num_iter):
    shuffled_indices=np.random.permutation(X.shape[0])
    X_shuffled=X[shuffled_indices]
    y_shuffled=y[shuffled_indices]
    for i in range(0,X.shape[0],minibatch_size):
      t+=1
      xi=X_shuffled[i:i+minibatch_size]
      yi=y_shuffled[i:i+minibatch_size]
      err_all.append(loss(xi,yi,w))
      gradients=2/minibatch_size*calculate_gradient(xi,yi,w)
      lr=learning_schedule(t)
      w=update_weights(w,gradients,lr)
      w_all.append(w)
  return w, err_all, w_all

In [31]:
def sgd(X,y,num_epochs):
  w_all=[]
  err_all=[]
  w=np.zeros(X.shape[1])
  for epochs in range(num_epochs):
    for i in range(X.shape[0]):
      random_index=np.random.randint(X.shape[0])
      xi=X[random_index:random_index+1]
      yi=y[random_index:random_index+1]
      err_all.append(loss(xi,yi,w))
      gradients=2*calculate_gradient(xi,yi,w)
      lr=learning_schedule(epochs*X.shape[0]+i)
      w=update_weights(w,gradients,lr)
      w_all.append(w)
  return w, err_all, w_all

## End to end colab

In [32]:
class LinReg(object):
  def __init__(self):
    self.t0=200
    self.t1=100000
  
  def predict(self,X):
    y=X @ self.w
    return y 
  
  def loss(self,X,y):
    e=y-self.predict(X)
    return (0.5)*(np.transpose(e)@e)

  def rmse(self,X,y):
    return np.sqrt((2/X.shape[0])*self.loss(X,y))
  
  def fit(self,X,y):
    self.w=np.linalg.pinv(X)@y
    return self.w
  
  def calculate_gradient(self,X,y):
    return np.transpose(X)@(self.predict(y)-y)
  
  def update_weights(self,grad,lr):
    return (self.w-lr*grad)

  def learning_schedule(self,t):
    return self.t0/t+self.t1
  
  def gd(self,X,y,num_epochs,lr):
    self.w=np.zeros(X.shape[1])
    self.w_all=[]
    self.err_all=[]
    for i in np.arange(0,num_epochs):
      dJdW=self.calculate_gradient(X,y)
      self.w_all.append(self.w)
      self.err_all.append(self.loss(X,y))
      self.w=self.update_weights(dJdW,lr)
    return self.w

  def mbgd(self,X,y,num_epochs,batch_size):
    self.w=np.zeros(X.shape[1])
    self.w_all=[]
    self.err_all=[]
    for epoch in range(num_epochs):
      for i in range(X.shape[0]):
        random_index=np.random.randint(X.shape[0])
        xi=X[random_index:random_index+1]
        yi=y[random_index:random_index+1]

      
        self.w_all.append(self.w)
        self.err_all.append(self.loss(xi,yi))
        gradients=2*self.calculate_gradient(xi,yi)
        lr=self.learning_schedule(epoch*X.shape[0]+i)

        self.w=self.update_weights(gradients,lr)
    return self.w