# Implementation with Numpy

In [19]:
import numpy as np

In [20]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

In [21]:
# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

In [22]:
# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

In [23]:
learning_rate = 1e-6

In [26]:
for epoch in range(1000):
    # forward pass : compute y_hat
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)
    
    # Compute loss:
    loss = np.sum(np.square(y_pred-y))
    print(epoch, loss)
    
    # Backpropogations
    #grad_y_pred = 2*np.sum(y-y_pred)
    #grad_w2 = 2*np.sum(y-y_pred)*h_relu
    #grad_h_relu = 2*np.sum(y-y_pred)*w2
    #grad_h = 2*np.sum(y-y_pred)*w2
    #grad_w1 = 2*np.sum(y-y_pred)*w2*x
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)
    
    #Update weights
    w1-=learning_rate*grad_w1
    w2-=learning_rate*grad_w2
    


0 218.91145758148312
1 205.72473023559695
2 193.36597134454877
3 181.78334704511732
4 170.92161261383495
5 160.7373566724646
6 151.18487460027205
7 142.2248609729156
8 133.816950097831
9 125.92650105252143
10 118.52142861208685
11 111.56742055060771
12 105.0398271533561
13 98.90903167366758
14 93.14927381849853
15 87.73795867125814
16 82.65297428523769
17 77.87384178474454
18 73.38330022859567
19 69.16051453449448
20 65.18945132149058
21 61.45503857267683
22 57.94317783767785
23 54.63902532326508
24 51.53087076870959
25 48.605000986206846
26 45.85171361723435
27 43.25982826686502
28 40.81964267146701
29 38.52136712753798
30 36.35840439468815
31 34.32025148851305
32 32.39990162688214
33 30.590738723589375
34 28.886417714778837
35 27.2799122024515
36 25.76557860408469
37 24.33860907389674
38 22.99260749209298
39 21.723281069489957
40 20.526610368036668
41 19.39779137276347
42 18.333182784300593
43 17.328456452288464
44 16.38055427596669
45 15.485770248875223
46 14.641298539325815
47 13.8

414 1.5600716577320134e-07
415 1.4873236491876945e-07
416 1.417990307107573e-07
417 1.3518571705092278e-07
418 1.2888218664316452e-07
419 1.2287162194817775e-07
420 1.1714355770123869e-07
421 1.1168300524041106e-07
422 1.0647984716943228e-07
423 1.0151792621674747e-07
424 9.678715988568053e-08
425 9.227662341235404e-08
426 8.797774221947618e-08
427 8.387864178768728e-08
428 7.997207776662505e-08
429 7.624801891641203e-08
430 7.26975392742715e-08
431 6.931267501346099e-08
432 6.608469080887832e-08
433 6.300774070295841e-08
434 6.007405674083752e-08
435 5.7279312863333884e-08
436 5.461295984921388e-08
437 5.207132336635833e-08
438 4.964794567051393e-08
439 4.7338113935173935e-08
440 4.513552643310914e-08
441 4.303629397021537e-08
442 4.103440243587467e-08
443 3.9125802774952765e-08
444 3.730591301226124e-08
445 3.5571455716773326e-08
446 3.3917497338536254e-08
447 3.2340742047360155e-08
448 3.0837366936129965e-08
449 2.9404200347450636e-08
450 2.8037193803422457e-08
451 2.673425897563397

850 1.9830744808788659e-16
851 1.8930752185904545e-16
852 1.8071599229756502e-16
853 1.72513317713215e-16
854 1.6468717979114874e-16
855 1.5721087218160917e-16
856 1.5007569440850075e-16
857 1.4326589328261915e-16
858 1.3676286594713934e-16
859 1.3056106010919763e-16
860 1.2463535739213107e-16
861 1.189768313358353e-16
862 1.135797366569906e-16
863 1.08427346849378e-16
864 1.0350797709833548e-16
865 9.881354033259912e-17
866 9.432795511261083e-17
867 9.004992363266991e-17
868 8.596423792975326e-17
869 8.20663392995047e-17
870 7.834328703707341e-17
871 7.478852924051436e-17
872 7.139926372654657e-17
873 6.815758334579308e-17
874 6.506807672666381e-17
875 6.211657903106564e-17
876 5.929863542375029e-17
877 5.661017998698058e-17
878 5.4040859463435256e-17
879 5.1590106374642454e-17
880 4.9251933396864964e-17
881 4.701854592605041e-17
882 4.4885615228427224e-17
883 4.2851638348612284e-17
884 4.0908225104685713e-17
885 3.9053886474347466e-17
886 3.7281686915100176e-17
887 3.559318060166347e

# Implementation with PyTorch

In [35]:
import torch

In [30]:
dtype = torch.float
device = torch.device("cpu")

In [42]:
N = 64
D_in = 1000
H = 100
D_out = 10

In [43]:
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype = dtype)

In [44]:
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

In [45]:
for epoch in range(5000):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred-y).pow(2).sum()
    if epoch % 1000 == 0:
        print(epoch, loss.item())
    
    loss.backward()
    
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        
        w1.grad.zero_()
        w2.grad.zero_()

0 43923776.0
1000 6.3058432715479285e-06
2000 1.5636321677447995e-06
3000 8.388394689973211e-07
4000 5.554211384151131e-07


In [47]:
loss.item()

3.969978479290148e-07

# Using nn module - Functional 

In [110]:
from torch.nn import Linear
from torch.nn.functional import relu

In [117]:
class LinearRegression(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(LinearRegression, self).__init__()
        self.h1 = Linear(in_features =D_in,
                         out_features = H)
        self.h2 = Linear(in_features = H,
                             out_features = D_out)
    def forward(self, x):
        h_1 = self.h1(x)
        h_1_relu = relu(h_1)
        y_pred = self.h2(h_1_relu)
        return y_pred

In [118]:
x = torch.from_numpy(np.random.randn(64, 1000))
y = torch.from_numpy(np.random.randn(64, 10))

In [119]:
model = LinearRegression(D_in, H, D_out)

In [120]:
criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-4)

In [121]:
for epoch in range(1000):
    y_pred = model(x.float())
    loss = criterion(y_pred, y.float())
    if epoch%100 ==0 :
        print(epoch, loss.item())
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    

0 661.2566528320312
100 1.8130842447280884
200 0.022029289975762367
300 0.0005905301659367979
400 2.012088953051716e-05
500 7.433949349433533e-07
600 2.981779090305281e-08
700 3.0761535541046214e-09
800 1.0760587976221814e-09
900 5.784445700207641e-10


# Using nn module - Sequential API

In [145]:
from torch import nn
from torch import functional as F

In [146]:
model = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            nn.Linear(H, D_out)
            )

In [147]:
criterion = nn.MSELoss(reduce="sum")
optimizer = torch.optim.SGD(params = model.parameters(), lr=1e-4)



In [148]:
for epoch in range(1000):
    y_pred = model(x.float())
    loss = criterion(y_pred, y.float())
    optimizer.zero_grad()
    
    loss.backward()
    optimizer.step()
    if epoch%100 == 0:
        print(epoch, loss.item())

0 1.0447428226470947
100 1.033126711845398
200 1.0217097997665405
300 1.0105340480804443
400 0.9995964169502258
500 0.9888695478439331
600 0.9784125089645386
700 0.9681406021118164
800 0.9580532312393188
900 0.9481567144393921
