In [54]:
import torch
import numpy as np
import torch.nn as nn

In [55]:
torch.cuda.is_available()

False

Tensor Basics

In [56]:
# Tensor Basics
x = torch.empty(3) # vector of 3
y = torch.empty(2,2) # 2d vector
# print (x, y)
# print (x.size())

In [57]:
x = torch.rand(2,2)
# torch.zeros(2)
# torch.ones(1,1)

In [58]:
print(x[1,1])
print(x[1,1].item())

tensor(0.3658)
0.3658078908920288


In [59]:
# pytorch to numpy
a = torch.ones(5)
print(a)
b = a.numpy()
print(b)
a.add_(1)
print(a)
print(b) #if a changes, b also changes

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [60]:
a = np.ones(5)
print(a)
b = torch.from_numpy(a)
print(b)
a = a+1
print(a)
print(b) #here b doesnt change as no gpu

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


Autograd

In [61]:
x = torch.ones(5, requires_grad=True)  #by default is set to false, this means that we will have to optimize this variable and
                                       #gradient needs to be calculated later
print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


In [62]:
y = x+2
print(y)

tensor([3., 3., 3., 3., 3.], grad_fn=<AddBackward0>)


In [63]:
z = y*y*2
print(z)
z = z.mean()
print(z)

tensor([18., 18., 18., 18., 18.], grad_fn=<MulBackward0>)
tensor(18., grad_fn=<MeanBackward0>)


In [64]:
z.backward()
print(x.grad)

tensor([2.4000, 2.4000, 2.4000, 2.4000, 2.4000])


In [65]:
weights = torch.ones(4, requires_grad=True)
for epoch in range(2):
    model_out = (weights*3).sum()
    model_out.backward()
    print(weights.grad)
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [66]:
#Back propogation

x = torch.tensor(2.0)
y = torch.tensor(5.0)

w = torch.tensor(1.0, requires_grad=True)

y_hat = w*x
loss = (y_hat - y)**2

In [67]:
loss.backward()
print(w.grad)

tensor(-12.)


In [68]:
#Gradient Descent from scratch

import numpy as np

# y = 2*x
X = np.array([1,2,4,7], dtype=np.float32)
Y = np.array([2,4,8,14], dtype=np.float32)

w = 0

#model prediction
def forward(x):
    return w*x

#loss
def loss(y, y_predicted):
    return np.mean((y_predicted - y)**2)

#gradient
# L = 1/N * ((w*x - y)**2)
# dL/dw = 1/N * (2*(w*x - y) * x)
def gradient(x, y, y_predicted):
    return -np.dot(2*x, y-y_predicted).mean()

print(f"Prediction before training: f(5) = {forward(5):.3f}")

#Training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    y_predicted = forward(X)

    #loss
    l = loss(Y, y_predicted)

    #gradient
    dw = gradient(X, Y, y_predicted)

    #learning rate
    w -= learning_rate*dw

    if epoch%1 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.3f}')

print(f"Prediction after training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
epoch 1: w = 2.800, loss = 70.000
epoch 2: w = 1.680, loss = 11.200
epoch 3: w = 2.128, loss = 1.792
epoch 4: w = 1.949, loss = 0.287
epoch 5: w = 2.020, loss = 0.046
epoch 6: w = 1.992, loss = 0.007
epoch 7: w = 2.003, loss = 0.001
epoch 8: w = 1.999, loss = 0.000
epoch 9: w = 2.001, loss = 0.000
epoch 10: w = 2.000, loss = 0.000
epoch 11: w = 2.000, loss = 0.000
epoch 12: w = 2.000, loss = 0.000
epoch 13: w = 2.000, loss = 0.000
epoch 14: w = 2.000, loss = 0.000
epoch 15: w = 2.000, loss = 0.000
epoch 16: w = 2.000, loss = 0.000
epoch 17: w = 2.000, loss = 0.000
epoch 18: w = 2.000, loss = 0.000
epoch 19: w = 2.000, loss = 0.000
epoch 20: w = 2.000, loss = 0.000
Prediction after training: f(5) = 10.000


In [69]:
#Gradient Descent using torch

# y = 2*x
X = torch.tensor([1,2,4,7], dtype=torch.float32)
Y = torch.tensor([2,4,8,14], dtype=torch.float32)

w = torch.tensor(0.0, requires_grad=True)

#model prediction
def forward(x):
    return w*x

#loss
def loss(y, y_predicted):
    return ((y_predicted - y)**2).mean()

#gradient
# L = 1/N * ((w*x - y)**2)
# dL/dw = 1/N * (2*(w*x - y) * x)
def gradient(l, weights):
    l.backward()
    return weights.grad

print(f"Prediction before training: f(5) = {forward(5):.3f}")

#Training
learning_rate = 0.01
n_iters = 30

for epoch in range(n_iters):
    y_predicted = forward(X)

    #loss
    l = loss(Y, y_predicted)

    #gradient
    dw = gradient(l, w)

    #learning rate
    with torch.no_grad():
        w -= learning_rate*dw
 
    # zero the gradients
    w.grad.zero_()

    if epoch%3 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.3f}')

print(f"Prediction after training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
epoch 1: w = 0.700, loss = 70.000
epoch 4: w = 1.643, loss = 5.279
epoch 7: w = 1.902, loss = 0.398
epoch 10: w = 1.973, loss = 0.030
epoch 13: w = 1.993, loss = 0.002
epoch 16: w = 1.998, loss = 0.000
epoch 19: w = 1.999, loss = 0.000
epoch 22: w = 2.000, loss = 0.000
epoch 25: w = 2.000, loss = 0.000
epoch 28: w = 2.000, loss = 0.000
Prediction after training: f(5) = 10.000


1. Design model - input, output size, forward pass
2. Construct loss and the optimizer
3. Training loop 
    - forward pass: Compute prediction
    - backward pass and gradients
    - update weights

In [70]:
# y = 2*x
X = torch.tensor([1,2,4,7], dtype=torch.float32)
Y = torch.tensor([2,4,8,14], dtype=torch.float32)

w = torch.tensor(0.0, requires_grad=True)
learning_rate = 0.01

#model prediction
def forward(x):
    return w*x

#loss
loss = nn.MSELoss()

optimizer = torch.optim.SGD([w], learning_rate)
#gradient
# L = 1/N * ((w*x - y)**2)
# dL/dw = 1/N * (2*(w*x - y) * x)
def gradient(l, weights):
    l.backward()
    return weights.grad

print(f"Prediction before training: f(5) = {forward(5):.3f}")

#Training
n_iters = 30

for epoch in range(n_iters):
    y_predicted = forward(X)

    #loss
    l = loss(Y, y_predicted)

    #gradient
    dw = gradient(l, w)

    optimizer.step()
    optimizer.zero_grad()

    if epoch%3 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.3f}')

print(f"Prediction after training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
epoch 1: w = 0.700, loss = 70.000
epoch 4: w = 1.643, loss = 5.279
epoch 7: w = 1.902, loss = 0.398
epoch 10: w = 1.973, loss = 0.030
epoch 13: w = 1.993, loss = 0.002
epoch 16: w = 1.998, loss = 0.000
epoch 19: w = 1.999, loss = 0.000
epoch 22: w = 2.000, loss = 0.000
epoch 25: w = 2.000, loss = 0.000
epoch 28: w = 2.000, loss = 0.000
Prediction after training: f(5) = 10.000


In [71]:
# only one layer in the linear regression model
# which is also defined by pytorch - model = nn.Linear()

import torch.nn as nn

# y = 2*x
# pytorch model forward needs 2d - each row is a sample and each column is a feature
X = torch.tensor([[1],[2],[4],[7]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[8],[14]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)

learning_rate = 0.01

#model prediction
input_size = X.shape[1]
output_size = Y.shape[1]
model = nn.Linear(input_size, output_size)

#loss
loss = nn.MSELoss()

optimizer = torch.optim.SGD(model.parameters(), learning_rate)
#gradient
# L = 1/N * ((w*x - y)**2)
# dL/dw = 1/N * (2*(w*x - y) * x)
def gradient(l, weights):
    l.backward()
    return weights.grad

print(f"Prediction before training: f(5) = {model(X_test).item():.3f}")

#Training
n_iters = 600

for epoch in range(n_iters):
    y_predicted = model(X)

    #loss
    l = loss(Y, y_predicted)

    #gradient
    dw = gradient(l, w)

    optimizer.step()
    optimizer.zero_grad()

    if epoch%60 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.3f}')

print(f"Prediction after training: f(5) = {model(X_test).item():.3f}")

Prediction before training: f(5) = -5.026
epoch 1: w = 0.122, loss = 158.079
epoch 61: w = 1.970, loss = 0.007
epoch 121: w = 1.979, loss = 0.003
epoch 181: w = 1.985, loss = 0.002
epoch 241: w = 1.989, loss = 0.001
epoch 301: w = 1.992, loss = 0.000
epoch 361: w = 1.995, loss = 0.000
epoch 421: w = 1.996, loss = 0.000
epoch 481: w = 1.997, loss = 0.000
epoch 541: w = 1.998, loss = 0.000
Prediction after training: f(5) = 10.000


In [72]:
# custom linear regression
class LinearRegression(nn.Module):
    
    def __init__(self, input_dimensions, output_dimensions):
        super().__init__()
        # define layers
        self.lin = nn.Linear(input_dimensions, output_dimensions)

    def forward(self, x):
        return self.lin(x)


# y = 2*x
# pytorch model forward needs 2d - each row is a sample and each column is a feature
X = torch.tensor([[1],[2],[4],[7]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[8],[14]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)

learning_rate = 0.01

#model prediction
input_size = X.shape[1]
output_size = Y.shape[1]
model = LinearRegression(input_size, output_size)

#loss
loss = nn.MSELoss()

optimizer = torch.optim.SGD(model.parameters(), learning_rate)
#gradient
# L = 1/N * ((w*x - y)**2)
# dL/dw = 1/N * (2*(w*x - y) * x)
def gradient(l, weights):
    l.backward()
    return weights.grad

print(f"Prediction before training: f(5) = {model(X_test).item():.3f}")

#Training
n_iters = 600

for epoch in range(n_iters):
    y_predicted = model(X)

    #loss
    l = loss(Y, y_predicted)

    #gradient
    dw = gradient(l, w)

    optimizer.step()
    optimizer.zero_grad()

    if epoch%60 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.3f}')

print(f"Prediction after training: f(5) = {model(X_test).item():.3f}")

Prediction before training: f(5) = -4.476
epoch 1: w = 0.038, loss = 146.728
epoch 61: w = 1.862, loss = 0.139
epoch 121: w = 1.903, loss = 0.070
epoch 181: w = 1.931, loss = 0.035
epoch 241: w = 1.951, loss = 0.017
epoch 301: w = 1.966, loss = 0.009
epoch 361: w = 1.976, loss = 0.004
epoch 421: w = 1.983, loss = 0.002
epoch 481: w = 1.988, loss = 0.001
epoch 541: w = 1.991, loss = 0.001
Prediction after training: f(5) = 9.999
