In [1]:
import torch

In [2]:
torch.cuda.is_available()

False

Reshaping tensors:

In [3]:
# generate a tensor from a uniform distribution boundaries: 0-1
x = torch.rand(4,4)
print(x)

tensor([[0.9373, 0.2543, 0.4360, 0.8204],
        [0.9481, 0.4211, 0.0872, 0.0491],
        [0.1412, 0.1419, 0.5622, 0.3768],
        [0.2289, 0.0240, 0.4572, 0.4584]])


In [4]:
y = x.view(16)
print(y)

tensor([0.9373, 0.2543, 0.4360, 0.8204, 0.9481, 0.4211, 0.0872, 0.0491, 0.1412,
        0.1419, 0.5622, 0.3768, 0.2289, 0.0240, 0.4572, 0.4584])


In [5]:
y = x.view(-1,8)
print(y.size())

torch.Size([2, 8])


In [6]:
import numpy as np

In [7]:
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


In [8]:
b = a.numpy()
print(type(b))

<class 'numpy.ndarray'>


In [9]:
a.add_(1)
print(a)

tensor([2., 2., 2., 2., 2.])


In [10]:
print(b)

[2. 2. 2. 2. 2.]


In [11]:
b = a.clone().detach().numpy()

In [12]:
print(a)

tensor([2., 2., 2., 2., 2.])


In [13]:
a.add_(2)

tensor([4., 4., 4., 4., 4.])

In [14]:
print(a)

tensor([4., 4., 4., 4., 4.])


In [15]:
print(b)

[2. 2. 2. 2. 2.]


Numpy to tensor.

In [16]:
a =  np.ones(5)
print(a)

[1. 1. 1. 1. 1.]


In [17]:
b = torch.from_numpy(a)
print(b)

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


In [18]:
a += 1

In [19]:
a

array([2., 2., 2., 2., 2.])

In [20]:
b

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

Prevent these modification issues using the clone method:

In [21]:
b = torch.from_numpy(a).clone()

In [22]:
a+=2

In [23]:
a

array([4., 4., 4., 4., 4.])

In [24]:
b

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

In [None]:
if torch.cuda.is_available():
    device = "cuda"

Autograd:

In [27]:
x = torch.randn(3, requires_grad=True)

In [28]:
print(x)

tensor([ 0.2625, -0.0276, -0.1396], requires_grad=True)


In [29]:
y = x+2

In [30]:
print(y)

tensor([2.2625, 1.9724, 1.8604], grad_fn=<AddBackward0>)


In [31]:
z = y*y*2

In [32]:
print(z)

tensor([10.2381,  7.7804,  6.9223], grad_fn=<MulBackward0>)


In [33]:
z = z.mean()

In [34]:
print(z)

tensor(8.3136, grad_fn=<MeanBackward0>)


In [35]:
z.backward()

In [36]:
x.grad

tensor([3.0167, 2.6298, 2.4806])

Calling the backward operation on a fxn that doesn't produce a scalar:

In [37]:
x = torch.randn(3, requires_grad=True)
print(x)

tensor([-0.8871,  0.3572, -0.4592], requires_grad=True)


In [38]:
y = x+2
print(y)
z = y*y*2
print(z)

tensor([1.1129, 2.3572, 1.5408], grad_fn=<AddBackward0>)
tensor([ 2.4773, 11.1127,  4.7484], grad_fn=<MulBackward0>)


In [39]:
v = torch.tensor([0.1,1.0,0.001], dtype=torch.float32)
z.backward(v)
print(x.grad)

tensor([4.4518e-01, 9.4288e+00, 6.1634e-03])


Preventing pytorch from tracking history and computing gradients:

In [40]:
# permanently turning off gradients on a vector:
# x.requires_grad_(False)
# x.detach() -> return a new tensor with the gradient turned off
# with torch.no_grad() -> temporarily turn off gradients

In [41]:
x = torch.randn(3, requires_grad=True)
print(x)

tensor([ 0.5361, -0.4994,  1.5042], requires_grad=True)


In [42]:
x.requires_grad_(False)
print(x)

tensor([ 0.5361, -0.4994,  1.5042])


Calling detach:

In [44]:
x = torch.randn(3, requires_grad=True)
y = x.detach()
print(y)

tensor([-0.7893,  1.5616, -0.2001])


In [45]:
x = torch.randn(3, requires_grad=True)

In [46]:
print(x)

tensor([0.2621, 0.3376, 0.6003], requires_grad=True)


In [47]:
with torch.no_grad():
    y = x+2
    print(y)

tensor([2.2621, 2.3376, 2.6003])


In [48]:
y = x+2
print(y)

tensor([2.2621, 2.3376, 2.6003], grad_fn=<AddBackward0>)


In [56]:
weights = torch.ones(4, requires_grad=True)

In [57]:
print(weights.grad)

None


In [58]:
for epoch in range(3):
    model_output = (weights*3).sum()
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


weights accumulate - each time you call backward. before each epoch need to refresh them otherwise they accumulate.

Back propagation:

In [60]:
x = torch.tensor(1.0)
y_hat = torch.tensor(2.0)
m = torch.tensor(2.0, requires_grad=True)
c = torch.tensor(1.0, requires_grad=True)

In [61]:
y = x*m + c

In [62]:
loss = (y - y_hat)**2

In [63]:
loss.backward()

In [64]:
m.grad

tensor(2.)

In [66]:
c.grad

tensor(2.)

In [67]:
loss

tensor(1., grad_fn=<PowBackward0>)

In [113]:
x = torch.tensor(1.0)
y_hat = torch.tensor(2.0)
w = torch.tensor(0.7, requires_grad=True)
c = torch.tensor(0.001, requires_grad=True)

In [114]:
for epoch in range(1000):
    # compute the operation: model
    y = w*x + c
    loss = (y_hat-y)**2
    loss.backward()
    # perform optimization:
    with torch.no_grad():
        w -= 0.01*w.grad
        c -= 0.01*c.grad
        # flash out the gradients
        # print(w.grad)
        # print(c.grad)
        w.grad.zero_()
        c.grad.zero_()
else:
    print(f"w: {w}, c: {c}")


w: 1.349498987197876, c: 0.6504995226860046


Gradient Descent with Autograd and Backpropagation:

In [97]:
import numpy as np

In [118]:
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

In [119]:
# model prediction
def forward(x):
    return w*x

# loss = MSE
def loss(y, y_predicted):
    return ((y_predicted-y)**2).mean()

# gradient
# MSE = 1/N * (w*x - y)**2
# Derivative dJ/dW = 
def gradient(x,y,y_predicted):
    return np.dot(2*x, y_predicted-y).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
n_iters= 10
learning_rate = 0.01

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # gradients
    dw = gradient(X, Y, y_pred)

    # update weights
    w -= learning_rate * dw

    if epoch%1 == 0:
        print(f'Epoch {epoch+1}: w= {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
Epoch 1: w= 1.200, loss = 30.00000000
Epoch 2: w= 1.680, loss = 4.79999924
Epoch 3: w= 1.872, loss = 0.76800019
Epoch 4: w= 1.949, loss = 0.12288000
Epoch 5: w= 1.980, loss = 0.01966083
Epoch 6: w= 1.992, loss = 0.00314574
Epoch 7: w= 1.997, loss = 0.00050331
Epoch 8: w= 1.999, loss = 0.00008053
Epoch 9: w= 1.999, loss = 0.00001288
Epoch 10: w= 2.000, loss = 0.00000206
Prediction after training: f(5) = 9.999


Implementation with pytorch:

In [139]:
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)
w = torch.tensor(0, dtype=torch.float32, requires_grad=True)
learning_rate = 0.01

def forward(x):
    return w*x

def loss(y, y_pred):
    return ((y - y_pred)**2).mean()

for epoch in range(40):
    y = forward(X)
    l = loss(y, Y)

    l.backward()

    # update the gradients:
    with torch.no_grad():
        w -= learning_rate*w.grad

        w.grad.zero_()
    
        if epoch%2==1:
            print(f"Epoch: {epoch}, loss: {l.item()} | Test Prediction f(5): {forward(5)}")

Epoch: 1, loss: 21.674999237060547 | Test Prediction f(5): 2.7749996185302734
Epoch: 3, loss: 11.314486503601074 | Test Prediction f(5): 4.779937267303467
Epoch: 5, loss: 5.9062323570251465 | Test Prediction f(5): 6.228504657745361
Epoch: 7, loss: 3.083089828491211 | Test Prediction f(5): 7.275094985961914
Epoch: 9, loss: 1.609391689300537 | Test Prediction f(5): 8.031255722045898
Epoch: 11, loss: 0.8401124477386475 | Test Prediction f(5): 8.577583312988281
Epoch: 13, loss: 0.4385439455509186 | Test Prediction f(5): 8.97230339050293
Epoch: 15, loss: 0.22892260551452637 | Test Prediction f(5): 9.257489204406738
Epoch: 17, loss: 0.11949898302555084 | Test Prediction f(5): 9.463536262512207
Epoch: 19, loss: 0.0623791441321373 | Test Prediction f(5): 9.612405776977539
Epoch: 21, loss: 0.03256231173872948 | Test Prediction f(5): 9.719962120056152
Epoch: 23, loss: 0.016997724771499634 | Test Prediction f(5): 9.797673225402832
Epoch: 25, loss: 0.008872910402715206 | Test Prediction f(5): 9.85

In [125]:
forward(5)

tensor(8.0313, grad_fn=<MulBackward0>)

Computing the model entirely in pytorch:

In [140]:
import torch.nn as nn

In [163]:
X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)

learning_rate = 0.01

n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
output_size = n_features

loss  = nn.MSELoss(reduction='mean')

model = nn.Linear(input_size, output_size)

print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum=0.9, nesterov=True)


for epoch in range(10):
    y = model(X)
    l = loss(y, Y)

    l.backward()

    # update the gradients:
    optimizer.step()
    optimizer.zero_grad()
    
    if epoch%1==0:
        [w,b] = model.parameters()
        print(f"Epoch: {epoch}, loss: {l.item()} | w = {w[0].item():.3f}| b = {b[0].item():.3f} | Test Prediction f(5): {model(X_test).item():.3f}")

4 1
Prediction before training: f(5) = -4.954
Epoch: 0, loss: 72.41464233398438 | w = 0.065| b = -0.554 | Test Prediction f(5): -0.230
Epoch: 1, loss: 33.7505989074707 | w = 1.046| b = -0.221 | Test Prediction f(5): 5.010
Epoch: 2, loss: 7.924973487854004 | w = 1.936| b = 0.081 | Test Prediction f(5): 9.761
Epoch: 3, loss: 0.011376401409506798 | w = 2.609| b = 0.309 | Test Prediction f(5): 13.352
Epoch: 4, loss: 3.8130557537078857 | w = 3.006| b = 0.442 | Test Prediction f(5): 15.474
Epoch: 5, loss: 10.017172813415527 | w = 3.131| b = 0.483 | Test Prediction f(5): 16.141
Epoch: 6, loss: 12.569701194763184 | w = 3.031| b = 0.448 | Test Prediction f(5): 15.605
Epoch: 7, loss: 10.487966537475586 | w = 2.779| b = 0.360 | Test Prediction f(5): 14.257
Epoch: 8, loss: 6.089456558227539 | w = 2.456| b = 0.248 | Test Prediction f(5): 12.526
Epoch: 9, loss: 2.1839728355407715 | w = 2.132| b = 0.136 | Test Prediction f(5): 10.798
