# With numpy

In [1]:
import torch
import numpy as np
import time

In [2]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    print('first>>>', weights.grad)
    model_outPut = (weights * 3).sum()
    model_outPut.backward()
    print(weights.grad) # will save the old value and add a new if we dont use the .zero_, but we have to use .zero()
    weights.grad.zero_()
    
# RuntimeError: grad can be implicitly created only for scalar outputs

first>>> None
tensor([3., 3., 3., 3.])
first>>> tensor([0., 0., 0., 0.])
tensor([3., 3., 3., 3.])
first>>> tensor([0., 0., 0., 0.])
tensor([3., 3., 3., 3.])


# Calc the Grad

In [3]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

In [4]:
y_pred = w * x
loss = (y_pred - y)**2
print(f'yPred: {y_pred}, loss: {loss} ')

yPred: 1.0, loss: 1.0 


In [5]:
# Backward pass
print(w.grad) # will print None
loss.backward()
print(w.grad) # will print -2

None
tensor(-2.)


# =============================

# 👉 Let's find the forward, loss, gradient, then the Prediction.

In [6]:
x2 = np.array([1, 2, 3], dtype=np.float32)
y2 = np.array([2, 4, 6], dtype=np.float32)

w2 = 0.0
print(w2 * x2)


[0. 0. 0.]


In [7]:

def forward(x2): # forward or predic
    return w2 * x2


def loss(y_pred, y2): # loss = MSE
    return ((y_pred - y2)**2).mean()


def gradient(x2, y_pred, y2):
    # 1/n * (2x(w*x - y))
    derivative = np.dot(2*x2, y_pred - y2).mean()
    return derivative

print(f"predic befor training: {forward(5)}")
    

predic befor training: 0.0


# Deravative Separately 
##### we need deravative  and learning rate for updating our weight

In [8]:
# derivative = np.dot(2*x, y_pred - y).mean()
print(2.0*x2)
print([0.,0.,0.] - y2)
print(np.dot([2., 4., 6.], [-2., -4., -6.]))

[2. 4. 6.]
[-2. -4. -6.]
-56.0


# Trianing

In [9]:
lr = 0.01

for epoch in range(10):
    #forward
    y_prediction = forward(x2)

    #loss
    loss2 = loss(y_prediction, y2)
    
    #gradient
    deriva = gradient(x2, y_prediction, y2)
    
    #update the weight important !!!!!!!!!!!!!!!!!
    #w = w - lr * derivative
    w2 = w2 - lr*deriva # w2 will not be anymore 0.0
    
    print(f"predic: {y_prediction}, Loss: {loss2}, Weight: {w2}")
    #time.sleep(2)

    
print("===========")
print(f"predic After training: {forward(5)}")


predic: [0. 0. 0.], Loss: 18.66666603088379, Weight: 0.56
predic: [0.56      1.12      1.6800001], Loss: 9.676798820495605, Weight: 0.9631999969482423
predic: [0.9632    1.9264    2.8895998], Loss: 5.016453266143799, Weight: 1.2535040092468264
predic: [1.253504 2.507008 3.760512], Loss: 2.6005289554595947, Weight: 1.4625228881835939
predic: [1.4625229 2.9250457 4.3875685], Loss: 1.3481146097183228, Weight: 1.6130164909362794
predic: [1.6130165 3.226033  4.8390493], Loss: 0.6988625526428223, Weight: 1.721371879577637
predic: [1.7213719 3.4427438 5.164116 ], Loss: 0.3622901141643524, Weight: 1.7993877363204958
predic: [1.7993877 3.5987754 5.398163 ], Loss: 0.1878114938735962, Weight: 1.855559196472168
predic: [1.8555592 3.7111185 5.5666776], Loss: 0.09736133366823196, Weight: 1.8960026168823243
predic: [1.8960027 3.7920053 5.688008 ], Loss: 0.05047212168574333, Weight: 1.9251218819618225
predic After training: 9.625609409809112


# First predic 👇

In [10]:
# get predic
print(np.dot(0.0, [1, 2, 3]))

[0. 0. 0.]


In [11]:
# loss 
print((([0., 0., 0.] - y2)**2).mean())

18.666666666666668


In [12]:
# derivative = np.dot(2*x, y_pred - y).mean()
print(2.0*x2)
print([0.,0.,0.] - y2)
print(np.dot([2., 4., 6.], [-2., -4., -6.]).mean()) # Second predic

[2. 4. 6.]
[-2. -4. -6.]
-56.0


In [13]:
# update derivative
# lr*dw
print(0.01 * -56)

-0.56


In [14]:
# update w2
w2 = 0.0 - lr*-56
print(w2)

0.56


# Second predic after you update your weight 👇

## It continues like the first step till the last iteration 

# ==============================================================

# With torch

In [15]:
import torch


In [22]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model output
def forward(x):
    return w * x

# loss = MSE
def loss(y, y_pred):
    return ((y_pred - y)**2).mean()


# Training
lr = 0.01
n_iters = 100

for epoch in range(100):
    # predict = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # calculate gradients = backward pass
    l.backward()

    # update weights
    with torch.no_grad():
        w -= lr * w.grad
    
    # zero the gradients after updating
    w.grad.zero_()

    if epoch % 10 == 0:
        print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {l.item():.8f}, Predic: {y_pred}')


epoch 1: w = 0.300, loss = 30.00000000, Predic: tensor([0., 0., 0., 0.], grad_fn=<MulBackward0>)
epoch 11: w = 1.665, loss = 1.16278565, Predic: tensor([1.6063, 3.2125, 4.8188, 6.4250], grad_fn=<MulBackward0>)
epoch 21: w = 1.934, loss = 0.04506890, Predic: tensor([1.9225, 3.8450, 5.7674, 7.6899], grad_fn=<MulBackward0>)
epoch 31: w = 1.987, loss = 0.00174685, Predic: tensor([1.9847, 3.9695, 5.9542, 7.9390], grad_fn=<MulBackward0>)
epoch 41: w = 1.997, loss = 0.00006770, Predic: tensor([1.9970, 3.9940, 5.9910, 7.9880], grad_fn=<MulBackward0>)
epoch 51: w = 1.999, loss = 0.00000262, Predic: tensor([1.9994, 3.9988, 5.9982, 7.9976], grad_fn=<MulBackward0>)
epoch 61: w = 2.000, loss = 0.00000010, Predic: tensor([1.9999, 3.9998, 5.9997, 7.9995], grad_fn=<MulBackward0>)
epoch 71: w = 2.000, loss = 0.00000000, Predic: tensor([2.0000, 4.0000, 5.9999, 7.9999], grad_fn=<MulBackward0>)
epoch 81: w = 2.000, loss = 0.00000000, Predic: tensor([2.0000, 4.0000, 6.0000, 8.0000], grad_fn=<MulBackward0>)