In [21]:
import numpy as np
import torch 

In [22]:
# Data generation

In [23]:
true_b = 1
true_w = 2 
N = 100

# set the random seed for numpy 
np.random.seed(43)

x= np.random.rand(N,1)
epsilon = (.1 * np.random.rand(N,1))

y = true_b + true_w *x + epsilon

In [24]:
# Generate training and validating sets 
idx = np.arange(N)
np.random.shuffle(idx)

# Use first 80 randowm indices for train
train_idx = idx[:int(N*.8)]
val_idx = idx[int(N*.8):]

# Generate train and validation sets
x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]


In [25]:
# Data preparation 
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Tranform the data from numpy array to torch tensor
x_train_tensor = torch.as_tensor(x_train).float().to(device)
y_train_tensor = torch.as_tensor(y_train).float().to(device)

In [26]:
# Creating parameters 

In [34]:
# Step 0: Initialize parameters 'b' with 'w' randomly 
torch.manual_seed(42)
b = torch.randn(1, requires_grad=True, \
                dtype=torch.float, device=device)
w = torch.randn(1, requires_grad=True, \
                dtype=torch.float, device=device)


In [28]:
# backward

In [40]:
# Step 1: Compute model's predicted output- forward pass 

yhat = b + w * x_train_tensor

# Step 2: Compute the loss 
# Using the all datapoints, this is BATCH gradient descent
error = (yhat - y_train_tensor)

# Compute the mean ssquare error
loss = (error ** 2).mean()

# Step 3: Computes the gradients for both 'b' and 'w' parameters
# No more manaul computation of gradients 
# b_grad = 2 * error.mean()
# w_grad = 2 * (x_tensor*error).mean()
loss.backward()

None


In [30]:
print(error.requires_grad,yhat.requires_grad, \
     b.requires_grad, w.requires_grad)

True True True True


In [33]:
print(y_train_tensor.requires_grad, x_train_tensor.requires_grad)

False False


In [41]:
# grad 
print(b.grad, w.grad)

tensor([-13.3556]) tensor([-8.0432])


In [48]:
import torch 

# check if the device is cuda or cpu
device = 'cuda' if torch.cuda.is_available() else 'cpu'
lr = 0.1 
torch.manual_seed(42)
# initial the trainable parameters
b = torch.randn(1, requires_grad=True, \
                dtype=torch.float, device=device)
w = torch.randn(1, requires_grad=True, \
                dtype=torch.float, device=device)
print("original:",b,w)

n_epochs = 1000

for epoch in range(n_epochs):
    # Step1: Compute the model's predicted output- forward pass
    yhat = b + w*x_train_tensor 
    
    # Step2: Compute the loss 
    error = (yhat-y_train_tensor)
    loss = (error**2).mean()
    
    # Step3: Computes gradients for both 'b' and 'w'
    loss.backward()
    
    # Step4: updates parameters using gradients and the learning rate 
    # Use no_grad to keep the update out of the gradient computaiton.
    with torch.no_grad():
        b-= lr *b.grad
        w-= lr *w.grad
    
    # Let the computed gradients go
    b.grad.zero_()
    w.grad.zero_()

print(b,w)

original: tensor([0.3367], requires_grad=True) tensor([0.1288], requires_grad=True)
tensor([1.0557], requires_grad=True) tensor([1.9947], requires_grad=True)
