In [8]:
import torch.nn as nn
import torch 

#Define a MSE loss function 
loss_fn = nn.MSELoss(reduction="mean")
print(loss_fn)

MSELoss()


In [9]:
# A example to excute the loss function
predictions = torch.tensor([1.0, 2.0])
ground_truth = torch.tensor([2.0, 5.0])

# Calculate the loss 
print(loss_fn(predictions, ground_truth))

tensor(5.)


# Import the loss function

In [10]:
#Data generation 
import numpy as np
import torch 


true_b = 1
true_w = 2 
N = 100

# set the random seed for numpy 
np.random.seed(43)

x= np.random.rand(N,1)
epsilon = (.1 * np.random.rand(N,1))

y = true_b + true_w *x + epsilon

# Generate training and validating sets 
idx = np.arange(N)
np.random.shuffle(idx)

# Use first 80 randowm indices for train
train_idx = idx[:int(N*.8)]
val_idx = idx[int(N*.8):]

# Generate train and validation sets
x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]

# Data preparation 
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Tranform the data from numpy array to torch tensor
x_train_tensor = torch.as_tensor(x_train).float().to(device)
y_train_tensor = torch.as_tensor(y_train).float().to(device)

In [12]:
# Sets learning rate
lr = 0.1 

# Step 0 : Initialize parameters 'b' and 'w' randomly
torch.manual_seed(42)

b = torch.randn(1, requires_grad=True, \
                dtype=torch.float, device=device)
w = torch.randn(1, requires_grad=True, \
                dtype=torch.float, device=device)

# Define a SGD optimizer to update the parameters
optimizer = torch.optim.SGD([b,w], lr=lr)


#Define a MSE loss function 
loss_fn = nn.MSELoss(reduction="mean")  #1


# Define number of epochs 
n_epochs=1000

for epoch in range(n_epochs):
    # Step 1: Computes the model's predicted output - forward pass
    yhat = b + w*x_train_tensor 
    
    # Step 2: Computes the loss
    # No more manual loss
    # error = (yhat - y_train_tensor)
    # loss = (error**2).mean()
    loss = loss_fn(yhat, y_train_tensor) #2
    
    # Step 3: Computes gradients for both 'b' and 'w' parameters
    loss.backward()
    
    # Step 4: Updates parameters using gradients and the learning rate
    # No more manual update
    # with torch.no_grade():
    #    b-=lr*b.grad
    #    w-=lr*w.grad
    optimizer.step()
    
    
    # Graident Zeroing
    # No more telling pytorch to let gradients go 
    #b.grad.zero()
    #w.grad.zero()
    optimizer.zero_grad()

print(b,w)

tensor([1.0557], requires_grad=True) tensor([1.9947], requires_grad=True)


In [13]:
print(loss)

tensor(0.0009, grad_fn=<MseLossBackward0>)


In [14]:
# Converting to numpy using detach since gradient is being computed 
print(loss.detach().cpu().numpy())

0.00092545414
