In [1]:
import torch 
from torch import nn

In [2]:

# Define the linear regression model with the pytorch `nn.Module` class

In [3]:
class ManualLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        #Make `b` and `w` be the parameters of the model
        #Wrap them with `nn.Parameter` 
        self.b = nn.Parameter(torch.randn(1,
                                        requires_grad=True,
                                        dtype=torch.float))
        self.w = nn.Parameter(torch.randn(1,
                                        requires_grad=True,
                                        dtype=torch.float))
    def forward(self, x):
        # Compute the outputs /predictions
        return self.b + self.w*x

In [4]:
#Example to construct a object
dummy = ManualLinearRegression()
dummy.parameters()
print(dummy.parameters())
print(list(dummy.parameters()))

<generator object Module.parameters at 0x7fd8a8d56660>
[Parameter containing:
tensor([2.1423], requires_grad=True), Parameter containing:
tensor([0.8725], requires_grad=True)]


In [5]:
print(dummy.state_dict())

OrderedDict([('b', tensor([2.1423])), ('w', tensor([0.8725]))])


In [6]:
#Data generation 
import numpy as np
import torch 


true_b = 1
true_w = 2 
N = 100

# set the random seed for numpy 
np.random.seed(43)

x= np.random.rand(N,1)
epsilon = (.1 * np.random.rand(N,1))

y = true_b + true_w *x + epsilon

# Generate training and validating sets 
idx = np.arange(N)
np.random.shuffle(idx)

# Use first 80 randowm indices for train
train_idx = idx[:int(N*.8)]
val_idx = idx[int(N*.8):]

# Generate train and validation sets
x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]

# Data preparation 
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Tranform the data from numpy array to torch tensor
x_train_tensor = torch.as_tensor(x_train).float().to(device)
y_train_tensor = torch.as_tensor(y_train).float().to(device)

In [7]:
# Sets learning rate
lr = 0.1 

# Step 0 : Initialize parameters 'b' and 'w' randomly
torch.manual_seed(42)

# Create a model and send it at once to the device
model = ManualLinearRegression().to(device) # 1)

# Define a SGD optimizer to update the parameters
#optimizer = torch.optim.SGD([b,w], lr=lr)
optimizer = torch.optim.SGD(model.parameters(),lr=lr)


#Define a MSE loss function 
loss_fn = nn.MSELoss(reduction="mean")  


# Define number of epochs 
n_epochs=1000

for epoch in range(n_epochs): 
    model.train()  #2)
    # Step 1: Computes the model's predicted output - forward pass
    # No more manula prediction 
    #yhat = b + w*x_train_tensor 
    yhat = model(x_train_tensor)  #3)
    
    # Step 2: Computes the loss
    # No more manual loss
    # error = (yhat - y_train_tensor)
    # loss = (error**2).mean()
    loss = loss_fn(yhat, y_train_tensor) #2
    
    # Step 3: Computes gradients for both 'b' and 'w' parameters
    loss.backward()
    
    # Step 4: Updates parameters using gradients and the learning rate
    # No more manual update
    # with torch.no_grade():
    #    b-=lr*b.grad
    #    w-=lr*w.grad
    optimizer.step()
    
    
    # Graident Zeroing
    # No more telling pytorch to let gradients go 
    #b.grad.zero()
    #w.grad.zero()
    optimizer.zero_grad()
    print(model.state_dict())

#print(b,w)
#Inspect the parameters using its state_dict
print(model.state_dict())

OrderedDict([('b', tensor([0.6706])), ('w', tensor([0.3299]))])
OrderedDict([('b', tensor([0.9172])), ('w', tensor([0.4832]))])
OrderedDict([('b', tensor([1.0989])), ('w', tensor([0.6008]))])
OrderedDict([('b', tensor([1.2323])), ('w', tensor([0.6919]))])
OrderedDict([('b', tensor([1.3297])), ('w', tensor([0.7632]))])
OrderedDict([('b', tensor([1.4003])), ('w', tensor([0.8197]))])
OrderedDict([('b', tensor([1.4511])), ('w', tensor([0.8651]))])
OrderedDict([('b', tensor([1.4871])), ('w', tensor([0.9022]))])
OrderedDict([('b', tensor([1.5121])), ('w', tensor([0.9331]))])
OrderedDict([('b', tensor([1.5290])), ('w', tensor([0.9593]))])
OrderedDict([('b', tensor([1.5398])), ('w', tensor([0.9820]))])
OrderedDict([('b', tensor([1.5462])), ('w', tensor([1.0021]))])
OrderedDict([('b', tensor([1.5492])), ('w', tensor([1.0201]))])
OrderedDict([('b', tensor([1.5498])), ('w', tensor([1.0366]))])
OrderedDict([('b', tensor([1.5486])), ('w', tensor([1.0519]))])
OrderedDict([('b', tensor([1.5460])), ('

OrderedDict([('b', tensor([1.0635])), ('w', tensor([1.9804]))])
OrderedDict([('b', tensor([1.0634])), ('w', tensor([1.9806]))])
OrderedDict([('b', tensor([1.0633])), ('w', tensor([1.9808]))])
OrderedDict([('b', tensor([1.0632])), ('w', tensor([1.9809]))])
OrderedDict([('b', tensor([1.0631])), ('w', tensor([1.9811]))])
OrderedDict([('b', tensor([1.0630])), ('w', tensor([1.9813]))])
OrderedDict([('b', tensor([1.0629])), ('w', tensor([1.9815]))])
OrderedDict([('b', tensor([1.0629])), ('w', tensor([1.9816]))])
OrderedDict([('b', tensor([1.0628])), ('w', tensor([1.9818]))])
OrderedDict([('b', tensor([1.0627])), ('w', tensor([1.9820]))])
OrderedDict([('b', tensor([1.0626])), ('w', tensor([1.9821]))])
OrderedDict([('b', tensor([1.0625])), ('w', tensor([1.9823]))])
OrderedDict([('b', tensor([1.0624])), ('w', tensor([1.9825]))])
OrderedDict([('b', tensor([1.0623])), ('w', tensor([1.9826]))])
OrderedDict([('b', tensor([1.0622])), ('w', tensor([1.9828]))])
OrderedDict([('b', tensor([1.0621])), ('

OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9945]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9945]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9945]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9945]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9945]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9945]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9945]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9945]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9946]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9946]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9946]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9946]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9946]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9946]))])
OrderedDict([('b', tensor([1.0558])), ('w', tensor([1.9946]))])
OrderedDict([('b', tensor([1.0558])), ('

OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
OrderedDict([('b', tensor([1.0557])), ('w', tensor([1.9947]))])
