### computational graphs in pyTorch

In [1]:
import torch

In [3]:
a = torch.tensor([2.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

In [5]:
c = a + b
d = b + 1
e = c * d

# grads populated for non-leaf nodes
c.retain_grad()
d.retain_grad()
e.retain_grad()

In [7]:
print(e)

tensor([6.], grad_fn=<MulBackward0>)


### Train a neural network using PyTorch

In [10]:
## The usual imports
import torch
import torch.nn as nn

## print out the pytorch version used
print(torch.__version__)

2.5.1+cpu


In [12]:
## our data in tensor form
x = torch.tensor([[-1.0],  [0.0], [1.0], [2.0], [3.0], [4.0]], dtype=torch.float)
y = torch.tensor([[-3.0], [-1.0], [1.0], [3.0], [5.0], [7.0]], dtype=torch.float)

In [14]:
## print size of the input tensor
x.size()

torch.Size([6, 1])

In [16]:
## Neural network with 1 hidden layer
layer1 = nn.Linear(1,1, bias=False)
model = nn.Sequential(layer1)

## loss function
criterion = nn.MSELoss()

## optimizer algorithm
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [18]:
## training
for ITER in range(150):
    model = model.train()

    ## forward
    output = model(x)
    loss = criterion(output, y)
    optimizer.zero_grad()

    ## backward + update model params 
    loss.backward()
    optimizer.step()

    model.eval()
    print('Epoch: %d | Loss: %.4f' %(ITER, loss.detach().item()))

Epoch: 0 | Loss: 33.4215
Epoch: 1 | Loss: 26.9819
Epoch: 2 | Loss: 21.8044
Epoch: 3 | Loss: 17.6416
Epoch: 4 | Loss: 14.2947
Epoch: 5 | Loss: 11.6037
Epoch: 6 | Loss: 9.4402
Epoch: 7 | Loss: 7.7006
Epoch: 8 | Loss: 6.3020
Epoch: 9 | Loss: 5.1775
Epoch: 10 | Loss: 4.2734
Epoch: 11 | Loss: 3.5465
Epoch: 12 | Loss: 2.9621
Epoch: 13 | Loss: 2.4922
Epoch: 14 | Loss: 2.1144
Epoch: 15 | Loss: 1.8106
Epoch: 16 | Loss: 1.5664
Epoch: 17 | Loss: 1.3700
Epoch: 18 | Loss: 1.2122
Epoch: 19 | Loss: 1.0852
Epoch: 20 | Loss: 0.9832
Epoch: 21 | Loss: 0.9011
Epoch: 22 | Loss: 0.8352
Epoch: 23 | Loss: 0.7821
Epoch: 24 | Loss: 0.7395
Epoch: 25 | Loss: 0.7052
Epoch: 26 | Loss: 0.6776
Epoch: 27 | Loss: 0.6554
Epoch: 28 | Loss: 0.6376
Epoch: 29 | Loss: 0.6233
Epoch: 30 | Loss: 0.6118
Epoch: 31 | Loss: 0.6025
Epoch: 32 | Loss: 0.5951
Epoch: 33 | Loss: 0.5891
Epoch: 34 | Loss: 0.5843
Epoch: 35 | Loss: 0.5804
Epoch: 36 | Loss: 0.5773
Epoch: 37 | Loss: 0.5748
Epoch: 38 | Loss: 0.5728
Epoch: 39 | Loss: 0.5712
Epoc

In [20]:
## test the model
sample = torch.tensor([10.0], dtype=torch.float)
predicted = model(sample)
print(predicted.detach().item())

17.096769332885742


In [22]:
# New input data with two features per example
x = torch.tensor([
    [-1.0, -2.0],
    [0.0, 0.0],
    [1.0, 2.0],
    [2.0, 4.0],
    [3.0, 6.0],
    [4.0, 8.0],
    [5.0, 10.0]
], dtype=torch.float)

# Corresponding output data
y = torch.tensor([
    [-3.0],
    [-1.0],
    [1.0],
    [3.0],
    [5.0],
    [7.0],
    [9.0]
], dtype=torch.float)

In [24]:
# Print the size of the input tensor
print("Input size:", x.size())

Input size: torch.Size([7, 2])


In [26]:
# Adjusted Neural Network with 2 input features and 1 output feature
layer1 = nn.Linear(2, 1, bias=True)  # Note the increase in input features from 1 to 2
model = nn.Sequential(layer1)

# Loss function
criterion = nn.MSELoss()

# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [28]:
# Training loop
for ITER in range(150):
    model = model.train()

    # Forward pass
    output = model(x)
    loss = criterion(output, y)
    optimizer.zero_grad()

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Print the loss
    if ITER % 10 == 0:  # Print every 10 epochs for brevity
        print(f'Epoch: {ITER} | Loss: {loss.item():.4f}')

Epoch: 0 | Loss: 44.9798
Epoch: 10 | Loss: 0.4007
Epoch: 20 | Loss: 0.3286
Epoch: 30 | Loss: 0.2694
Epoch: 40 | Loss: 0.2209
Epoch: 50 | Loss: 0.1811
Epoch: 60 | Loss: 0.1485
Epoch: 70 | Loss: 0.1218
Epoch: 80 | Loss: 0.0999
Epoch: 90 | Loss: 0.0819
Epoch: 100 | Loss: 0.0671
Epoch: 110 | Loss: 0.0551
Epoch: 120 | Loss: 0.0451
Epoch: 130 | Loss: 0.0370
Epoch: 140 | Loss: 0.0304


In [30]:
# Test the model
sample = torch.tensor([[10.0, 20.0]], dtype=torch.float)  # Two features in the test input
predicted = model(sample)
print("Prediction for [10.0, 20.0]:", predicted.item())

Prediction for [10.0, 20.0]: 18.658388137817383


The model converged really fast, which means it learned the relationship between x and y values after a couple of iterations. Do you think it makes
sense to continue training? How would you automate the process of stopping the training after the model loss doesn't subtantially change?

#### Updated Code with Early Stopping

In [34]:
# Input data with two features per example
x = torch.tensor([
    [-1.0, -2.0],
    [0.0, 0.0],
    [1.0, 2.0],
    [2.0, 4.0],
    [3.0, 6.0],
    [4.0, 8.0],
    [5.0, 10.0]
], dtype=torch.float)

# Corresponding output data
y = torch.tensor([
    [-3.0],
    [-1.0],
    [1.0],
    [3.0],
    [5.0],
    [7.0],
    [9.0]
], dtype=torch.float)

# Neural network
layer1 = nn.Linear(2, 1, bias=True)
model = nn.Sequential(layer1)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Early stopping parameters
patience = 10
min_delta = 1e-4  # Minimum change in loss to be considered an improvement
best_loss = float('inf')
wait = 0

# Training loop with early stopping
for ITER in range(150):
    model.train()
    output = model(x)
    loss = criterion(output, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Check for early stopping
    current_loss = loss.item()
    if best_loss - current_loss > min_delta:
        best_loss = current_loss
        wait = 0  # Reset the wait counter
    else:
        wait += 1  # Increment the wait counter

    # Print loss every 10 epochs
    if ITER % 10 == 0:
        print(f'Epoch: {ITER} | Loss: {current_loss:.4f}')

    # Stop training if no improvement for `patience` epochs
    if wait >= patience:
        print(f"Early stopping at epoch {ITER}. Best loss: {best_loss:.4f}")
        break


Epoch: 0 | Loss: 102.2603
Epoch: 10 | Loss: 0.2162
Epoch: 20 | Loss: 0.1773
Epoch: 30 | Loss: 0.1454
Epoch: 40 | Loss: 0.1192
Epoch: 50 | Loss: 0.0978
Epoch: 60 | Loss: 0.0802
Epoch: 70 | Loss: 0.0657
Epoch: 80 | Loss: 0.0539
Epoch: 90 | Loss: 0.0442
Epoch: 100 | Loss: 0.0362
Epoch: 110 | Loss: 0.0297
Epoch: 120 | Loss: 0.0244
Epoch: 130 | Loss: 0.0200
Epoch: 140 | Loss: 0.0164


In [36]:
# Test the model
sample = torch.tensor([[10.0, 20.0]], dtype=torch.float)
predicted = model(sample)
print("Prediction for [10.0, 20.0]:", predicted.item())


Prediction for [10.0, 20.0]: 18.749040603637695


### Multi-Layer Network

In [39]:
# Input data with two features per example
x = torch.tensor([
    [-1.0, -2.0],
    [0.0, 0.0],
    [1.0, 2.0],
    [2.0, 4.0],
    [3.0, 6.0],
    [4.0, 8.0],
    [5.0, 10.0]
], dtype=torch.float)

# Corresponding output data
y = torch.tensor([
    [-3.0],
    [-1.0],
    [1.0],
    [3.0],
    [5.0],
    [7.0],
    [9.0]
], dtype=torch.float)

# Neural network with multiple layers
model = nn.Sequential(
    nn.Linear(2, 4),  # Input layer to first hidden layer (2 inputs -> 4 outputs)
    nn.ReLU(),       # Activation function
    nn.Linear(4, 4),  # First hidden layer to second hidden layer (4 inputs -> 4 outputs)
    nn.ReLU(),       # Activation function
    nn.Linear(4, 1)   # Second hidden layer to output layer (4 inputs -> 1 output)
)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop
for ITER in range(150):
    model.train()
    output = model(x)
    loss = criterion(output, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print loss every 10 epochs
    if ITER % 10 == 0:
        print(f'Epoch: {ITER} | Loss: {loss.item():.4f}')


Epoch: 0 | Loss: 23.3729
Epoch: 10 | Loss: 1.6055
Epoch: 20 | Loss: 0.6209
Epoch: 30 | Loss: 0.2011
Epoch: 40 | Loss: 0.0492
Epoch: 50 | Loss: 0.0501
Epoch: 60 | Loss: 0.0162
Epoch: 70 | Loss: 0.0238
Epoch: 80 | Loss: 0.0174
Epoch: 90 | Loss: 0.0178
Epoch: 100 | Loss: 0.0119
Epoch: 110 | Loss: 0.0570
Epoch: 120 | Loss: 0.0069
Epoch: 130 | Loss: 0.0053
Epoch: 140 | Loss: 0.0222


In [41]:
# Test the model
sample = torch.tensor([[10.0, 20.0]], dtype=torch.float)
predicted = model(sample)
print("Prediction for [10.0, 20.0]:", predicted.item())

Prediction for [10.0, 20.0]: 17.481834411621094


### Another Neural Network in PyTorch

In [2]:
import torch
import torch.nn as nn

In [4]:
X = torch.tensor(([2, 9], [1, 5], [3, 6]), dtype=torch.float) # 3 X 2 tensor
y = torch.tensor(([92], [100], [89]), dtype=torch.float) # 3 X 1 tensor
xPredicted = torch.tensor(([4, 8]), dtype=torch.float) # 1 X 2 tensor

In [6]:
print(X.size())
print(y.size())

torch.Size([3, 2])
torch.Size([3, 1])


In [8]:
# scale units
X_max, _ = torch.max(X, 0)
xPredicted_max, _ = torch.max(xPredicted, 0)

X = torch.div(X, X_max)
xPredicted = torch.div(xPredicted, xPredicted_max)
y = y / 100  # max test score is 100

In [10]:
class Neural_Network(nn.Module):
    def __init__(self, ):
        super(Neural_Network, self).__init__()
        # parameters
        # TODO: parameters can be parameterized instead of declaring them here
        self.inputSize = 2
        self.outputSize = 1
        self.hiddenSize = 3
        
        # weights
        self.W1 = torch.randn(self.inputSize, self.hiddenSize) # 2 X 3 tensor
        self.W2 = torch.randn(self.hiddenSize, self.outputSize) # 3 X 1 tensor
        
    def forward(self, X):
        self.z = torch.matmul(X, self.W1) # 3 X 3 ".dot" does not broadcast in PyTorch
        self.z2 = self.sigmoid(self.z) # activation function
        self.z3 = torch.matmul(self.z2, self.W2)
        o = self.sigmoid(self.z3) # final activation function
        return o
        
    def sigmoid(self, s):
        return 1 / (1 + torch.exp(-s))
    
    def sigmoidPrime(self, s):
        # derivative of sigmoid
        return s * (1 - s)
    
    def backward(self, X, y, o):
        self.o_error = y - o # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o) # derivative of sig to error
        self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)
        self.W1 += torch.matmul(torch.t(X), self.z2_delta)
        self.W2 += torch.matmul(torch.t(self.z2), self.o_delta)
        
    def train(self, X, y):
        # forward + backward pass for training
        o = self.forward(X)
        self.backward(X, y, o)
        
    def saveWeights(self, model):
        # we will use the PyTorch internal storage functions
        torch.save(model, "NN")
        # you can reload model with all the weights and so forth with:
        # torch.load("NN")
        
    def predict(self):
        print ("Predicted data based on trained weights: ")
        print ("Input (scaled): \n" + str(xPredicted))
        print ("Output: \n" + str(self.forward(xPredicted)))

In [12]:
NN = Neural_Network()
for i in range(1000):  # trains the NN 1,000 times
    print ("#" + str(i) + " Loss: " + str(torch.mean((y - NN(X))**2).detach().item()))  # mean sum squared loss
    NN.train(X, y)
NN.saveWeights(NN)
NN.predict()

#0 Loss: 0.16250036656856537
#1 Loss: 0.1136440858244896
#2 Loss: 0.08155068010091782
#3 Loss: 0.0605747289955616
#4 Loss: 0.04657069221138954
#5 Loss: 0.036928530782461166
#6 Loss: 0.03007357381284237
#7 Loss: 0.02505268156528473
#8 Loss: 0.02127617783844471
#9 Loss: 0.01836881972849369
#10 Loss: 0.0160848256200552
#11 Loss: 0.014258626848459244
#12 Loss: 0.012775792740285397
#13 Loss: 0.011555382050573826
#14 Loss: 0.010538906790316105
#15 Loss: 0.009683270938694477
#16 Loss: 0.00895621720701456
#17 Loss: 0.008333173580467701
#18 Loss: 0.007795186247676611
#19 Loss: 0.007327425759285688
#20 Loss: 0.006918183993548155
#21 Loss: 0.006558097433298826
#22 Loss: 0.006239603739231825
#23 Loss: 0.005956541281193495
#24 Loss: 0.005703869741410017
#25 Loss: 0.0054773916490375996
#26 Loss: 0.005273626651614904
#27 Loss: 0.005089659243822098
#28 Loss: 0.004923011641949415
#29 Loss: 0.004771605599671602
#30 Loss: 0.00463363341987133
#31 Loss: 0.004507578909397125
#32 Loss: 0.004392113070935011
#