In [1]:
%matplotlib inline


`Learn the Basics <intro.html>`_ ||
`Quickstart <quickstart_tutorial.html>`_ ||
`Tensors <tensorqs_tutorial.html>`_ ||
`Datasets & DataLoaders <data_tutorial.html>`_ ||
`Transforms <transforms_tutorial.html>`_ ||
`Build Model <buildmodel_tutorial.html>`_ ||
`Autograd <autogradqs_tutorial.html>`_ ||
**Optimization** ||
`Save & Load Model <saveloadrun_tutorial.html>`_

Optimizing Model Parameters
===========================

Now that we have a model and data it's time to train, validate and test our model by optimizing its parameters on
our data. Training a model is an iterative process; in each iteration (called an *epoch*) the model makes a guess about the output, calculates
the error in its guess (*loss*), collects the derivatives of the error with respect to its parameters (as we saw in
the `previous section <autogradqs_tutorial.html>`_), and **optimizes** these parameters using gradient descent. For a more
detailed walkthrough of this process, check out this video on `backpropagation from 3Blue1Brown <https://www.youtube.com/watch?v=tIeHLnjs5U8>`__.

Prerequisite Code
-----------------
We load the code from the previous sections on `Datasets & DataLoaders <data_tutorial.html>`_
and `Build Model  <buildmodel_tutorial.html>`_.


In [18]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

# Training split of FashionMNIST, stored under ./data. download=True asks
# torchvision to fetch the files, and ToTensor() converts each image to a tensor.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

# Held-out test split (train=False), same storage root and transform.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

# Both loaders use a literal batch size of 64. NOTE(review): this is not tied to
# the `batch_size` hyperparameter defined further down, so changing that
# variable does not affect these loaders.
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)


# model = NeuralNetwork()

In [19]:
class FirstModel(nn.Module): #For images
    """Fully connected classifier: 28*28 inputs -> 512 -> 256 -> 128 -> 64 -> 10 logits.

    Every hidden ``Linear`` layer is followed by a ``ReLU``; the final layer
    emits raw logits with no activation.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # Hidden widths, input first. Layers are created in order so the
        # Sequential indices (0, 2, 4, 6 for the hidden Linear layers, 8 for
        # the output layer) stay the same.
        widths = (28*28, 512, 256, 128, 64)
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        layers.append(nn.Linear(widths[-1], 10))
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the logits produced by the layer stack."""
        return self.linear_relu_stack(self.flatten(x))


In [20]:

class SecondModel(nn.Module):
    """Fully connected classifier that uses Softmax as its hidden activation.

    Layout: 28*28 inputs -> 512 -> 256 -> 64 -> 10 logits, with a ``Softmax``
    after each hidden ``Linear`` layer and no activation on the output.

    NOTE(review): Softmax is an unusual choice for a hidden activation;
    confirm this is intentional rather than a stand-in for ReLU.
    """

    def __init__(self):
        super(SecondModel, self).__init__()
        self.flatten = nn.Flatten()
        # The stack always sees the 2-D output of Flatten, so the feature axis
        # is dim=1. Passing it explicitly avoids the "Implicit dimension choice
        # for softmax has been deprecated" warning; for 2-D input the implicit
        # rule resolves to the same axis, so results are unchanged.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.Softmax(dim=1),
            nn.Linear(512, 256),
            nn.Softmax(dim=1),
            nn.Linear(256, 64),
            nn.Softmax(dim=1),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Flatten ``x`` and return the logits produced by the layer stack."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# model = NeuralNetwork()

In [21]:
# class Combined_model(nn.Module):
#     def __init__(self, modelA, modelB):
#         super(Combined_model, self).__init__()
#         self.modelA = modelA
#         self.modelB = modelB
#         self.classifier = nn.Linear(20, 10)
        
#     def forward(self, x1, x2):
#         x1 = self.modelA(x1)
#         x2 = self.modelB(x2)
#         x = torch.cat((x1, x2), dim=1)
#         x = self.classifier(x)
#         return x

In [22]:
class ThirdModel(nn.Module): #For images
    """ReLU multilayer perceptron mapping flattened 28*28 inputs to 10 logits.

    The layer sizes (784 -> 512 -> 256 -> 128 -> 64 -> 10) match those of
    ``FirstModel``; the two classes are separate so each instance carries its
    own weights.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        dims = [28*28, 512, 256, 128, 64, 10]
        last = len(dims) - 2  # index of the output layer
        modules = []
        for i in range(len(dims) - 1):
            modules.append(nn.Linear(dims[i], dims[i + 1]))
            if i != last:
                # Every layer except the output layer is followed by ReLU.
                modules.append(nn.ReLU())
        self.linear_relu_stack = nn.Sequential(*modules)

    def forward(self, x):
        """Flatten ``x``, run it through the stack and return the logits."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

In [23]:
class Combined_modelInt(nn.Module):
    """Two-branch ensemble: concatenates both sub-model outputs and classifies them.

    Args:
        modelA: module applied to the first input.
        modelB: module applied to the second input.

    The head is ``nn.Linear(20, 10)``, so the two branch outputs must total
    20 features along dim 1 after concatenation.
    """

    def __init__(self, modelA, modelB):
        super().__init__()
        self.modelA, self.modelB = modelA, modelB
        self.classifier = nn.Linear(20, 10)

    def forward(self, x1, x2):
        """Run ``x1`` through modelA and ``x2`` through modelB, then classify."""
        branch_outputs = (self.modelA(x1), self.modelB(x2))
        merged = torch.cat(branch_outputs, dim=1)
        return self.classifier(merged)

In [24]:
class Combined_modelBruh(nn.Module):
    """Ensemble whose second branch is itself a two-input module.

    Args:
        modelA: module called with a single input (``x1``).
        modelB: module called with two inputs; ``forward`` passes ``x2`` to it
            twice.

    The two branch outputs are concatenated along dim 1 and passed to an
    ``nn.Linear(20, 10)`` head, so they must total 20 features.
    """

    def __init__(self, modelA, modelB):
        super().__init__()
        self.modelA, self.modelB = modelA, modelB
        self.classifier = nn.Linear(20, 10)

    def forward(self, x1, x2):
        """Return classifier logits for the concatenated branch outputs."""
        out_a = self.modelA(x1)
        # modelB takes two inputs; both are fed from x2.
        out_b = self.modelB(x2, x2)
        return self.classifier(torch.cat((out_a, out_b), dim=1))

In [25]:
# Instantiate the three base classifiers; later cells read M1..M4 as globals.
M1=FirstModel()
M3=ThirdModel()
M2=SecondModel()
# M4 merges M1 and M3: their outputs are concatenated and passed through the
# Linear(20, 10) head of Combined_modelInt.
M4 = Combined_modelInt(M1, M3)
# input_image = torch.rand(3,28,28)
# print(input_image.shape)
# model1 = Combined_modelInt(M1, M2)
# # x1, x2 = torch.randn(1, 10), torch.randn(1, 20)
# output1 = model1(input_image, input_image)

In [26]:


# Smoke test: push one random tensor of shape (3, 28, 28) through the nested
# ensemble to check that the shapes line up.
input_image = torch.rand(3,28,28)
# print(output1.shape)
# M2 is the single-input branch; M4 (itself a two-input ensemble) is the second branch.
model = Combined_modelBruh(M2, M4)
# x1, x2 = torch.randn(1, 10), torch.randn(1, 20)
# The same tensor is fed to both branches.
output = model(input_image, input_image)

  input = module(input)


Hyperparameters
-----------------

Hyperparameters are adjustable parameters that let you control the model optimization process.
Different hyperparameter values can impact model training and convergence rates
(`read more <https://pytorch.org/tutorials/beginner/hyperparameter_tuning_tutorial.html>`__ about hyperparameter tuning).

We define the following hyperparameters for training:
 - **Number of Epochs** - the number of times to iterate over the dataset
 - **Batch Size** - the number of data samples propagated through the network before the parameters are updated
 - **Learning Rate** - how much to update the model's parameters at each batch/epoch. Smaller values yield slow learning speed, while large values may result in unpredictable behavior during training.




In [27]:
# Rebuild every sub-model, replacing the instances created in the earlier cells.
M1=FirstModel()
M3=ThirdModel()
M2=SecondModel()
# Unlike the earlier cell, M4 now combines M2 and M3 ...
M4 = Combined_modelInt(M2, M3)
# ... and M1 is the first branch of the outer ensemble, so all three base
# models take part in the final model.
model = Combined_modelBruh(M1, M4)

In [28]:
# Step size passed to the SGD optimizers below.
learning_rate = 1e-3
# NOTE(review): not used by the DataLoaders above, which hard-code batch_size=64.
batch_size = 64
# NOTE(review): the training cell reassigns epochs = 10 before looping.
epochs = 5

In [29]:
# Initialize the loss function: cross-entropy between the model's output logits
# and the integer class labels.
loss_fn = nn.CrossEntropyLoss()

In [30]:
# Plain SGD over every parameter of `model` as it exists when this cell runs.
# NOTE(review): the training cell rebuilds both `model` and `optimizer`, so this
# instance is superseded before any training happens.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [31]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``.dataset``
            attribute with a length.
        model: two-input module; each batch ``X`` is passed as both inputs.
        loss_fn: callable mapping ``(pred, y)`` to a scalar loss tensor.
        optimizer: optimizer holding ``model``'s parameters.

    Prints the loss and running sample count every 100 batches.
    """
    size = len(dataloader.dataset)
    # Make sure train-time behaviour (dropout, batch-norm statistics) is
    # active even if the model was previously switched to eval mode.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X, X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # Use a separate name so the `loss` tensor is not replaced by a float.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X,X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

We initialize the loss function and optimizer, and pass them to ``train_loop`` and ``test_loop``.
Feel free to increase the number of epochs to track the model's improving performance.



In [32]:
#model=Combined_modelInt(M1,M2)
# Build a new outer ensemble. M1 and M4 are the instances created in an earlier
# cell, so only the outer classifier head is freshly initialised here;
# re-running this cell continues from whatever weights M1 and M4 currently hold.
model = Combined_modelBruh(M1, M4)
loss_fn = nn.CrossEntropyLoss()
# The optimizer must be recreated so that it tracks the new model's parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Overrides the epochs value set in the hyperparameter cell.
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # One full pass over the training data, then an evaluation on the test split.
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.313334  [    0/60000]


  input = module(input)


loss: 2.308922  [ 6400/60000]
loss: 2.319726  [12800/60000]
loss: 2.321332  [19200/60000]
loss: 2.294420  [25600/60000]
loss: 2.293205  [32000/60000]
loss: 2.306473  [38400/60000]
loss: 2.295124  [44800/60000]
loss: 2.313856  [51200/60000]
loss: 2.309300  [57600/60000]
Test Error: 
 Accuracy: 10.0%, Avg loss: 2.306882 

Epoch 2
-------------------------------
loss: 2.310316  [    0/60000]
loss: 2.306760  [ 6400/60000]
loss: 2.315878  [12800/60000]
loss: 2.317416  [19200/60000]
loss: 2.294489  [25600/60000]
loss: 2.293016  [32000/60000]
loss: 2.304900  [38400/60000]
loss: 2.294872  [44800/60000]
loss: 2.310818  [51200/60000]
loss: 2.307293  [57600/60000]
Test Error: 
 Accuracy: 10.0%, Avg loss: 2.305010 

Epoch 3
-------------------------------
loss: 2.307800  [    0/60000]
loss: 2.305031  [ 6400/60000]
loss: 2.312657  [12800/60000]
loss: 2.314136  [19200/60000]
loss: 2.294666  [25600/60000]
loss: 2.292946  [32000/60000]
loss: 2.303612  [38400/60000]
loss: 2.294726  [44800/60000]
loss: 

Further Reading
-----------------------
- `Loss Functions <https://pytorch.org/docs/stable/nn.html#loss-functions>`_
- `torch.optim <https://pytorch.org/docs/stable/optim.html>`_
- `Warmstart Training a Model <https://pytorch.org/tutorials/recipes/recipes/warmstarting_model_using_parameters_from_a_different_model.html>`_




In [8]:

0