<a href="https://colab.research.google.com/github/1995subhankar1995/Torch-Tutorials/blob/main/A_model_has_mul_modules.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


#### Finetuning the convnet ####
# Load a pretrained model and reset final fully connected layer.

model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model.fc = nn.Linear(num_ftrs, 2)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = optim.SGD(model.parameters(), lr=0.001)

# StepLR Decays the learning rate of each parameter group by gamma every step_size epochs
# Decay LR by a factor of 0.1 every 7 epochs
# Learning rate scheduling should be applied after optimizer’s update
# e.g., you should write your code this way:
# for epoch in range(100):
#     train(...)
#     validate(...)
#     scheduler.step()

step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=25)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import os
import sys

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 10
batch_size = 16
lr = 1e-3

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))])

train_dataset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_dataset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
train_loader = torch.utils.data.DataLoader(train_dataset)
num_classes = 10
train = [[] for i in range(num_classes)]
for i in range(num_classes):
  for image, label in train_loader:
    image = image.view(3, 32, 32).numpy()
    if i == label.numpy()[0]:
      train[i].append(image)
  train[i] = torch.tensor(train[i])


In [None]:
for i in range(num_classes):
  print('Shape:', train[i].shape, 'Label:', i)

Shape: torch.Size([5000, 3, 32, 32]) Label: 0
Shape: torch.Size([5000, 3, 32, 32]) Label: 1
Shape: torch.Size([5000, 3, 32, 32]) Label: 2
Shape: torch.Size([5000, 3, 32, 32]) Label: 3
Shape: torch.Size([5000, 3, 32, 32]) Label: 4
Shape: torch.Size([5000, 3, 32, 32]) Label: 5
Shape: torch.Size([5000, 3, 32, 32]) Label: 6
Shape: torch.Size([5000, 3, 32, 32]) Label: 7
Shape: torch.Size([5000, 3, 32, 32]) Label: 8
Shape: torch.Size([5000, 3, 32, 32]) Label: 9


In [None]:
def next_data(data, batch_size = 64, shuffle = True):
  data = torch.utils.data.DataLoader(data, batch_size, shuffle = True)
  return data

In [None]:
class Classifier(nn.Module):
  def __init__(self):
    super(Classifier, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.maxpool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.lin1 = nn.Linear(5 * 16 * 5, 120)
    self.lin2 = nn.Linear(120, 20)
    self.drop = nn.Dropout(p = 0.5)
    self.lin3 = nn.Linear(20, 2)
    self.soft = nn.LogSoftmax(dim = 1)
    

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = self.maxpool(x)
    x = F.relu(self.conv2(x))
    x = self.maxpool(x)
    x = x.view(-1, 16 * 5 * 5)
    x = F.relu(self.lin1(x))
    x = F.relu(self.lin2(x))
    #x = self.drop(x)
    x = self.lin3(x)
    x = self.soft(x)
    return x
model_classifier = Classifier()
model_classifier.to(device)
print(model_classifier)

Classifier(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (lin1): Linear(in_features=400, out_features=120, bias=True)
  (lin2): Linear(in_features=120, out_features=20, bias=True)
  (drop): Dropout(p=0.5, inplace=False)
  (lin3): Linear(in_features=20, out_features=2, bias=True)
  (soft): LogSoftmax(dim=1)
)


In [None]:
batch_size = 64
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-2)
model_classifier.train()
for i in range(num_classes):
  train_data = next_data(train[i], batch_size = 64, shuffle = True)
  for e in range(num_epochs):
    loss_sum = 0
    for images in train_data:
      optimizer.zero_grad()
      images = images.to(device)
      out = model_classifier(images)
      
      ones = torch.ones(len(images), dtype = torch.int64)
      ones = ones.to(device)
      loss = criterion(out, ones)
      loss.backward()
      optimizer.step()
      loss_sum += loss
    print('Loss:', loss_sum, 'Epoch:', e + 1)


Loss: tensor(55.1438, grad_fn=<AddBackward0>) Epoch: 1
Loss: tensor(55.1401, grad_fn=<AddBackward0>) Epoch: 2
Loss: tensor(55.1404, grad_fn=<AddBackward0>) Epoch: 3
Loss: tensor(55.1409, grad_fn=<AddBackward0>) Epoch: 4
Loss: tensor(55.1413, grad_fn=<AddBackward0>) Epoch: 5
Loss: tensor(55.1374, grad_fn=<AddBackward0>) Epoch: 6
Loss: tensor(55.1373, grad_fn=<AddBackward0>) Epoch: 7
Loss: tensor(55.1384, grad_fn=<AddBackward0>) Epoch: 8
Loss: tensor(55.1336, grad_fn=<AddBackward0>) Epoch: 9
Loss: tensor(55.1401, grad_fn=<AddBackward0>) Epoch: 10
Loss: tensor(55.1698, grad_fn=<AddBackward0>) Epoch: 1
Loss: tensor(55.1674, grad_fn=<AddBackward0>) Epoch: 2
Loss: tensor(55.1667, grad_fn=<AddBackward0>) Epoch: 3


KeyboardInterrupt: ignored

In [None]:
import torch
import torch.nn as nn
import torch.functional as F

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    for _ in range(4):
      self.lin1 = nn.Linear(30, 4)
      self.relu = nn.ReLU()
      self.lin2 = nn.Linear(4, 10)

  def forward(self, x):
    x = self.relu(self.lin1(x))
    x = self.lin2(x)
    return x

model = Net()
print(model)

Net(
  (lin1): Linear(in_features=30, out_features=4, bias=True)
  (relu): ReLU()
  (lin2): Linear(in_features=4, out_features=10, bias=True)
)


In [None]:
def Diff_Loss(p_out, S_out):
  
  return torch.norm(torch.matmul(torch.transpose(p_out, 1, 0), S_out))**2

a = torch.randn(30, 10)
b = torch.randn(30, 12)  
print(Diff_Loss(a, b))

tensor(4060.5161)


# models can be appended in a list. We can train separate models for each task and append them in a list so they can be used during testing.

In [68]:
import torch
import torch
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import numpy as np
from copy import deepcopy

path = '/content/drive/My Drive/ColabNotebooks/results/save1.pth'

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.lin1 = nn.Linear(2, 10)
    self.relu = nn.ReLU()
    self.lin2 = nn.Linear(10, 20)

  def forward(self, x):
    x = self.relu(self.lin1(x))
    x = self.lin2(x)
    return x
xx = np.random.random((40, 2))
x = [[]for i in range(2)]
y = [[]for i in range(2)]

for j in range(40):
  if j % 2 == 0:
    x[0].append(xx[j])
    y[0].append(1)
  else:
    x[1].append(xx[j])
    y[1].append(0)

x[0] = torch.from_numpy(np.array(x[0]).astype(np.float32))
x[1] = torch.from_numpy(np.array(x[1]).astype(np.float32))
y[0] = torch.from_numpy(np.array(y[0]).astype(np.float32))
y[1] = torch.from_numpy(np.array(y[1]).astype(np.float32))
model = Net()
torch.save(model.state_dict(), path)
print(x[0].shape, x[1].shape, y[0].shape, y[1].shape, 'shape')
save = []
optimizer = optim.SGD(model.parameters(), lr = 1e-2)
criterion = nn.MSELoss()
for i in range(2):
  for e in range(10):
    optimizer.zero_grad()

    out = model(x[i])
    loss = criterion(out, y[i])
    loss.backward()
    optimizer.step()
    print('epoch:', e + 1, 'loss:', loss)
  save.append(deepcopy(model))
  model.load_state_dict(torch.load(path))


print(save)

torch.Size([20, 2]) torch.Size([20, 2]) torch.Size([20]) torch.Size([20]) shape
epoch: 1 loss: tensor(1.0214, grad_fn=<MseLossBackward>)
epoch: 2 loss: tensor(1.0166, grad_fn=<MseLossBackward>)
epoch: 3 loss: tensor(1.0118, grad_fn=<MseLossBackward>)
epoch: 4 loss: tensor(1.0071, grad_fn=<MseLossBackward>)
epoch: 5 loss: tensor(1.0023, grad_fn=<MseLossBackward>)
epoch: 6 loss: tensor(0.9977, grad_fn=<MseLossBackward>)
epoch: 7 loss: tensor(0.9930, grad_fn=<MseLossBackward>)
epoch: 8 loss: tensor(0.9884, grad_fn=<MseLossBackward>)
epoch: 9 loss: tensor(0.9838, grad_fn=<MseLossBackward>)
epoch: 10 loss: tensor(0.9792, grad_fn=<MseLossBackward>)
epoch: 1 loss: tensor(0.0971, grad_fn=<MseLossBackward>)
epoch: 2 loss: tensor(0.0965, grad_fn=<MseLossBackward>)
epoch: 3 loss: tensor(0.0959, grad_fn=<MseLossBackward>)
epoch: 4 loss: tensor(0.0953, grad_fn=<MseLossBackward>)
epoch: 5 loss: tensor(0.0947, grad_fn=<MseLossBackward>)
epoch: 6 loss: tensor(0.0942, grad_fn=<MseLossBackward>)
epoch: 

  return F.mse_loss(input, target, reduction=self.reduction)


In [69]:
for i in range(2):
  for p in save[i].parameters():
    print(p[0])

tensor([-0.1271,  0.0361], grad_fn=<SelectBackward>)
tensor(-0.2887, grad_fn=<SelectBackward>)
tensor([-0.1882, -0.0921,  0.0462,  0.1552,  0.0955, -0.1248, -0.0467,  0.1713,
        -0.0779,  0.1496], grad_fn=<SelectBackward>)
tensor(0.0259, grad_fn=<SelectBackward>)
tensor([-0.1271,  0.0361], grad_fn=<SelectBackward>)
tensor(-0.2887, grad_fn=<SelectBackward>)
tensor([-0.1882, -0.0923,  0.0388,  0.1550,  0.0955, -0.1324, -0.0467,  0.1713,
        -0.0815,  0.1496], grad_fn=<SelectBackward>)
tensor(0.0160, grad_fn=<SelectBackward>)


# A model has 2 modules. They can be trained separately for different tasks.

In [70]:
import torch
import torch
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import numpy as np
from copy import deepcopy


class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.head = torch.nn.ModuleList()
    for _ in range(2):
      self.head.append(
        torch.nn.Sequential(
          nn.Linear(2, 10),
          nn.ReLU(),
          nn.Linear(10, 20)
          ))
  def forward(self, x, i):
    return self.head[i].forward(x)

xx = np.random.random((40, 2))
x = [[]for i in range(2)]
y = [[]for i in range(2)]

for j in range(40):
  if j % 2 == 0:
    x[0].append(xx[j])
    y[0].append(1)
  else:
    x[1].append(xx[j])
    y[1].append(0)

x[0] = torch.from_numpy(np.array(x[0]).astype(np.float32))
x[1] = torch.from_numpy(np.array(x[1]).astype(np.float32))
y[0] = torch.from_numpy(np.array(y[0]).astype(np.float32))
y[1] = torch.from_numpy(np.array(y[1]).astype(np.float32))
model = Net()
print(x[0].shape, x[1].shape, y[0].shape, y[1].shape, 'shape')
optimizer = optim.SGD(model.parameters(), lr = 1e-2)
criterion = nn.MSELoss()
for i in range(2):
  for e in range(10):
    optimizer.zero_grad()

    out = model(x[i], i)
    loss = criterion(out, y[i])
    loss.backward()
    optimizer.step()
    print('epoch:', e + 1, 'loss:', loss)


print(model)

torch.Size([20, 2]) torch.Size([20, 2]) torch.Size([20]) torch.Size([20]) shape
epoch: 1 loss: tensor(1.0727, grad_fn=<MseLossBackward>)
epoch: 2 loss: tensor(1.0675, grad_fn=<MseLossBackward>)
epoch: 3 loss: tensor(1.0624, grad_fn=<MseLossBackward>)
epoch: 4 loss: tensor(1.0573, grad_fn=<MseLossBackward>)
epoch: 5 loss: tensor(1.0522, grad_fn=<MseLossBackward>)
epoch: 6 loss: tensor(1.0472, grad_fn=<MseLossBackward>)
epoch: 7 loss: tensor(1.0422, grad_fn=<MseLossBackward>)
epoch: 8 loss: tensor(1.0373, grad_fn=<MseLossBackward>)
epoch: 9 loss: tensor(1.0324, grad_fn=<MseLossBackward>)
epoch: 10 loss: tensor(1.0275, grad_fn=<MseLossBackward>)
epoch: 1 loss: tensor(0.0468, grad_fn=<MseLossBackward>)
epoch: 2 loss: tensor(0.0466, grad_fn=<MseLossBackward>)
epoch: 3 loss: tensor(0.0464, grad_fn=<MseLossBackward>)
epoch: 4 loss: tensor(0.0462, grad_fn=<MseLossBackward>)
epoch: 5 loss: tensor(0.0460, grad_fn=<MseLossBackward>)
epoch: 6 loss: tensor(0.0458, grad_fn=<MseLossBackward>)
epoch: 

  return F.mse_loss(input, target, reduction=self.reduction)


In [71]:
for i in range(2):
  for p in save[i].parameters():
    print(p[0])

tensor([-0.1271,  0.0361], grad_fn=<SelectBackward>)
tensor(-0.2887, grad_fn=<SelectBackward>)
tensor([-0.1882, -0.0921,  0.0462,  0.1552,  0.0955, -0.1248, -0.0467,  0.1713,
        -0.0779,  0.1496], grad_fn=<SelectBackward>)
tensor(0.0259, grad_fn=<SelectBackward>)
tensor([-0.1271,  0.0361], grad_fn=<SelectBackward>)
tensor(-0.2887, grad_fn=<SelectBackward>)
tensor([-0.1882, -0.0923,  0.0388,  0.1550,  0.0955, -0.1324, -0.0467,  0.1713,
        -0.0815,  0.1496], grad_fn=<SelectBackward>)
tensor(0.0160, grad_fn=<SelectBackward>)
