<a href="https://colab.research.google.com/github/1995subhankar1995/Torch-Tutorials/blob/main/A_model_has_mul_modules.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


#### Finetuning the convnet ####
# Load a pretrained model and reset final fully connected layer.

model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model.fc = nn.Linear(num_ftrs, 2)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = optim.SGD(model.parameters(), lr=0.001)

# StepLR Decays the learning rate of each parameter group by gamma every step_size epochs
# Decay LR by a factor of 0.1 every 7 epochs
# Learning rate scheduling should be applied after optimizer’s update
# e.g., you should write your code this way:
# for epoch in range(100):
#     train(...)
#     validate(...)
#     scheduler.step()

step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=25)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import os
import sys

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 10
batch_size = 16
lr = 1e-3

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))])

train_dataset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_dataset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
train_loader = torch.utils.data.DataLoader(train_dataset)
num_classes = 10
train = [[] for i in range(num_classes)]
for i in range(num_classes):
  for image, label in train_loader:
    image = image.view(3, 32, 32).numpy()
    if i == label.numpy()[0]:
      train[i].append(image)
  train[i] = torch.tensor(train[i])


In [None]:
for i in range(num_classes):
  print('Shape:', train[i].shape, 'Label:', i)

Shape: torch.Size([5000, 3, 32, 32]) Label: 0
Shape: torch.Size([5000, 3, 32, 32]) Label: 1
Shape: torch.Size([5000, 3, 32, 32]) Label: 2
Shape: torch.Size([5000, 3, 32, 32]) Label: 3
Shape: torch.Size([5000, 3, 32, 32]) Label: 4
Shape: torch.Size([5000, 3, 32, 32]) Label: 5
Shape: torch.Size([5000, 3, 32, 32]) Label: 6
Shape: torch.Size([5000, 3, 32, 32]) Label: 7
Shape: torch.Size([5000, 3, 32, 32]) Label: 8
Shape: torch.Size([5000, 3, 32, 32]) Label: 9


In [None]:
def next_data(data, batch_size = 64, shuffle = True):
  data = torch.utils.data.DataLoader(data, batch_size, shuffle = True)
  return data

In [None]:
class Classifier(nn.Module):
  def __init__(self):
    super(Classifier, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.maxpool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.lin1 = nn.Linear(5 * 16 * 5, 120)
    self.lin2 = nn.Linear(120, 20)
    self.drop = nn.Dropout(p = 0.5)
    self.lin3 = nn.Linear(20, 2)
    self.soft = nn.LogSoftmax(dim = 1)
    

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = self.maxpool(x)
    x = F.relu(self.conv2(x))
    x = self.maxpool(x)
    x = x.view(-1, 16 * 5 * 5)
    x = F.relu(self.lin1(x))
    x = F.relu(self.lin2(x))
    #x = self.drop(x)
    x = self.lin3(x)
    x = self.soft(x)
    return x
model_classifier = Classifier()
model_classifier.to(device)
print(model_classifier)

Classifier(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (lin1): Linear(in_features=400, out_features=120, bias=True)
  (lin2): Linear(in_features=120, out_features=20, bias=True)
  (drop): Dropout(p=0.5, inplace=False)
  (lin3): Linear(in_features=20, out_features=2, bias=True)
  (soft): LogSoftmax(dim=1)
)


In [None]:
batch_size = 64
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-2)
model_classifier.train()
for i in range(num_classes):
  train_data = next_data(train[i], batch_size = 64, shuffle = True)
  for e in range(num_epochs):
    loss_sum = 0
    for images in train_data:
      optimizer.zero_grad()
      images = images.to(device)
      out = model_classifier(images)
      
      ones = torch.ones(len(images), dtype = torch.int64)
      ones = ones.to(device)
      loss = criterion(out, ones)
      loss.backward()
      optimizer.step()
      loss_sum += loss
    print('Loss:', loss_sum, 'Epoch:', e + 1)


Loss: tensor(55.1438, grad_fn=<AddBackward0>) Epoch: 1
Loss: tensor(55.1401, grad_fn=<AddBackward0>) Epoch: 2
Loss: tensor(55.1404, grad_fn=<AddBackward0>) Epoch: 3
Loss: tensor(55.1409, grad_fn=<AddBackward0>) Epoch: 4
Loss: tensor(55.1413, grad_fn=<AddBackward0>) Epoch: 5
Loss: tensor(55.1374, grad_fn=<AddBackward0>) Epoch: 6
Loss: tensor(55.1373, grad_fn=<AddBackward0>) Epoch: 7
Loss: tensor(55.1384, grad_fn=<AddBackward0>) Epoch: 8
Loss: tensor(55.1336, grad_fn=<AddBackward0>) Epoch: 9
Loss: tensor(55.1401, grad_fn=<AddBackward0>) Epoch: 10
Loss: tensor(55.1698, grad_fn=<AddBackward0>) Epoch: 1
Loss: tensor(55.1674, grad_fn=<AddBackward0>) Epoch: 2
Loss: tensor(55.1667, grad_fn=<AddBackward0>) Epoch: 3


KeyboardInterrupt: ignored

In [None]:
import torch
import torch.nn as nn
import torch.functional as F

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    for _ in range(4):
      self.lin1 = nn.Linear(30, 4)
      self.relu = nn.ReLU()
      self.lin2 = nn.Linear(4, 10)

  def forward(self, x):
    x = self.relu(self.lin1(x))
    x = self.lin2(x)
    return x

model = Net()
print(model)

Net(
  (lin1): Linear(in_features=30, out_features=4, bias=True)
  (relu): ReLU()
  (lin2): Linear(in_features=4, out_features=10, bias=True)
)


In [None]:
def Diff_Loss(p_out, S_out):
  
  return torch.norm(torch.matmul(torch.transpose(p_out, 1, 0), S_out))**2

a = torch.randn(30, 10)
b = torch.randn(30, 12)  
print(Diff_Loss(a, b))

tensor(4060.5161)


In [2]:
a = [[] for i in range(3)]
print(a[0])

[]


In [57]:
import torch
import torch
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import numpy as np
from copy import deepcopy

path = '/content/drive/My Drive/ColabNotebooks/results/save1.pth'

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.lin1 = nn.Linear(2, 10)
    self.relu = nn.ReLU()
    self.lin2 = nn.Linear(10, 20)

  def forward(self, x):
    x = self.relu(self.lin1(x))
    x = self.lin2(x)
    return x
xx = np.random.random((40, 2))
x = [[]for i in range(2)]
y = [[]for i in range(2)]

for j in range(40):
  if j % 2 == 0:
    x[0].append(xx[j])
    y[0].append(1)
  else:
    x[1].append(xx[j])
    y[1].append(0)

x[0] = torch.from_numpy(np.array(x[0]).astype(np.float32))
x[1] = torch.from_numpy(np.array(x[1]).astype(np.float32))
y[0] = torch.from_numpy(np.array(y[0]).astype(np.float32))
y[1] = torch.from_numpy(np.array(y[1]).astype(np.float32))
model = Net()
torch.save(model.state_dict(), path)
print(x[0].shape, x[1].shape, y[0].shape, y[1].shape, 'shape')
save = []
optimizer = optim.SGD(model.parameters(), lr = 1e-2)
criterion = nn.MSELoss()
for i in range(2):
  for e in range(100):
    optimizer.zero_grad()

    out = model(x[i])
    loss = criterion(out, y[i])
    loss.backward()
    optimizer.step()
    print('epoch:', e + 1, 'loss:', loss)
  save.append(deepcopy(model))
  model.load_state_dict(torch.load(path))


print(save)

torch.Size([20, 2]) torch.Size([20, 2]) torch.Size([20]) torch.Size([20]) shape
epoch: 1 loss: tensor(1.0908, grad_fn=<MseLossBackward>)
epoch: 2 loss: tensor(1.0839, grad_fn=<MseLossBackward>)
epoch: 3 loss: tensor(1.0771, grad_fn=<MseLossBackward>)
epoch: 4 loss: tensor(1.0703, grad_fn=<MseLossBackward>)
epoch: 5 loss: tensor(1.0636, grad_fn=<MseLossBackward>)
epoch: 6 loss: tensor(1.0569, grad_fn=<MseLossBackward>)
epoch: 7 loss: tensor(1.0503, grad_fn=<MseLossBackward>)
epoch: 8 loss: tensor(1.0438, grad_fn=<MseLossBackward>)
epoch: 9 loss: tensor(1.0373, grad_fn=<MseLossBackward>)
epoch: 10 loss: tensor(1.0309, grad_fn=<MseLossBackward>)
epoch: 11 loss: tensor(1.0245, grad_fn=<MseLossBackward>)
epoch: 12 loss: tensor(1.0182, grad_fn=<MseLossBackward>)
epoch: 13 loss: tensor(1.0119, grad_fn=<MseLossBackward>)
epoch: 14 loss: tensor(1.0057, grad_fn=<MseLossBackward>)
epoch: 15 loss: tensor(0.9995, grad_fn=<MseLossBackward>)
epoch: 16 loss: tensor(0.9934, grad_fn=<MseLossBackward>)
e

  return F.mse_loss(input, target, reduction=self.reduction)


tensor(0.0665, grad_fn=<MseLossBackward>)
epoch: 17 loss: tensor(0.0660, grad_fn=<MseLossBackward>)
epoch: 18 loss: tensor(0.0656, grad_fn=<MseLossBackward>)
epoch: 19 loss: tensor(0.0652, grad_fn=<MseLossBackward>)
epoch: 20 loss: tensor(0.0647, grad_fn=<MseLossBackward>)
epoch: 21 loss: tensor(0.0643, grad_fn=<MseLossBackward>)
epoch: 22 loss: tensor(0.0639, grad_fn=<MseLossBackward>)
epoch: 23 loss: tensor(0.0634, grad_fn=<MseLossBackward>)
epoch: 24 loss: tensor(0.0630, grad_fn=<MseLossBackward>)
epoch: 25 loss: tensor(0.0626, grad_fn=<MseLossBackward>)
epoch: 26 loss: tensor(0.0622, grad_fn=<MseLossBackward>)
epoch: 27 loss: tensor(0.0618, grad_fn=<MseLossBackward>)
epoch: 28 loss: tensor(0.0614, grad_fn=<MseLossBackward>)
epoch: 29 loss: tensor(0.0610, grad_fn=<MseLossBackward>)
epoch: 30 loss: tensor(0.0606, grad_fn=<MseLossBackward>)
epoch: 31 loss: tensor(0.0602, grad_fn=<MseLossBackward>)
epoch: 32 loss: tensor(0.0598, grad_fn=<MseLossBackward>)
epoch: 33 loss: tensor(0.0594,

In [54]:
for i in range(2):
  for p in save[i].parameters():
    print(p)

Parameter containing:
tensor([[ 0.2914, -0.1130],
        [-0.2879,  0.4710],
        [-0.5957,  0.1354],
        [ 0.0294, -0.1331],
        [ 0.5959, -0.1033],
        [ 0.5983,  0.2800],
        [-0.4025, -0.0251],
        [ 0.0883, -0.3096],
        [ 0.0556,  0.1948],
        [-0.0578, -0.5518]], requires_grad=True)
Parameter containing:
tensor([ 0.5731,  0.1377,  0.2841, -0.5811, -0.5374, -0.3452,  0.3281, -0.2634,
        -0.1337,  0.1244], requires_grad=True)
Parameter containing:
tensor([[ 0.1177, -0.1863, -0.2026, -0.2303, -0.2994, -0.1909,  0.2157, -0.0466,
          0.1999,  0.1740],
        [-0.1457, -0.1335, -0.0319,  0.1815, -0.2844,  0.2966,  0.3079,  0.0892,
         -0.1505,  0.0631],
        [ 0.1753, -0.1600,  0.1418, -0.1915, -0.1387,  0.3213, -0.2342, -0.2125,
         -0.0708, -0.1614],
        [ 0.3205,  0.0806, -0.1084, -0.0817, -0.1881, -0.2159, -0.2609,  0.1877,
         -0.1884, -0.1653],
        [-0.1631,  0.1972,  0.0872, -0.0069,  0.0598, -0.0216,  0.1231

In [67]:
import torch
import torch
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import numpy as np
from copy import deepcopy


class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.head = torch.nn.ModuleList()
    for _ in range(2):
      self.head.append(
        torch.nn.Sequential(
          nn.Linear(2, 10),
          nn.ReLU(),
          nn.Linear(10, 20)
          ))
  def forward(self, x, i):
    return self.head[i].forward(x)

xx = np.random.random((40, 2))
x = [[]for i in range(2)]
y = [[]for i in range(2)]

for j in range(40):
  if j % 2 == 0:
    x[0].append(xx[j])
    y[0].append(1)
  else:
    x[1].append(xx[j])
    y[1].append(0)

x[0] = torch.from_numpy(np.array(x[0]).astype(np.float32))
x[1] = torch.from_numpy(np.array(x[1]).astype(np.float32))
y[0] = torch.from_numpy(np.array(y[0]).astype(np.float32))
y[1] = torch.from_numpy(np.array(y[1]).astype(np.float32))
model = Net()
print(x[0].shape, x[1].shape, y[0].shape, y[1].shape, 'shape')
optimizer = optim.SGD(model.parameters(), lr = 1e-2)
criterion = nn.MSELoss()
for i in range(2):
  for e in range(100):
    optimizer.zero_grad()

    out = model(x[i], i)
    loss = criterion(out, y[i])
    loss.backward()
    optimizer.step()
    print('epoch:', e + 1, 'loss:', loss)


print(model)

torch.Size([20, 2]) torch.Size([20, 2]) torch.Size([20]) torch.Size([20]) shape
epoch: 1 loss: tensor(1.1015, grad_fn=<MseLossBackward>)
epoch: 2 loss: tensor(1.0960, grad_fn=<MseLossBackward>)
epoch: 3 loss: tensor(1.0906, grad_fn=<MseLossBackward>)
epoch: 4 loss: tensor(1.0852, grad_fn=<MseLossBackward>)
epoch: 5 loss: tensor(1.0799, grad_fn=<MseLossBackward>)
epoch: 6 loss: tensor(1.0746, grad_fn=<MseLossBackward>)
epoch: 7 loss: tensor(1.0693, grad_fn=<MseLossBackward>)
epoch: 8 loss: tensor(1.0641, grad_fn=<MseLossBackward>)
epoch: 9 loss: tensor(1.0589, grad_fn=<MseLossBackward>)
epoch: 10 loss: tensor(1.0538, grad_fn=<MseLossBackward>)
epoch: 11 loss: tensor(1.0487, grad_fn=<MseLossBackward>)
epoch: 12 loss: tensor(1.0437, grad_fn=<MseLossBackward>)
epoch: 13 loss: tensor(1.0387, grad_fn=<MseLossBackward>)
epoch: 14 loss: tensor(1.0337, grad_fn=<MseLossBackward>)
epoch: 15 loss: tensor(1.0288, grad_fn=<MseLossBackward>)
epoch: 16 loss: tensor(1.0239, grad_fn=<MseLossBackward>)
e

  return F.mse_loss(input, target, reduction=self.reduction)


tensor(0.0602, grad_fn=<MseLossBackward>)
epoch: 12 loss: tensor(0.0599, grad_fn=<MseLossBackward>)
epoch: 13 loss: tensor(0.0596, grad_fn=<MseLossBackward>)
epoch: 14 loss: tensor(0.0594, grad_fn=<MseLossBackward>)
epoch: 15 loss: tensor(0.0591, grad_fn=<MseLossBackward>)
epoch: 16 loss: tensor(0.0588, grad_fn=<MseLossBackward>)
epoch: 17 loss: tensor(0.0585, grad_fn=<MseLossBackward>)
epoch: 18 loss: tensor(0.0582, grad_fn=<MseLossBackward>)
epoch: 19 loss: tensor(0.0579, grad_fn=<MseLossBackward>)
epoch: 20 loss: tensor(0.0577, grad_fn=<MseLossBackward>)
epoch: 21 loss: tensor(0.0574, grad_fn=<MseLossBackward>)
epoch: 22 loss: tensor(0.0571, grad_fn=<MseLossBackward>)
epoch: 23 loss: tensor(0.0569, grad_fn=<MseLossBackward>)
epoch: 24 loss: tensor(0.0566, grad_fn=<MseLossBackward>)
epoch: 25 loss: tensor(0.0563, grad_fn=<MseLossBackward>)
epoch: 26 loss: tensor(0.0561, grad_fn=<MseLossBackward>)
epoch: 27 loss: tensor(0.0558, grad_fn=<MseLossBackward>)
epoch: 28 loss: tensor(0.0555,

In [65]:
for i in range(2):
  for p in save[i].parameters():
    print(p)

Parameter containing:
tensor([[-0.6287,  0.5508],
        [ 0.4756, -0.1229],
        [-0.0266,  0.4165],
        [ 0.7792,  0.0266],
        [ 0.5805, -0.4286],
        [ 0.5344, -0.6218],
        [ 0.0021,  0.5801],
        [-0.2372, -0.4829],
        [ 0.4925,  0.0033],
        [-0.6928,  0.6090]], requires_grad=True)
Parameter containing:
tensor([ 0.2154, -0.1954,  0.0571, -0.1987,  0.0665,  0.0586, -0.3737, -0.0659,
         0.0250, -0.6591], requires_grad=True)
Parameter containing:
tensor([[ 0.3075,  0.1309, -0.1176,  0.1279, -0.1492,  0.1504,  0.0225,  0.0463,
         -0.2329,  0.0400],
        [-0.2673, -0.0536, -0.2192, -0.1399,  0.1230, -0.0732, -0.0473, -0.2437,
         -0.0604,  0.0715],
        [-0.1268, -0.0397,  0.2849,  0.2475,  0.2983,  0.3058, -0.1552, -0.2309,
         -0.0852,  0.2704],
        [-0.2189, -0.1722, -0.1644, -0.1820,  0.1493,  0.2105, -0.0689,  0.1194,
          0.1019,  0.1048],
        [-0.0004, -0.1172, -0.0338,  0.0727,  0.1329,  0.0718,  0.0513