<a href="https://colab.research.google.com/github/IGM-Students/ResNet-compression/blob/main/ResNet101_CIFAR_10_V2_Depthwise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab setup: install the extra packages before the import cell runs.
# tensorboardcolab is installed here, but no visible cell imports it.
!pip install tensorboardcolab
# Presumably provides the `cosine_annealing_warmup` module imported in the import cell — TODO confirm.
!pip install 'git+https://github.com/katsura-jp/pytorch-cosine-annealing-with-warmup'
# The import from this package is commented out in the model cell, which defines its own
# `depthwise_separable_conv` class instead.
!pip install 'git+https://github.com/seungjunlee96/DepthwiseSeparableConvolution_Pytorch.git'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorboardcolab
  Downloading tensorboardcolab-0.0.22.tar.gz (2.5 kB)
Building wheels for collected packages: tensorboardcolab
  Building wheel for tensorboardcolab (setup.py) ... [?25l[?25hdone
  Created wheel for tensorboardcolab: filename=tensorboardcolab-0.0.22-py3-none-any.whl size=3858 sha256=14266c2490bb788a5c85997668a5addecd16a751474f07aeb0fd22c8897acd15
  Stored in directory: /root/.cache/pip/wheels/e5/6b/92/99a181b543b45a45df4346bcdf01aac1f882fe447c63302878
Successfully built tensorboardcolab
Installing collected packages: tensorboardcolab
Successfully installed tensorboardcolab-0.0.22
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/katsura-jp/pytorch-cosine-annealing-with-warmup
  Cloning https://github.com/katsura-jp/pytorch-cosine-annealing-with-warmup to /tmp/pip-req-build-vkuf

In [None]:
# Mount Google Drive at /content/drive; the checkpoint directory used later lives under this path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet101

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from cosine_annealing_warmup import CosineAnnealingWarmupRestarts

from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter
# TensorBoard writer used by the training loop's add_scalar calls.
# No log directory is passed; the %tensorboard cell reads `runs`, so the default
# location is presumably under that directory — TODO confirm.
writer = SummaryWriter()

import pathlib
import os

In [None]:
# Load the TensorBoard notebook extension (needed for the %tensorboard magic used next).
%load_ext tensorboard

In [None]:
# Launch TensorBoard on the `runs` log directory.
%tensorboard --logdir=runs

In [None]:
# Mini-batch size used by both the training and the test DataLoader.
batch_size = 256

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Per-channel mean / std passed to Normalize for both splits.
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2023, 0.1994, 0.2010)
normalize = transforms.Normalize(cifar10_mean, cifar10_std)

# Training images are augmented (padded random crop, horizontal flip, rotation)
# before being converted to tensors and normalised.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    normalize,
])

# Test images are only converted and normalised - no augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

# CIFAR-10 training split (downloaded on first use), shuffled each epoch.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data/',
    train=True,
    transform=transform_train,
    download=True,
)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# CIFAR-10 test split, iterated in a fixed order.
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Human-readable class names - presumably in CIFAR-10 label-index order; confirm before relying on it.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
#from DepthwiseSeparableConvolution import depthwise_separable_conv


class depthwise_separable_conv(nn.Module):
    """Depthwise-separable 2-D convolution.

    Applies a grouped convolution with one group per input channel
    (``self.depthwise``), then a 1x1 convolution that maps ``nin`` channels
    to ``nout`` channels (``self.pointwise``).

    Args:
        nin: number of input channels.
        nout: number of output channels.
        kernel_size: kernel size of the depthwise convolution.
        padding: padding of the depthwise convolution.
        stride: integer stride, applied to both spatial dimensions of the
            depthwise convolution.
        bias: whether both convolutions carry a bias term.
    """

    def __init__(self, nin, nout, kernel_size = 3, padding = 1, stride = 1, bias=False):
        super().__init__()
        spatial_stride = (stride, stride)
        # groups == nin: every input channel is filtered on its own.
        self.depthwise = nn.Conv2d(
            in_channels=nin,
            out_channels=nin,
            kernel_size=kernel_size,
            stride=spatial_stride,
            padding=padding,
            groups=nin,
            bias=bias,
        )
        # 1x1 convolution that mixes the per-channel results into nout channels.
        self.pointwise = nn.Conv2d(
            in_channels=nin,
            out_channels=nout,
            kernel_size=1,
            bias=bias,
        )

    def forward(self, x):
        """Run the depthwise convolution, then the pointwise one."""
        return self.pointwise(self.depthwise(x))


# ResNet-101 without pretrained weights and with a 10-way classifier.
model = resnet101(pretrained=False, num_classes=10)

# Replace the stem convolution with a 3x3, stride-1 depthwise-separable conv
# and turn the stem max-pool into a no-op.
model.conv1 = depthwise_separable_conv(3, 64, kernel_size = 3, padding = 1, bias=False)
model.maxpool = nn.Identity()

# Change every bottleneck's 3x3 convolution (conv2) to a depthwise-separable one.
# Channel counts and stride are read from the convolution being replaced, so the
# loop yields the same widths (64/128/256/512) and the same stride-2 first blocks
# in layer2-4 as writing each replacement out by hand, without per-block lines
# to keep in sync.
for stage in (model.layer1, model.layer2, model.layer3, model.layer4):
    for block in stage:
        replaced = block.conv2
        block.conv2 = depthwise_separable_conv(
            replaced.in_channels,
            replaced.out_channels,
            kernel_size=3,
            padding=1,
            stride=replaced.stride[0],
            bias=False,
        )

model.to(device)
model.train()

In [None]:
# for name, module in model.named_modules():
#     if isinstance(module, nn.Conv2d):
#         if 'downsample' in name:
#             module.stride = (1, 1)

In [None]:
# Print a per-layer summary for a 3x32x32 input.
# The model is moved to the device selected earlier instead of being forced onto
# CUDA, so this cell also runs on a CPU-only runtime; torchsummary is told which
# device type to build its dummy input for.
summary(model.to(device), (3, 32, 32), device=device.type)

In [None]:
# Upper and lower learning-rate bounds of the warm-up + cosine schedule.
peak_lr = 0.1
floor_lr = 0.0001

loss_fn = nn.CrossEntropyLoss()

optimizer = optim.SGD(
    model.parameters(),
    lr=peak_lr,
    momentum=0.9,
    weight_decay=1e-5,
)

# scheduler.step() is called once per epoch by the training loop, so the step
# counts below are measured in epochs.
schedule_kwargs = dict(
    first_cycle_steps=200,
    cycle_mult=1.0,
    max_lr=peak_lr,
    min_lr=floor_lr,
    warmup_steps=50,
    gamma=1.0,
)
scheduler = CosineAnnealingWarmupRestarts(optimizer, **schedule_kwargs)

In [None]:
# Make sure Google Drive is mounted before touching the checkpoint folder.
from google.colab import drive
drive.mount('/content/drive')

# Folder on Drive where checkpoints are written and later looked up.
save_dir = pathlib.Path('/content/drive/MyDrive/Colab_Notebooks/MGU/Resnet_CIFAR10_v2_DepthwiseSeparable')

# Create the folder, including any missing parents, if it is not there yet.
save_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Find the checkpoint with the highest epoch number in save_dir.
# The training loop saves files as 'resnet101.<epoch>.h5', so the epoch is the
# second dot-separated field of the file name.
# last_epoch stays -1 and last_file_name stays '' when no checkpoint is found.
last_epoch = -1
last_file_name = ''
for file in os.listdir(save_dir):
    name_parts = file.split('.')
    # Skip entries whose second field is missing or not a plain integer (stray
    # files, hidden folders, ...) instead of crashing on them.
    if len(name_parts) < 2 or not name_parts[1].isdecimal():
        continue
    file_epoch = int(name_parts[1])
    if file_epoch > last_epoch:
        last_epoch = file_epoch
        last_file_name = file
last_epoch

In [None]:
# Train for 250 epochs, evaluating on the test loader after every epoch.
#
# Resume behaviour: epochs below `last_epoch` are skipped, the checkpoint
# `last_file_name` is loaded when `epoch == last_epoch`, and training continues
# from the following epoch. scheduler.step() runs on every iteration, skipped
# ones included, so the learning-rate schedule is advanced to the resume point.
# Only the model weights are checkpointed; optimizer state is not restored.
#
# NOTE(review): _best_acc restarts from 0 on a resumed run, so the first epoch
# trained after a resume is saved whenever its accuracy is above 0.
_best_acc = 0
for epoch in range(250):
    if epoch > last_epoch:
        print("Started epoch:", epoch)

        # ---- training pass ----
        _loss = 0.0
        model.train()
        for inputs, labels in train_loader:
            # Use the device selected earlier rather than a hard-coded .cuda(),
            # so the loop also runs when no GPU is available.
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            # Running sum of the per-batch loss values (not divided by the batch count).
            _loss += loss.item()

        _train_loss = _loss

        # ---- evaluation pass ----
        _loss = 0.0
        correct = 0
        total = 0

        model.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = loss_fn(outputs, labels)

                _loss += loss.item()
                # Predicted class = index of the largest output along dim 1.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        _test_loss = _loss
        _test_acc = correct/total

        print("Done: ", _train_loss, _test_loss, _test_acc)
        writer.add_scalar("Loss/train", _train_loss, epoch)
        writer.add_scalar("Loss/test", _test_loss, epoch)
        writer.add_scalar("Acc/test", _test_acc, epoch)

        # Write a checkpoint only when test accuracy improves on the best seen in this run.
        if _test_acc > _best_acc:
            _best_acc = _test_acc
            model_name = 'resnet101.%s.h5' % epoch
            torch.save(model.state_dict(), os.path.join(save_dir, model_name))
    elif epoch == last_epoch:
        # Load the newest checkpoint. map_location remaps the stored tensors onto
        # the current device, so a checkpoint saved on a GPU also loads on CPU.
        print(last_epoch)
        checkpoint_path = os.path.join(save_dir, last_file_name)
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    scheduler.step()