Unstructured Pruning에 대해 작성할 것이다.

# Basis

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import numpy as np
import os, math, time
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
class ToyNet(nn.Module):
  """Small three-stage CNN followed by a 10-way linear classifier.

  Every stage is Conv2d(3x3, padding 1, no bias) -> BatchNorm2d -> ReLU.
  Stages two and three use stride 2, so a 32x32 input reaches the classifier
  as an 8x8 map with 64 channels, which is what `fc` is sized for.
  """

  def __init__(self):
    super().__init__()

    def conv_bn_relu(c_in, c_out, stride):
      # One stage of the network; only the channel counts and stride vary.
      return nn.Sequential(
          nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=stride, padding=1, bias=False),
          nn.BatchNorm2d(c_out),
          nn.ReLU(inplace=True),
      )

    self.layer1 = conv_bn_relu(3, 16, stride=1)
    self.layer2 = conv_bn_relu(16, 32, stride=2)
    self.layer3 = conv_bn_relu(32, 64, stride=2)
    self.fc = nn.Sequential(nn.Linear(8 * 8 * 64, 10))

    # Re-initialise: conv weights ~ N(0, sqrt(2 / (kh * kw * out_channels))),
    # batch-norm scale = 1 and shift = 0.
    for module in self.modules():
      if isinstance(module, nn.Conv2d):
        k_h, k_w = module.kernel_size
        fan = k_h * k_w * module.out_channels
        module.weight.data.normal_(0, math.sqrt(2. / fan))
      elif isinstance(module, nn.BatchNorm2d):
        module.weight.data.fill_(1)
        module.bias.data.zero_()

  def forward(self, x):
    # Spatial size per stage for a 32x32 input: 32x32 -> 16x16 -> 8x8.
    features = self.layer3(self.layer2(self.layer1(x)))
    # Flatten everything except the batch dimension before the classifier.
    return self.fc(features.view(x.size(0), -1))

In [None]:
from google.colab import drive
drive.mount('/content/drive')

!ls '/content/drive/Shareddrives/Data/Internship_study_2/Data'

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transform_train = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(), normalize])
transform_valid = transforms.Compose([transforms.ToTensor(), normalize])

trainset = datasets.CIFAR10('/content/drive/Shareddrives/Data/Internship_study_2/Data', train=True, transform=transform_train, download=True)
validset = datasets.CIFAR10('/content/drive/Shareddrives/Data/Internship_study_2/Data', train=False, transform=transform_valid, download=True)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True)
validloader = torch.utils.data.DataLoader(validset, batch_size=256, shuffle=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
digit-recognizer  pre_trained_model
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/drive/Shareddrives/Data/Internship_study_2/Data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting /content/drive/Shareddrives/Data/Internship_study_2/Data/cifar-10-python.tar.gz to /content/drive/Shareddrives/Data/Internship_study_2/Data
Files already downloaded and verified


# Utils

In [None]:
class AverageMeter(object):
  """Keeps the most recent value of a metric together with its running weighted mean.

  `name` labels the metric and `fmt` is the format spec (including the leading
  colon, e.g. ':6.2f') applied to both numbers when the meter is printed.
  """

  def __init__(self, name, fmt=':f'):
    self.name, self.fmt = name, fmt
    self.reset()

  def reset(self):
    """Clear the latest value, the mean, the weighted sum and the sample count."""
    self.val = self.avg = self.sum = self.count = 0

  def update(self, val, n=1):
    """Record `val` as the latest value, counted `n` times in the running mean."""
    self.val = val
    self.count += n
    self.sum += val * n
    self.avg = self.sum / self.count

  def __str__(self):
    # Renders as "<name> <latest> (<mean>)", both numbers using the same spec.
    template = '{name} {val%s} ({avg%s})' % (self.fmt, self.fmt)
    return template.format(**vars(self))

class ProgressMeter(object):
  """Prints one tab-separated status line: prefix + "[batch/total]" + every meter."""

  def __init__(self, num_batches, *meters, prefix=""):
    self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
    self.meters = meters
    self.prefix = prefix

  def print(self, batch):
    """Write the status line for batch index `batch` to stdout."""
    header = self.prefix + self.batch_fmtstr.format(batch)
    print('\t'.join([header, *map(str, self.meters)]))

  def _get_batch_fmtstr(self, num_batches):
    # Pad the running index to the width of the total so the columns line up,
    # e.g. 196 batches -> "[{:3d}/196]".
    width = len(str(num_batches // 1))
    slot = '{:%dd}' % width
    return '[{}/{}]'.format(slot, slot.format(num_batches))

def adjust_learning_rate(optimi, epoch, lr, decay_epoch=100, decayed_lr=0.02):
  """Write the learning rate for `epoch` into every parameter group of `optimi`.

  Args:
    optimi: optimizer-like object exposing `param_groups`, a list of dicts.
    epoch: current epoch index.
    lr: learning rate used before `decay_epoch`.
    decay_epoch: first epoch at which `decayed_lr` replaces `lr`.
    decayed_lr: learning rate used from `decay_epoch` onwards.

  NOTE(review): this branch used to hold the bare expression `0.02`, which has
  no effect, so the schedule never changed the rate. It is read here as a
  dropped `lr = 0.02` -- confirm that an absolute value (not a multiplier) was
  intended, and that 0.02 suits the optimizer in use.
  """
  if epoch >= decay_epoch:
    lr = decayed_lr
  for param_group in optimi.param_groups:
    param_group['lr'] = lr

def accuracy(output, target, topk=(1,)):
  """Top-k accuracy, in percent, for each k in `topk`.

  `output` is indexed as (sample, class) scores and `target` holds one class
  index per sample. The result is a list with one single-element tensor per
  requested k, in the same order as `topk`.
  """
  with torch.no_grad():
    n_samples = target.size(0)
    # Indices of the max(topk) highest scores per sample, transposed so that
    # row r holds every sample's r-th best guess.
    _, ranked = output.topk(max(topk), 1, True, True)
    ranked = ranked.t()
    hits = ranked.eq(target.view(1, -1).expand_as(ranked))

    # A sample counts for k if its target appears anywhere in the first k rows.
    return [
        hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / n_samples)
        for k in topk
    ]

# Define train, valid

In [None]:
def train(train_loader, **kwargs):
  """Run one training epoch and log running timing, loss and accuracy.

  Args:
    train_loader: iterable of (input, target) batches that also supports len().
    **kwargs: looked up by name -- `epoch` (only used in the log prefix),
      `model`, `criterion` and `optimizer`.

  Returns:
    (top1.avg, top5.avg): the sample-weighted mean top-1 and top-5 accuracy
    accumulated over the epoch by the AverageMeters.
  """
  epoch = kwargs.get('epoch')
  model = kwargs.get('model')
  criterion = kwargs.get('criterion')
  optimizer = kwargs.get('optimizer')

  batch_time = AverageMeter('Time', ':6.3f')
  data_time = AverageMeter('Data', ':6.3f')
  losses = AverageMeter('Loss', ':.4e')
  top1 = AverageMeter('Acc@1', ':6.2f')
  top5 = AverageMeter('Acc@5', ':6.2f')
  progress = ProgressMeter(len(train_loader), batch_time, data_time, losses, top1, top5, prefix="Epoch:[{}]".format(epoch))

  model.train()

  end = time.time()

  for i, (images, target) in enumerate(train_loader):
    # Time spent waiting for this batch to be produced by the loader.
    data_time.update(time.time() - end)
    # Tensors move to the GPU directly; the torch.autograd.Variable wrapper is
    # deprecated and adds nothing here.
    images = images.cuda()
    target = target.cuda()

    output = model(images)
    loss = criterion(output, target)

    # Meters are weighted by the batch size so a smaller final batch does not
    # skew the epoch averages.
    acc1, acc5 = accuracy(output, target, topk=(1,5))
    losses.update(loss.item(), images.size(0))
    top1.update(acc1[0], images.size(0))
    top5.update(acc5[0], images.size(0))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    batch_time.update(time.time() - end)

    if i % 100==0:
      progress.print(i)

    # Restart the stopwatch for the next iteration. Without this the Time and
    # Data meters report the time elapsed since the start of the epoch instead
    # of per-batch durations.
    end = time.time()

  print('====> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))

  return top1.avg, top5.avg

def validate(val_loader, model, criterion):
  """Evaluate `model` on every batch of `val_loader` with gradients disabled.

  Logs a progress line every 100 batches and a summary line at the end.
  Returns (top-1 mean, top-5 mean), both weighted by batch size.
  """
  timer = AverageMeter('Time', ':6.3f')
  loss_meter = AverageMeter('Loss', ':.4e')
  acc1_meter = AverageMeter('Acc@1', ':6.2f')
  acc5_meter = AverageMeter('Acc@5', ':6.2f')
  progress = ProgressMeter(len(val_loader), timer, loss_meter, acc1_meter, acc5_meter, prefix='Test: ')

  # Evaluation mode for the whole pass.
  model.eval()

  with torch.no_grad():
    tick = time.time()

    for step, (images, labels) in enumerate(val_loader):
      images = Variable(images).cuda()
      labels = labels.cuda(non_blocking=True)
      n_samples = images.size(0)

      logits = model(images)
      loss = criterion(logits, labels)

      acc1, acc5 = accuracy(logits, labels, topk=(1,5))
      loss_meter.update(loss.item(), n_samples)
      acc1_meter.update(acc1[0], n_samples)
      acc5_meter.update(acc5[0], n_samples)

      timer.update(time.time() - tick)

      if step % 100 == 0:
        progress.print(step)

      # Start timing the next batch from here.
      tick = time.time()

    print('====> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=acc1_meter, top5=acc5_meter))

  return acc1_meter.avg, acc5_meter.avg

# Implement

기존 모델에서 mask를 추가한 class를 새로 만드는 것은 나중에 확장을 고려하였을 때 불필요한 점이 많을 것으로 예상된다.

대신 model을 input으로 주면 mask를 씌워주는 함수를 만들기로 하였다.

Custom Conv2d 만드는 법

In [None]:
class MaskConv2d(nn.Conv2d):
    """Conv2d whose weight is multiplied element-wise by `mask` on every forward pass.

    `mask` has the same shape as `weight` and is registered as a buffer, so it
    follows the module through `.cuda()` / `.to()` / `.double()` and is saved
    in `state_dict()`, but it is not a parameter and is never handed to an
    optimizer.

    The mask starts as all zeros, which disables every weight: until the mask
    is filled in, the layer outputs only its bias (zero when bias is off).

    Args:
        in_channels .. padding_mode: forwarded unchanged to nn.Conv2d.
        device, dtype: forwarded to nn.Conv2d only when given.
        threshold: stored as `self.threshold`; not used by this class itself.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None,threshold=0):
        # Pass device/dtype through only when the caller set them, so the
        # default construction is the same call as before.
        factory_kwargs = {}
        if device is not None:
            factory_kwargs['device'] = device
        if dtype is not None:
            factory_kwargs['dtype'] = dtype
        super(MaskConv2d, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode, **factory_kwargs)
        self.threshold = threshold
        # A buffer (rather than a plain attribute) is what lets nn.Module move
        # and serialise the mask together with the weights.
        self.register_buffer('mask', torch.zeros_like(self.weight, requires_grad=False))

    def forward(self, input):
        # No device juggling here: weight, bias and mask already live on the
        # same device because all three are managed by the module.
        return self._conv_forward(input, self.weight * self.mask, self.bias)


def _to_mask_conv(conv):
    """Build a MaskConv2d with the hyper-parameters, weights, device and dtype of `conv`."""
    masked = MaskConv2d(
        conv.in_channels, conv.out_channels, conv.kernel_size,
        stride=conv.stride, padding=conv.padding, dilation=conv.dilation,
        groups=conv.groups, bias=conv.bias is not None,
        padding_mode=conv.padding_mode,
    )
    masked.to(device=conv.weight.device, dtype=conv.weight.dtype)
    with torch.no_grad():
        # Keep the existing (initialised or trained) values instead of the
        # fresh default initialisation of the new layer.
        masked.weight.copy_(conv.weight)
        if conv.bias is not None:
            masked.bias.copy_(conv.bias)
    masked.weight.requires_grad_(conv.weight.requires_grad)
    if conv.bias is not None:
        masked.bias.requires_grad_(conv.bias.requires_grad)
    return masked


def _swap_convs(module):
    """Recursively replace plain nn.Conv2d children of `module` with MaskConv2d."""
    # Snapshot the children first because entries are replaced while looping.
    for name, child in list(module.named_children()):
        if isinstance(child, MaskConv2d):
            # Already converted: leave it (and its mask) alone.
            continue
        if isinstance(child, nn.Conv2d):
            setattr(module, name, _to_mask_conv(child))
        else:
            _swap_convs(child)


def Conv2MConv(model):
    """Replace every nn.Conv2d inside `model`, at any nesting depth, with a MaskConv2d.

    The model is modified in place and then printed. Each replacement keeps the
    source layer's stride, padding, dilation, groups, padding mode, bias
    setting, weights, device and dtype. Layers that are already MaskConv2d are
    left untouched, so calling this twice is harmless. Returns None.
    """
    _swap_convs(model)
    print("---Masking Finished---")
    print(model)

In [None]:
# Show the plain network, leave one blank line, then swap its conv layers
# (Conv2MConv prints the converted network itself).
model = ToyNet().cuda()
print(model, end='\n\n')
Conv2MConv(model)

ToyNet(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (fc): Sequential(
    (0): Linear(in_features=4096, out_features=10, bias=True)
  )
)

---Masking Finished---
ToyNet(
  (layer1): Sequential(
    (0): MaskConv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_runnin

# Train & Test

In [None]:
# The network is built and converted first, and only afterwards moved to the GPU.
maskedmodel = ToyNet()
Conv2MConv(maskedmodel)
# A CUDA problem occurred here.
# Changing MaskConv2d.forward to do self.weight = self.weight.cuda() and calling maskedmodel.cuda() after the conversion resolved it -- why is that?
maskedmodel = maskedmodel.cuda()
criterion = nn.CrossEntropyLoss().cuda()
# optimizer = optim.Adam([param for name, param in maskedmodel.named_parameters() if 'mask' not in name]) # list of all parameter except mask tensor
optimizer = optim.Adam(maskedmodel.parameters())

---Masking Finished---
ToyNet(
  (layer1): Sequential(
    (0): MaskConv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer2): Sequential(
    (0): MaskConv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer3): Sequential(
    (0): MaskConv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (fc): Sequential(
    (0): Linear(in_features=4096, out_features=10, bias=True)
  )
)


모든 weight에 대한 mask를 0으로 설정해 주었기 때문에 학습이 되지 않고 있는 모습이다 (loss가 ln(10) ≈ 2.3026 근처에 머물고 Acc@1이 10% 수준이다).

In [None]:
# Train for five epochs, validating after each one and remembering the best
# validation accuracies seen so far.
lr = 1e-4
best_acc1 = best_acc5 = 0

for epoch in range(5):
  adjust_learning_rate(optimizer, epoch, lr)

  # Report the rate actually stored in the optimizer, not the requested one.
  print(f"Epoch : {epoch}, lr : {optimizer.param_groups[0]['lr']}")

  print('===> [ Training ]')
  acc1_train, acc5_train = train(
      trainloader, epoch=epoch, model=maskedmodel,
      criterion=criterion, optimizer=optimizer)

  print('===> [ Validation ]')
  acc1_valid, acc5_valid = validate(validloader, maskedmodel, criterion)

  # Same result as max(new, best): the previous best is kept only when it is
  # strictly greater than the new value.
  best_acc1 = best_acc1 if best_acc1 > acc1_valid else acc1_valid
  best_acc5 = best_acc5 if best_acc5 > acc5_valid else acc5_valid

Epoch : 0, lr : 0.0001
===> [ Training ]
Epoch:[0][  0/196]	Time  0.411 ( 0.411)	Data  0.158 ( 0.158)	Loss 2.3030e+00 (2.3030e+00)	Acc@1   8.98 (  8.98)	Acc@5  48.44 ( 48.44)
Epoch:[0][100/196]	Time 13.458 ( 6.980)	Data 13.441 ( 6.960)	Loss 2.3032e+00 (2.3027e+00)	Acc@1   9.38 (  9.94)	Acc@5  49.61 ( 49.64)
====> Acc@1 9.922 Acc@5 49.810
===> [ Validation ]
Test: [ 0/40]	Time  0.081 ( 0.081)	Loss 2.3027e+00 (2.3027e+00)	Acc@1   8.98 (  8.98)	Acc@5  50.00 ( 50.00)
====> Acc@1 10.000 Acc@5 50.000
Epoch : 1, lr : 0.0001
===> [ Training ]
Epoch:[1][  0/196]	Time  0.137 ( 0.137)	Data  0.120 ( 0.120)	Loss 2.3021e+00 (2.3021e+00)	Acc@1  14.06 ( 14.06)	Acc@5  53.91 ( 53.91)
Epoch:[1][100/196]	Time 13.209 ( 6.686)	Data 13.192 ( 6.668)	Loss 2.3025e+00 (2.3027e+00)	Acc@1  10.55 ( 10.07)	Acc@5  46.88 ( 49.68)
====> Acc@1 10.000 Acc@5 49.856
===> [ Validation ]
Test: [ 0/40]	Time  0.082 ( 0.082)	Loss 2.3025e+00 (2.3025e+00)	Acc@1   8.59 (  8.59)	Acc@5  49.22 ( 49.22)
====> Acc@1 10.000 Acc@5 50.000