In [1]:
import math
import torch.nn as nn
import torch.nn.init as init

import argparse
import os
import shutil
import time
from tqdm import tqdm
import gc

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets

In [2]:
class VGG(nn.Module):
    """Convolutional feature extractor followed by a three-layer MLP classifier.

    Args:
        features: Module applied to the input batch first. Its output must
            flatten to 512 values per sample, because the first ``nn.Linear``
            of the classifier takes 512 inputs.
        num_classes: Number of output logits. Defaults to 10, the value that
            used to be hard-coded.
    """

    def __init__(self, features, num_classes=10):
        super().__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, num_classes),
        )
        # Re-initialise every conv layer: weights ~ N(0, sqrt(2 / (kh * kw * out_channels))),
        # biases set to zero. Linear layers keep PyTorch's default initialisation.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # Conv layers built with bias=False have no bias tensor to reset.
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        """Run the feature extractor, flatten per sample, and return class logits."""
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

def make_layers(cfg, batch_norm=False, in_channels=3):
    """Translate a layer specification into an ``nn.Sequential`` feature extractor.

    Args:
        cfg: Sequence whose items are either the string ``'M'`` (adds a 2x2,
            stride-2 max-pool) or an integer (adds a 3x3, padding-1 convolution
            with that many output channels, followed by ReLU).
        batch_norm: When true, insert ``nn.BatchNorm2d`` between each
            convolution and its ReLU.
        in_channels: Channel count of the input images. Defaults to 3, the
            value that used to be hard-coded.

    Returns:
        ``nn.Sequential`` containing the layers in specification order.
    """
    layers = []
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(v))
        layers.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        in_channels = v
    return nn.Sequential(*layers)

# Layer specifications consumed by make_layers: an integer is the output
# channel count of a 3x3 convolution, 'M' is a 2x2 max-pool. Each row below
# is one stage ending in a pool.
cfg = {
    'A': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'B': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'D': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'E': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}

def vgg19():
    """Build the VGG 19-layer model (configuration "E") without batch norm."""
    feature_extractor = make_layers(cfg['E'])
    return VGG(feature_extractor)

In [3]:
# Use the GPU when one is available; fall back to the CPU instead of raising
# on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = vgg19().to(device)

In [6]:
batch_size = 512
workers = 4

# Per-channel normalisation applied after ToTensor().
# NOTE(review): these values look like the commonly used ImageNet statistics
# rather than SVHN-specific ones -- confirm this is intended.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training split: random horizontal flip and a 32x32 random crop with 4 px of
# padding, then tensor conversion and normalisation.
# NOTE(review): horizontal flips mirror digits; confirm this augmentation is
# wanted for SVHN.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.SVHN(
        root='./data',
        split='train',
        download=True,
        transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(size=32, padding=4),
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    pin_memory=True,
)

# Test split: no augmentation, fixed order.
val_loader = torch.utils.data.DataLoader(
    dataset=datasets.SVHN(
        root='./data',
        split='test',
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    batch_size=batch_size,
    shuffle=False,
    num_workers=workers,
    pin_memory=True,
)

Using downloaded and verified file: ./data/train_32x32.mat
Downloading http://ufldl.stanford.edu/housenumbers/test_32x32.mat to ./data/test_32x32.mat


100%|██████████| 64275384/64275384 [08:04<00:00, 132585.71it/s] 


In [7]:
def compute_accuracy(model, data_loader):
    """Return the classification accuracy of ``model`` over ``data_loader`` in percent.

    Each batch is moved to the device holding the model's parameters (CPU if
    the model has none) and evaluated without gradient tracking. The model is
    switched to eval mode and left in that mode.

    Args:
        model: Module mapping a feature batch to per-class logits.
        data_loader: Iterable yielding ``(features, targets)`` tensor pairs.

    Returns:
        0-dim float tensor with the accuracy in the range [0, 100].

    Raises:
        ValueError: If ``data_loader`` yields no examples.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct_pred, num_examples = 0, 0
    # Inference only: skip building autograd graphs for every batch.
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)

            logits = model(features)
            _, predicted_labels = torch.max(logits, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        raise ValueError('data_loader yielded no examples; accuracy is undefined')
    return correct_pred.float() / num_examples * 100

def compute_epoch_loss(model, data_loader):
    """Return the mean per-example cross-entropy of ``model`` over ``data_loader``.

    Losses are summed per batch and divided by the total number of examples,
    so batches of different sizes are weighted correctly. Each batch is moved
    to the device holding the model's parameters (CPU if the model has none).
    The model is switched to eval mode and left in that mode.

    Args:
        model: Module mapping a feature batch to per-class logits.
        data_loader: Iterable yielding ``(features, targets)`` tensor pairs.

    Returns:
        0-dim tensor with the average cross-entropy per example.

    Raises:
        ValueError: If ``data_loader`` yields no examples.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    curr_loss, num_examples = 0., 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)
            logits = model(features)
            # reduction='sum' so the division below is over examples, not batches.
            loss = torch.nn.functional.cross_entropy(logits, targets, reduction='sum')
            num_examples += targets.size(0)
            curr_loss += loss

    if num_examples == 0:
        raise ValueError('data_loader yielded no examples; loss is undefined')
    return curr_loss / num_examples

In [8]:
# Optimiser: Adagrad over every model parameter.
# Alternative kept for reference:
#   torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer = torch.optim.Adagrad(params=model.parameters(), lr=0.001)

# Training objective: cross-entropy on the raw logits.
criterion = nn.CrossEntropyLoss()

In [9]:
num_epochs = 400

# Send batches to whichever device holds the model instead of assuming CUDA.
train_device = next(model.parameters()).device

start_time = time.time()
for epoch in range(num_epochs):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(train_device)
        targets = targets.to(train_device)

        ### FORWARD + LOSS
        optimizer.zero_grad()
        logits = model(features)
        cost = criterion(logits, targets)

        ### BACKPROP + UPDATE MODEL PARAMETERS
        cost.backward()
        optimizer.step()

        ### LOGGING (every 50th batch)
        if not batch_idx % 50:
            print ('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                   %(epoch+1, num_epochs, batch_idx,
                     len(train_loader), cost.item()))

    # Optional per-epoch evaluation on the training set:
    #   compute_accuracy(model, train_loader), compute_epoch_loss(model, train_loader)

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

Epoch: 001/400 | Batch 0000/0144 | Cost: 4.6016
Epoch: 001/400 | Batch 0050/0144 | Cost: 2.3183


Epoch: 001/400 | Batch 0100/0144 | Cost: 2.3005
Time elapsed: 0.10 min
Epoch: 002/400 | Batch 0000/0144 | Cost: 2.2525
Epoch: 002/400 | Batch 0050/0144 | Cost: 2.2947
Epoch: 002/400 | Batch 0100/0144 | Cost: 2.2621
Time elapsed: 0.19 min
Epoch: 003/400 | Batch 0000/0144 | Cost: 2.3071
Epoch: 003/400 | Batch 0050/0144 | Cost: 2.2645
Epoch: 003/400 | Batch 0100/0144 | Cost: 2.2450
Time elapsed: 0.29 min
Epoch: 004/400 | Batch 0000/0144 | Cost: 2.9412
Epoch: 004/400 | Batch 0050/0144 | Cost: 2.2517
Epoch: 004/400 | Batch 0100/0144 | Cost: 2.2718
Time elapsed: 0.38 min
Epoch: 005/400 | Batch 0000/0144 | Cost: 2.2381
Epoch: 005/400 | Batch 0050/0144 | Cost: 2.1961
Epoch: 005/400 | Batch 0100/0144 | Cost: 2.2301
Time elapsed: 0.47 min
Epoch: 006/400 | Batch 0000/0144 | Cost: 2.1191
Epoch: 006/400 | Batch 0050/0144 | Cost: 2.2123
Epoch: 006/400 | Batch 0100/0144 | Cost: 2.1341
Time elapsed: 0.56 min
Epoch: 007/400 | Batch 0000/0144 | Cost: 2.1462
Epoch: 007/400 | Batch 0050/0144 | Cost: 2.135

In [10]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader``, print a summary, and return the accuracy.

    The model is moved to CUDA if available, otherwise MPS, otherwise CPU, and
    switched to eval mode; both changes remain in effect for the caller.

    Args:
        dataloader: Loader yielding ``(X, y)`` batches; must expose ``.dataset``
            and support ``len()``.
        model: Module mapping ``X`` to per-class scores.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor. The
            reported loss is the mean of its per-batch values.

    Returns:
        Accuracy over the whole dataset in percent (float).

    Raises:
        ValueError: If the dataloader or its dataset is empty.
    """
    if torch.cuda.is_available():
        device = "cuda:0"
    elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    if size == 0 or num_batches == 0:
        raise ValueError("dataloader is empty; cannot compute test metrics")

    model.to(device)
    model.eval()
    test_loss, correct = 0, 0
    # The context manager closes the progress bar even if a batch raises.
    with torch.no_grad(), tqdm(total=num_batches) as pbar:
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            pbar.update(1)

    # Release cached memory once after the loop; doing so on every batch only
    # slowed evaluation down.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct*100

In [13]:
# Evaluate on the SVHN test split and echo the returned accuracy (percent).
print(test(dataloader=val_loader, model=model, loss_fn=nn.CrossEntropyLoss()))

100%|██████████| 51/51 [00:10<00:00,  4.69it/s]

Test Error: 
 Accuracy: 92.7%, Avg loss: 0.347010 

92.72433927473878





In [12]:
# Save only the weights (state dict). Create the target folder first so the
# save does not fail when the directory does not exist yet.
checkpoint_path = '../PruningAlgo/models/vgg19_svhn_base_model'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save(model.state_dict(), checkpoint_path)

In [15]:
# Display the names of the tensors stored in the model's state dict.
model.state_dict().keys()

odict_keys(['features.0.weight', 'features.0.bias', 'features.2.weight', 'features.2.bias', 'features.5.weight', 'features.5.bias', 'features.7.weight', 'features.7.bias', 'features.10.weight', 'features.10.bias', 'features.12.weight', 'features.12.bias', 'features.14.weight', 'features.14.bias', 'features.16.weight', 'features.16.bias', 'features.19.weight', 'features.19.bias', 'features.21.weight', 'features.21.bias', 'features.23.weight', 'features.23.bias', 'features.25.weight', 'features.25.bias', 'features.28.weight', 'features.28.bias', 'features.30.weight', 'features.30.bias', 'features.32.weight', 'features.32.bias', 'features.34.weight', 'features.34.bias', 'classifier.1.weight', 'classifier.1.bias', 'classifier.4.weight', 'classifier.4.bias', 'classifier.6.weight', 'classifier.6.bias'])