In [1]:
# Mount Google Drive at /content/drive (this cell only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Make the project folder on the mounted Drive the working directory; the relative paths used
# later in the notebook ("random_index.pkl", "vgg16-197-best.pth", dataset roots) resolve against it.
# NOTE(review): this is a hard-coded, user-specific absolute path.
import os
os.chdir('/content/drive/My Drive/ColabNotebooks/10605/project')

In [3]:

#imports
import torch
import numpy as np
import pandas as pd
import torchvision.models as models
from torch.utils.tensorboard import SummaryWriter
import seaborn as sns
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset
import time
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import datetime
import copy
import os
import pickle
from tqdm import tqdm



In [4]:
# Per-channel mean and std of the CIFAR-100 training set; passed as `mean`/`std` to the
# dataloader builders, which hand them to transforms.Normalize.
CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

BATCH_SIZE = 16
WARM = False # warm-up setting; in the visible code it is only referenced by a disabled snippet inside train()

DATE_FORMAT = '%A_%d_%B_%Y_%Hh_%Mm_%Ss'
# Timestamp taken when this cell is executed.
TIME_NOW = datetime.datetime.now().strftime(DATE_FORMAT)
# Data settings
subset = False # for local running
k = 10 # number of samples per class wanted in the validation set when splitting train/validation
# NOTE(review): `subset` and `k` are not read anywhere in the visible cells.

# Model settings
USE_TENSORBOARD = False
if USE_TENSORBOARD:
    # NOTE(review): the writer is bound to `foo`, but train() refers to a name `writer`
    # that no visible cell defines — confirm which name is intended.
    foo = SummaryWriter()
use_gpu = True

# Learning-rate scheduler settings (not read by any visible cell)
BASE_LR = 0.001
EPOCH_DECAY = 4
DECAY_WEIGHT = 0.5

# Run on CUDA only when it is both requested and available; otherwise stay on the CPU.
DEVICE = 'cpu'
if use_gpu and torch.cuda.is_available():
    DEVICE = 'cuda'

In [5]:
#read files
def unpickle(file):
    """Load the pickled object stored at ``file``.

    The file is opened in binary mode and decoded with ``encoding='bytes'``,
    so Python 2 ``str`` objects come back as ``bytes``.

    Args:
        file: path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    # NOTE: pickle can execute arbitrary code; only use this on trusted files.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

In [6]:
def compute_mean_std(cifar100_dataset):
    """Compute the per-channel mean and std of a CIFAR-100 dataset.

    Args:
        cifar100_dataset: an indexable dataset with ``len()`` whose samples
            carry a height x width x 3 array at index 1 of each sample.
            NOTE(review): torchvision's CIFAR100 yields (image, label), i.e.
            the image is at index 0 — confirm the sample layout of the
            dataset this is meant to be used with.

    Returns:
        A tuple ``(mean, std)``; each is a 3-tuple holding the value for
        channel 0, 1 and 2 computed over every pixel of every sample.
    """
    sample_count = len(cifar100_dataset)

    # One (H, W, N) stack per colour channel, exactly as many values as the
    # dataset has pixels in that channel.
    # The notebook imports NumPy as `np`; the bare name `numpy` is never bound.
    channels = [
        np.dstack([cifar100_dataset[i][1][:, :, c] for i in range(sample_count)])
        for c in range(3)
    ]
    mean = tuple(np.mean(channel) for channel in channels)
    std = tuple(np.std(channel) for channel in channels)

    return mean, std

In [7]:
#data processing
def reshape_images(data_dict):
    """Turn a batch of flattened images into a float NCHW tensor.

    Args:
        data_dict: a tensor exposing ``.numpy()`` and ``len()`` whose rows each
            hold 3072 values (1024 per channel, channel after channel).

    Returns:
        A float tensor of shape (len(data_dict), 3, 32, 32).
    """
    count = len(data_dict)
    flat = data_dict.numpy()
    # Column-major reshape separates the three 1024-value channel planes ...
    pixels_by_channel = flat.reshape(count, 1024, 3, order = 'F')
    # ... and the row-major reshape lays each plane out as 32 x 32 (channels last).
    channels_last = pixels_by_channel.reshape(count, 32, 32, 3)
    as_float = torch.from_numpy(channels_last).float()
    return as_float.permute(0, 3, 1, 2)

## Dataloader


In [8]:
def get_training_val_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
    """Return training and validation dataloaders split from the CIFAR-100 training set.

    The 50,000 training images are divided with a random permutation that is
    cached in "random_index.pkl" (relative to the working directory), so the
    same 45,000 / 5,000 split is reused on later runs.

    Args:
        mean: per-channel mean used for normalisation
        std: per-channel std used for normalisation
        batch_size: dataloader batch size
        num_workers: number of dataloader worker processes
        shuffle: whether both loaders shuffle their samples
    Returns: (train_loader, validation_loader), both torch DataLoader objects
    """

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])

    cifar100_training = torchvision.datasets.CIFAR100(root='', train=True, download=True, transform=transform_train)

    index_path = "random_index.pkl"
    try:
        # Reuse the cached permutation so the split is stable across runs.
        # NOTE: pickle can execute arbitrary code; the cache file must be trusted.
        with open(index_path, 'rb') as index_file:
            random_index = pickle.load(index_file)
    except (OSError, EOFError, pickle.UnpicklingError):
        # Only a missing or unreadable cache triggers a new split; other errors
        # (including KeyboardInterrupt) are no longer swallowed.
        random_index = np.random.permutation(50000)
        with open(index_path, 'wb') as index_file:
            pickle.dump(random_index, index_file)

    train_index = random_index[:45000]
    validation_index = random_index[45000:]
    # NOTE(review): both subsets wrap the same dataset object, so the validation
    # images also go through the random training augmentations.
    train_dataset = torch.utils.data.Subset(cifar100_training, train_index)
    validation_dataset = torch.utils.data.Subset(cifar100_training, validation_index)

    cifar100_training_loader = DataLoader(
        train_dataset, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)

    cifar100_validation_loader = DataLoader(
        validation_dataset, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)

    return cifar100_training_loader, cifar100_validation_loader

In [9]:
def get_training_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
    """Build a dataloader over the full CIFAR-100 training split.

    Every sample is randomly cropped (32 px, padding 4), randomly flipped
    horizontally, randomly rotated (15 degrees), converted to a tensor and
    normalised with ``mean`` / ``std``. The data is stored under
    'cifar-100-python' and downloaded when missing.

    Args:
        mean: per-channel mean used for normalisation
        std: per-channel std used for normalisation
        batch_size: dataloader batch size
        num_workers: number of dataloader worker processes
        shuffle: whether to shuffle the samples
    Returns: torch DataLoader over the training split
    """
    augmentation_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]

    train_set = torchvision.datasets.CIFAR100(
        root='cifar-100-python',
        train=True,
        download=True,
        transform=transforms.Compose(augmentation_steps),
    )

    return DataLoader(train_set, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)

In [10]:
def get_test_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
    """Build a dataloader over the CIFAR-100 test split.

    Samples are only converted to tensors and normalised with ``mean`` /
    ``std``; no augmentation is applied. The data is stored under
    'cifar-100-python' and downloaded when missing.

    Args:
        mean: per-channel mean used for normalisation
        std: per-channel std used for normalisation
        batch_size: dataloader batch size
        num_workers: number of dataloader worker processes
        shuffle: whether to shuffle the samples
    Returns: torch DataLoader over the test split
    """
    preprocessing_steps = [
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]

    test_set = torchvision.datasets.CIFAR100(
        root='cifar-100-python',
        train=False,
        download=True,
        transform=transforms.Compose(preprocessing_steps),
    )

    return DataLoader(test_set, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)

## train and evaluate

In [11]:
def train(model, epoch, train_dataloader, optimizer, loss_function, callbacks = None, writer = None):
    """Train ``model`` for one epoch and print per-batch progress.

    Args:
        model: the network to train; it is moved to DEVICE and put in train mode
        epoch: 1-based epoch number, used for the global step and in messages
        train_dataloader: loader yielding ``(images, labels)`` batches; must
            support ``len()`` and expose ``.dataset``
        optimizer: optimizer stepping the model's parameters
        loss_function: callable ``loss_function(outputs, labels)`` returning a scalar loss
        callbacks: optional object with ``get_zeros_mask(model)`` (called once
            before the epoch) and ``apply_zeros_mask(model)`` (called after
            every optimizer step)
        writer: optional TensorBoard-style object with ``add_scalar`` and
            ``add_histogram``. When omitted, a module-level ``writer`` is used
            if one exists; otherwise logging is skipped instead of failing
            with a NameError.
    """
    if writer is None:
        # Backward compatibility: this function used to read a global `writer`.
        writer = globals().get('writer')

    start = time.time()
    model.to(DEVICE)
    model.train()
    # keep track of the zero mask
    if callbacks is not None:
        callbacks.get_zeros_mask(model)

    batches_per_epoch = len(train_dataloader)
    total_samples = len(train_dataloader.dataset)
    # Counted from the real batch sizes, so the progress line stays correct
    # even when the loader's batch size differs from the global BATCH_SIZE.
    samples_seen = 0

    for batch_index, (images, labels) in enumerate(train_dataloader):
        # The model lives on DEVICE, so the batch must always follow it there.
        labels = labels.to(DEVICE)
        images = images.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        if callbacks is not None:
            callbacks.apply_zeros_mask(model)

        n_iter = (epoch - 1) * batches_per_epoch + batch_index + 1
        samples_seen += len(images)

        if writer is not None:
            # Gradient norms of the last child module, one scalar per global step.
            last_layer = list(model.children())[-1]
            for name, para in last_layer.named_parameters():
                if para.grad is None:
                    continue  # frozen parameters have no gradient to report
                if 'weight' in name:
                    writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter)
                if 'bias' in name:
                    writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter)

        print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
            loss.item(),
            optimizer.param_groups[0]['lr'],
            epoch=epoch,
            trained_samples=samples_seen,
            total_samples=total_samples
        ))

        #update training loss for each iteration
        if writer is not None:
            writer.add_scalar('Train/loss', loss.item(), n_iter)

    if writer is not None:
        for name, param in model.named_parameters():
            # splitext splits "a.b.weight" into ("a.b", ".weight"): layer path and attribute.
            layer, attr = os.path.splitext(name)
            attr = attr[1:]
            writer.add_histogram("{}/{}".format(layer, attr), param, epoch)

    finish = time.time()

    print('epoch {} training time consumed: {:.2f}s'.format(epoch, finish - start))

In [12]:
def evaluate_model(model, val_dataloader):
    """Return the top-1 accuracy of ``model`` on a validation or test loader.

    The model is moved to DEVICE and switched to eval mode (it is left in
    eval mode afterwards).

    Args:
        model: the network to evaluate
        val_dataloader: iterable yielding ``(images, labels)`` batches

    Returns:
        Fraction of samples whose arg-max prediction equals the label.

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    """
    model.to(DEVICE)
    model.eval()

    total_preds = 0
    total_corrects = 0

    # No gradients are needed for evaluation; disabling autograd avoids
    # building (and holding memory for) a graph on every batch.
    with torch.no_grad():
        for images, labels in val_dataloader:
            # The model lives on DEVICE, so the inputs must always follow it there.
            images = images.to(DEVICE)

            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            total_preds += len(labels)
            # Compare on the CPU, where the labels are loaded.
            total_corrects += (preds.cpu() == labels.cpu()).sum().item()

    return total_corrects/total_preds

In [13]:
# Build the training and test loaders used by the rest of the notebook.
# The string literal below is an earlier, disabled call kept for reference; as a bare
# expression it has no effect.
# NOTE(review): it unpacks two loaders, but get_training_dataloader returns a single one.
'''
cifar100_training_loader, cifar100_validation_loader = get_training_dataloader(
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers = 4,
    batch_size = BATCH_SIZE,
    shuffle = True
)
'''
# Augmented loader over the full training split.
cifar100_training_loader = get_training_dataloader(
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers = 4,
    batch_size = BATCH_SIZE,
    shuffle = True
)


# Test loader, normalised with the training-set statistics (shuffling is enabled here too).
cifar100_test_loader = get_test_dataloader(
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers = 4,
    batch_size = BATCH_SIZE,
    shuffle = True
)

Files already downloaded and verified
Files already downloaded and verified


## model 

In [14]:
import torch
import torch.nn as nn

# Layer specifications consumed by make_layers: an integer adds a 3x3 convolution with that
# many output channels, 'M' adds a 2x2 max-pooling step.
# Keys 'A', 'B', 'D' and 'E' are the specs used by vgg11_bn, vgg13_bn, vgg16_bn and vgg19_bn.
cfg = {
    'A' : [64,     'M', 128,      'M', 256, 256,           'M', 512, 512,           'M', 512, 512,           'M'],
    'B' : [64, 64, 'M', 128, 128, 'M', 256, 256,           'M', 512, 512,           'M', 512, 512,           'M'],
    'D' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256,      'M', 512, 512, 512,      'M', 512, 512, 512,      'M'],
    'E' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
}

class VGG(nn.Module):
    """VGG-style classifier: a feature extractor followed by a three-layer MLP head.

    Args:
        features: module producing 512 values per sample once flattened
        num_class: number of output classes (size of the final linear layer)
    """

    def __init__(self, features, num_class=100):
        super().__init__()
        self.features = features

        # Positions inside the Sequential (0, 3, 6 for the linear layers) are part of
        # the state-dict key names, so the layer order must not change.
        head_layers = [
            nn.Linear(512, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_class),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the feature extractor, flatten per sample, and classify."""
        feature_maps = self.features(x)
        batch_size = feature_maps.size()[0]
        flattened = feature_maps.view(batch_size, -1)
        return self.classifier(flattened)

def make_layers(cfg, batch_norm=False):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: sequence whose entries are either ``'M'`` (2x2 max pooling with
            stride 2) or an integer (3x3 convolution with padding 1 and that
            many output channels, followed by ReLU)
        batch_norm: when true, a BatchNorm2d is inserted between each
            convolution and its ReLU

    Returns:
        An ``nn.Sequential`` with the layers in ``cfg`` order; the first
        convolution expects 3 input channels.
    """
    modules = []
    in_channels = 3

    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            modules.append(nn.Conv2d(in_channels, spec, kernel_size=3, padding=1))
            if batch_norm:
                modules.append(nn.BatchNorm2d(spec))
            modules.append(nn.ReLU(inplace=True))
            # The next convolution consumes what this one produced.
            in_channels = spec

    return nn.Sequential(*modules)

def vgg11_bn():
    """Build a VGG from layer spec 'A' with batch normalisation."""
    feature_extractor = make_layers(cfg['A'], batch_norm=True)
    return VGG(feature_extractor)

def vgg13_bn():
    """Build a VGG from layer spec 'B' with batch normalisation."""
    feature_extractor = make_layers(cfg['B'], batch_norm=True)
    return VGG(feature_extractor)

def vgg16_bn():
    """Build a VGG from layer spec 'D' with batch normalisation."""
    feature_extractor = make_layers(cfg['D'], batch_norm=True)
    return VGG(feature_extractor)

def vgg19_bn():
    """Build a VGG from layer spec 'E' with batch normalisation."""
    feature_extractor = make_layers(cfg['E'], batch_norm=True)
    return VGG(feature_extractor)

In [15]:
def load_vgg(path):
    """Create a vgg16_bn model and load the weights stored at ``path``.

    Args:
        path: file containing a state dict saved with ``torch.save``

    Returns:
        The model with the loaded weights, moved to DEVICE.
    """
    model = vgg16_bn()
    # map_location remaps the stored tensors onto DEVICE, so a checkpoint saved
    # on a GPU can still be loaded on a CPU-only runtime.
    weights = torch.load(path, map_location=DEVICE)
    model.load_state_dict(weights)
    model.to(DEVICE)

    return model

In [16]:
# Load the saved VGG16-BN weights; the path is relative to the working directory set earlier.
model = load_vgg("vgg16-197-best.pth")

In [17]:

def model_report(model, dataloader):
    """Evaluate ``model`` on ``dataloader``, print the accuracy, and return it."""
    score = evaluate_model(model, dataloader)
    print("accuracy is ", score)
    return score

In [18]:
# Baseline: accuracy of the loaded (uncompressed) model on the test loader.
# model_report already prints the value; the extra print repeats it.
test_acc = model_report(model, cifar100_test_loader)
print(test_acc)

accuracy is  0.7201
0.7201


In [19]:
# Show the module layout; the numeric positions inside `features` are the indices the
# SVD cells below use to build their state-dict keys.
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [None]:
# Display the model's state dict.
# NOTE(review): as the last expression of the cell this renders every weight tensor — consider
# listing only the keys, or removing the cell.
model.state_dict()

## Model SVD Compression

In [32]:
import numpy as np

# Positions of the 13 convolution layers inside `features` for spec 'D' with batch norm
# (each conv is followed by BatchNorm2d and ReLU; each 'M' adds one pooling module).
idx = [0, 3, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40]
# idx = [40]
# State-dict keys of the conv kernels that will be compressed.
conv_layers = ['features.'+str(i)+'.weight' for i in idx]
# Fresh network that receives the compressed weights, and a copy of the trained
# model's state-dict mapping whose conv entries get replaced.
new_model = vgg16_bn()   
new_state_dict = model.state_dict()


In [33]:

# Low-rank sweep over every layer in `conv_layers`: for divisor i, each conv kernel is
# flattened to (out_channels, in_channels*kh*kw), truncated to 1/i of its maximum rank
# via SVD, written into a fresh model, and that model is evaluated on the test loader.
for i in range(1,4):
    print("===========================================")
    retain = 0      # parameters needed to store the truncated factors, summed over layers
    original = 0    # parameters of the untouched conv kernels, summed over layers
    new_model = vgg16_bn()
    # state_dict() returns a new mapping each call, so replacing entries here
    # does not modify the trained `model`.
    new_state_dict = model.state_dict()
    for conv in conv_layers:
        conv1_weight = new_state_dict[conv].cpu().numpy()
        out_channels = conv1_weight.shape[0]
        fan_in = conv1_weight.shape[1]*conv1_weight.shape[2]*conv1_weight.shape[3]
        sval_nums = int(min(out_channels, fan_in)/i)
        original += out_channels*fan_in
        # Only the leading singular vectors are used, so the economy SVD is enough;
        # the full one would also build a fan_in x fan_in VT that is thrown away.
        U,Sigma,VT = np.linalg.svd(np.reshape(conv1_weight,(out_channels, fan_in)), full_matrices=False)
        # Scaling U's columns by the singular values equals U @ diag(Sigma) without the dense diag.
        con_restruct1 = (U[:,0:sval_nums] * Sigma[0:sval_nums]).dot(VT[0:sval_nums,:])
        # NOTE(review): the sval_nums**2 term counts Sigma as a dense k x k matrix, which is
        # why the ratio exceeds 1 at full rank; storing only the diagonal needs k values —
        # confirm the intended accounting.
        retain += sval_nums*out_channels + sval_nums**2 + sval_nums*fan_in
        new_state_dict[conv] = torch.from_numpy(np.reshape(con_restruct1, conv1_weight.shape))

    new_model.load_state_dict(new_state_dict)
    # Move the compressed model (not the original one) to the evaluation device.
    new_model.to(DEVICE)
    test_acc = evaluate_model(new_model, cifar100_test_loader)
    print("rank percentage:", 1/i)
    print("percentage:", retain/original)
    print("Accuracy is", test_acc)

rank percentage: 1.0
percentage: 1.2456842965660362
Accuracy is 0.7201
rank percentage: 0.5
percentage: 0.5921276174565262
Accuracy is 0.6617
rank percentage: 0.3333333333333333
percentage: 0.3862768706683895
Accuracy is 0.4288


In [31]:

# Same low-rank sweep, restricted to the later conv layers: the five earliest
# convolutions (positions 0, 3, 7, 10, 14) keep their original weights.
idx = [17, 20, 24, 27, 30, 34, 37, 40]
conv_layers = ['features.'+str(i)+'.weight' for i in idx]

for i in range(3,5):
    print("===========================================")
    retain = 0      # parameters needed to store the truncated factors, summed over layers
    original = 0    # parameters of the untouched kernels of the compressed layers
    new_model = vgg16_bn()
    # state_dict() returns a new mapping each call, so replacing entries here
    # does not modify the trained `model`.
    new_state_dict = model.state_dict()
    for conv in conv_layers:
        conv1_weight = new_state_dict[conv].cpu().numpy()
        out_channels = conv1_weight.shape[0]
        fan_in = conv1_weight.shape[1]*conv1_weight.shape[2]*conv1_weight.shape[3]
        sval_nums = int(min(out_channels, fan_in)/i)
        original += out_channels*fan_in
        # Only the leading singular vectors are used, so the economy SVD is enough;
        # the full one would also build a fan_in x fan_in VT that is thrown away.
        U,Sigma,VT = np.linalg.svd(np.reshape(conv1_weight,(out_channels, fan_in)), full_matrices=False)
        # Scaling U's columns by the singular values equals U @ diag(Sigma) without the dense diag.
        con_restruct1 = (U[:,0:sval_nums] * Sigma[0:sval_nums]).dot(VT[0:sval_nums,:])
        # NOTE(review): the sval_nums**2 term counts Sigma as a dense k x k matrix; storing
        # only the diagonal needs k values — confirm the intended accounting.
        retain += sval_nums*out_channels + sval_nums**2 + sval_nums*fan_in
        new_state_dict[conv] = torch.from_numpy(np.reshape(con_restruct1, conv1_weight.shape))

    new_model.load_state_dict(new_state_dict)
    new_model.to(DEVICE)
    test_acc = evaluate_model(new_model, cifar100_test_loader)
    print("rank percentage:", 1/i)
    print("percentage:", retain/original)
    print("Accuracy is", test_acc)

rank percentage: 0.3333333333333333
percentage: 0.38526817604347513
Accuracy is 0.6523
rank percentage: 0.25
percentage: 0.28761574074074076
Accuracy is 0.5463
