In [0]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F

from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

#vgg11 = models.vgg11(pretrained=True)

from google.colab import drive
drive.mount('/content/drive/')

data_dir = "/content/drive/My Drive/Research/hymenoptera_data/hymenoptera_data"

# Number of classes in the dataset
num_classes = 2

# Batch size for training (change depending on how much memory you have)
batch_size = 8

# Number of epochs to train for 
num_epochs = 5

# Flag for feature extracting. When False, we finetune the whole model, 
#   when True we only update the reshaped layer params
feature_extract = False

input_size = 224

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    since = time.time()

    val_acc_history = []
    
    # best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            
            prune_buffers(model)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # # deep copy the model
            # if phase == 'val' and epoch_acc > best_acc:
            #     best_acc = epoch_acc
            #     best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)
        # if epoch > 3:
        #     print("pruned")
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    # model.load_state_dict(best_model_wts)
    return model, val_acc_history

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create training and validation datasets
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
resultDevice = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/
Initializing Datasets and Dataloaders...


In [0]:
import  torch
import  torch . nn  as  nn
from  torchvision . models  import  vgg11_bn
from  collections  import  namedtuple
#from StrngIO import StringIO
#import sys

def getLayer(model, child = 'classifier', layer_tp = nn.Linear):
    """Returns indexs to all the modules of type layer_tp which have name 'child' in model's .chilren().
    Layer starts at 1 since .modules() returns ALL modules in model, the first elment of which is it's model.
    return  index against .children()[child], index against .modules()"""
    layer = 1
    layers = []
    l = float('inf')
    assert child in model._modules.keys(), f"{child} is not a direct child of model, must be one of: {str(model._modules.keys())}"
    for c in model.named_children():
        if c[0] == child:
            l = min(l, layer)
            for m in c[1].modules():
                if isinstance(m, layer_tp):
                    layers += [layer]
                layer += 1
        else:
        #This won't get all the sub-sub modules of network, but since those wouldn't be indexed before the target layer in nn.modules don't care
        #len(module_iter(c[1])) to get all sub-sub modules 
            layer += len(list(c[1].modules()))
    return [i-l for i in layers], layers

# def redirectPrint(func):
#     "funciton to return prinout as a string"
#     pass

def module_iter(model):
    "Get's the 'bottom most' modules of model"
    if not list(model.children()):#bottom node 
        return [model]
    else:
        return [mc for m in model.children() for mc in module_iter(m)]


class  Vgg11bn ( torch.nn.Module ):
    """a Vgg11 model with batch normalization and instead of 1000 predictive classes,
    only has 2"""

    def  __init__ ( self, num_classes = 2, only_tune_classifier = False, numResults = batch_size ):      
        super(). __init__ ()
        vgg_pre = models.vgg11_bn(pretrained=True)
        #self = models.vgg11_bn(pretrained=True, {'num_classes':num_classes})
        self.features  = copy.deepcopy(vgg_pre.features)
        self.avgpool = copy.deepcopy(vgg_pre.avgpool)              
        self.classifier = copy.deepcopy(vgg_pre.classifier)
        num_ftrs = vgg_pre.classifier[6].in_features
        self.classifier[6] = nn.Linear(num_ftrs,num_classes)#changes last linear layer
        #only_tune_class when True we only update the reshaped layer params; when false we finetune the whole model, 
        self.only_tune_classifier = only_tune_classifier
        if only_tune_classifier:
            for param in self.features.parameters():
                param.requires_grad = False
        self.numResults = numResults

        self.featurePruneIx, featurePruneIx = \
                        getLayer(self, child='features', layer_tp = nn.Conv2d)
        laterFeat = lambda i: i > 5
        self.featurePruneIx = list(filter(laterFeat, self.featurePruneIx))
        featurePruneIx = list(filter(laterFeat, featurePruneIx))
        self.classifierPruneIx, classifierPruneIx = \
                        getLayer(self, child='classifier', layer_tp = nn.Linear)
        self.pruneLayersIx = featurePruneIx + classifierPruneIx
        self.pruneNames, self.pruneLayers = zip(*[list(self.named_modules())[i]
                                            for i in self.pruneLayersIx])
        self.pruneNames = [i.replace(".", "") for i in self.pruneNames]

        zero = torch.zeros(3,224,224).view(-1, 3, 224, 224)
        self.ret_pruneLayers = True
        self.training = False
        _, prune_out = self.forward(zero)
        self.ret_pruneLayers = False
        self.currently_pruning = {}
        for ix, (name, tnsr) in enumerate(prune_out.items()):
            buf_name = name.replace(".", "") + "buffer"
            shape = [numResults] + list(tnsr.shape)
            module = self.pruneLayers[ix]
            module.register_buffer(buf_name, torch.zeros(*shape))
            layer_ix = self.pruneLayersIx[ix]
            buffer = eval(f"module.{buf_name}")
            self.currently_pruning[buf_name] = (True, layer_ix, buffer)
        self.bufferCount = 0 #incremented each time buffer modified 
        # print(self.buffers, self.currently_pruning)

    def  forward ( self , x):
        "propogates forward, with sideffect of storing the 'prune layers'"
        results  = []
        for  i , model  in  enumerate ( self. features ):
            x  =  model ( x )
            if  i  in self.featurePruneIx:
                results.append ( x )
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        for  ii , model  in  enumerate ( self . classifier ):
            x  =  model ( x )
            if  ii  in self.classifierPruneIx:
                results.append ( x )
        out = {n: r for n,r in zip(self.pruneNames, results)}
        if self.training:
             for buf_name, (is_pruning, layer_ix, buffer) in self.currently_pruning.items():
                if is_pruning:
                    name = buf_name.replace("buffer", "")
                    shape =  list(out[name].shape)
                    newShape = [shape[0]] + [1] + shape[1:]
                    # print(shape, buffer.shape, name)
                    buffer[:shape[0]] = out[name].view(*newShape)
                    #since all buffer sizes passsed in at once; 
                    #but the batch size might not evenly divide the data

                    #grib to allow larger buffers w/ intermediat pruning
             self.bufferCount += 1
             self.bufferCount %= self.numResults
        if self.ret_pruneLayers:
            return  x, out
        else:
            return x

    def critOpt(self):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.to(device)
        criterion = nn.CrossEntropyLoss()
        optzer = optim.SGD(self.parameters(), lr=0.001, momentum=0.9)
        return criterion, optzer

class repeatedLayerOut(prune.BasePruningMethod):
    """Prune every entry that's approximitly repeated;
    if outputs are within epison across buffer at least propEqual% of the time
    
    #grib, instead of selecting epsilon region around median shift to select sort,
    get range [propEqual/2, 1-propEqual/2]? But could be clustered eg. [1,1,1,1, 5, 10] with prop = 0.3
    """
    PRUNING_TYPE = 'structured'
    #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    epsilon = 0.08 #same for all layers
    propEqual = 0.7#must be greater than 0.5

    def __init__(self, buffer):
        self.buffer = buffer
        print(buffer)

    def compute_mask(self, tnsr, default_mask):#idx, default_mask, prevResults):
        "Iteratively finds the outputs that were close in previous batches and prunes them"
        buffer = self.buffer
        print(buffer)
        # buffer = module.buffers()[0]
        num_results = buffer.shape[0]
        med_value, _ = torch.median(buffer, dim = 0)
        buf_list = torch.split(buffer, numResults, dim = 0)#0 dim is where were stacked along
        isSimilar = torch.sum(
                        torch.stack([torch.ge(a, med_value-epsilon/2) & torch.ge(med_value+epsilon/2,a)
                                        for a in buf_list]),
                            0)
        mask = isSimilar >= propEqual*num_results
        return mask 

def prune_buffers(model):
    "Calls pruning wrapper based on all buffers that have been accumulated"
    param_name = 'weight'
    for name, (is_pruning, layer_ix, buffer) in model.currently_pruning.items():
        if is_pruning:
            module = list(model.modules())[layer_ix]
            #buffer = model.__get_attr__(name)#model.buffers()
            # print(buffer)
            # buffer = eval(f"module.{buf_name}")
            repeatedLayerOut.apply(module, 
                                   param_name,
                                   buffer)#,
                                #    buffer = buffer,
                                #    batch_size = model.numResults)

# Let's try it out!
model = Vgg11bn()

criterion, optimizer_ft = model.critOpt()

train_model(model, dataloaders_dict, criterion, optimizer_ft, num_epochs=1)
for ix, c in enumerate(model.modules()):
    if ix in model.pruneLayersIx:
        try:
            # print(vars(c).keys())
            # print(c._parameters['weight'])
            w = [i for i in c.named_parameters() if i[0] =='weight'][0]
            print("% Pruned", ix, c, 100. * float(torch.sum(w[1]== 0))
                    / float(w[1].nelement())) 
        except Exception as e:
            print(e)
print("################")



Epoch 0/0
----------
[8, 256, 56, 56] torch.Size([8, 1, 256, 56, 56]) features4
[8, 256, 56, 56] torch.Size([8, 1, 256, 56, 56]) features8
[8, 512, 28, 28] torch.Size([8, 1, 512, 28, 28]) features11
[8, 512, 28, 28] torch.Size([8, 1, 512, 28, 28]) features15
[8, 512, 14, 14] torch.Size([8, 1, 512, 14, 14]) features18
[8, 512, 14, 14] torch.Size([8, 1, 512, 14, 14]) features22
[8, 4096] torch.Size([8, 1, 4096]) features25
[8, 4096] torch.Size([8, 1, 4096]) classifier0
[8, 256, 56, 56] torch.Size([8, 1, 256, 56, 56]) features4
[8, 256, 56, 56] torch.Size([8, 1, 256, 56, 56]) features8
[8, 512, 28, 28] torch.Size([8, 1, 512, 28, 28]) features11
[8, 512, 28, 28] torch.Size([8, 1, 512, 28, 28]) features15
[8, 512, 14, 14] torch.Size([8, 1, 512, 14, 14]) features18
[8, 512, 14, 14] torch.Size([8, 1, 512, 14, 14]) features22
[8, 4096] torch.Size([8, 1, 4096]) features25
[8, 4096] torch.Size([8, 1, 4096]) classifier0
[8, 256, 56, 56] torch.Size([8, 1, 256, 56, 56]) features4
[8, 256, 56, 56] t

NameError: ignored

In [0]:
######################################################################
# Now, to apply this to a parameter in an ``nn.Module``, you should
# also provide a simple function that instantiates the method and
# applies it.
def layerOut_unstructured(model, idx, name):
    """Prunes tensor corresponding to parameter called `name` in `module`
    by removing every other entry in the tensors.
    Modifies module in place (and also return the modified module) 
    by:
    1) adding a named buffer called `name+'_mask'` corresponding to the 
    binary mask applied to the parameter `name` by the pruning method.
    The parameter `name` is replaced by its pruned version, while the 
    original (unpruned) parameter is stored in a new parameter named 
    `name+'_orig'`.
    Args:
        module (nn.Module): module containing the tensor to prune
        name (string): parameter name within `module` on which pruning
                will act.
    Returns:
        module (nn.Module): modified (i.e. pruned) version of the input
            module
    
    Examples:
        >>> m = nn.Linear(3, 4)
        >>> foobar_unstructured(m, name='bias')
    """
    assert(idx in model.pruneLayersIx)
    module = list(model.modules())[idx]
    module_name = 
    repeatedLayerOut.apply(module, name, {'buffer': model.buffers})
    return module   

######################################################################

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "vgg11"

def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False


def initialize_model(num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    """ VGG11_bn
    """
    model_ft = None
    #input_size = 0#?
    model_ft = models.vgg11_bn(pretrained=use_pretrained)
    set_parameter_requires_grad(model_ft, feature_extract)
    num_ftrs = model_ft.classifier[6].in_features
    model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
    input_size = 224
    return model_ft, input_size

model_ft, input_size = initialize_model(num_classes, feature_extract, use_pretrained=True)

# Send the model to GPU
model_ft = model_ft.to(device)

# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are 
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

# Train and evaluate
#model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs)

Params to learn:
	 features.0.weight
	 features.0.bias
	 features.1.weight
	 features.1.bias
	 features.4.weight
	 features.4.bias
	 features.5.weight
	 features.5.bias
	 features.8.weight
	 features.8.bias
	 features.9.weight
	 features.9.bias
	 features.11.weight
	 features.11.bias
	 features.12.weight
	 features.12.bias
	 features.15.weight
	 features.15.bias
	 features.16.weight
	 features.16.bias
	 features.18.weight
	 features.18.bias
	 features.19.weight
	 features.19.bias
	 features.22.weight
	 features.22.bias
	 features.23.weight
	 features.23.bias
	 features.25.weight
	 features.25.bias
	 features.26.weight
	 features.26.bias
	 classifier.0.weight
	 classifier.0.bias
	 classifier.3.weight
	 classifier.3.bias
	 classifier.6.weight
	 classifier.6.bias


In [0]:
# miniMod = torch.nn.ModuleList(model_ft.features[:5])
# def miniForward(x):
#     for module in miniMod.children():
#         x = module(x)
#     return x
epsilon = 0.01
propEqual = 0.9
cpuDevice = torch.device("cpu")
model.to(cpuDevice)
a = torch.rand(1,224,224)
repIn = [torch.cat((a,torch.rand(2,224,224)), dim=0).view(-1, 3, 224, 224) for i in range(batch_size)]

out = torch.stack([model(i)[1] for i in repIn], dim = 0)

med_value, indx= torch.median(out, dim = 0)
isSimilar = torch.sum(
                torch.stack([torch.ge(a, med_value-epsilon/2) & torch.ge(med_value+epsilon/2,a)
                                 for a in out]),
                      0)
isSimilar = isSimilar > propEqual*batch_size
isSimilar

AttributeError: ignored

In [0]:
from torchsummary import summary
class  myVgg11bn ( torch.nn.Module ):
    def  __init__ ( self ):
        #super( Vgg11_bn , self ). __init__ ()
        #features  =  list ( vgg11_bn ( pretrained = True ). features )
        super(myVgg11bn, self).__init__()
        mod = models.vgg11_bn(pretrained=True).children()
        self.net  =  nn.ModuleList(mod)
        mod2 = models.vgg11_bn(pretrained=True).children()
        self.net2  =  nn.ModuleList(mod2)
        #[ix for ix, i in enumerate(model.modules()) if isinstance(i, nn.Conv2d)]
    
    def  forward ( self , x ):
      return self.net2(self.net(x))

class  myVgg11bn3 ( myVgg11bn):
    def  __init__ ( self ):
        #super( Vgg11_bn , self ). __init__ ()
        #features  =  list ( vgg11_bn ( pretrained = True ). features )
        super(myVgg11bn, self).__init__()
        my = myVgg11bn()
        self.out  =  nn.ModuleList(my.modules())
        #[ix for ix, i in enumerate(model.modules()) if isinstance(i, nn.Conv2d)]
    
    def  forward ( self , x ):
      return self.net2(self.net(x))

# me = myVgg11bn3()
# [list(me.modules())[i] for i in getLayer(me, child='out')]

# print(model.pruneLayersIx)
# model.modules
# getLayers(model.children()['classifier'], child=None)
result, _ = summary_string(model, (3,224,224), -1, device, dtype = None)
[result[i] for i  in model.pruneLayersIx]

NameError: ignored

In [0]:
#model_ft = models.vgg11_bn(pretrained=True)
# model_ft.children
# pruneIx = Vgg11bn.getLayer(model_ft, child='features', layer_tp = nn.Conv2d) \
#                           +  Vgg11bn.getLayer(model_ft, child='classifier', layer_tp = nn.Linear)
# list(model.modules())[model.pruneLayers[-1]].weights
# print(summary(model, (3,224, 224)))
# summary(model_ft,(3,224, 224))
# for  ii , mod  in  enumerate ( model. features ):
#     if  ii+1  in model.pruneLayers:
#         print(ii, mod)
tmp = models.vgg11_bn(pretrained=True)
tmp.to(device)
summary(tmp.features, (3, 224,224))
tmp.features(torch.rand(1,3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
         MaxPool2d-4         [-1, 64, 112, 112]               0
            Conv2d-5        [-1, 128, 112, 112]          73,856
       BatchNorm2d-6        [-1, 128, 112, 112]             256
              ReLU-7        [-1, 128, 112, 112]               0
         MaxPool2d-8          [-1, 128, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         295,168
      BatchNorm2d-10          [-1, 256, 56, 56]             512
             ReLU-11          [-1, 256, 56, 56]               0
           Conv2d-12          [-1, 256, 56, 56]         590,080
      BatchNorm2d-13          [-1, 256, 56, 56]             512
             ReLU-14          [-1, 256,

RuntimeError: ignored