In [2]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F

from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pdb

#Code in this section was taken from
#https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/finetuning_torchvision_models_tutorial.ipynb

from google.colab import drive
drive.mount('/content/drive/')

data_dir = "/content/drive/My Drive/Research/hymenoptera_data/hymenoptera_data"

# Number of classes in the dataset
num_classes = 2

# Batch size for training (change depending on how much memory you have)
batch_size = 8

# Number of epochs to train for 
num_epochs = 1

# Flag for feature extracting. When False, we finetune the whole model, 
#   when True we only update the reshaped layer params
feature_extract = False

input_size = 224

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_pruning = False):
    since = time.time()

    val_acc_history = []
    prune_percent = []
    # best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for ix, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if phase == 'train':#len(dataloaders_dict[phase])-1:
                        loss.backward()
                        if is_pruning:
                            with torch.no_grad():
                                prune_buffers(model)#can't consume grads
                        optimizer.step()
                    _, preds = torch.max(outputs, 1)
                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
            #     best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    # model.load_state_dict(best_model_wts)
    return val_acc_history[-1]

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Create training and validation datasets
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val', 'test']}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val', 'test']}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
resultDevice = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

# train_model(model, dataloaders_dict, criterion, optimizer_ft, num_epochs=1)
print("""Epoch 0/0
----------
train Loss: 0.5233 Acc: 0.6886
val Loss: 0.2095 Acc: 0.9412

Training complete in 2m 23s
Best val Acc: 0.941176
tensor(0.9412, dtype=torch.float64)""")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/
Epoch 0/0
----------
train Loss: 0.5233 Acc: 0.6886
val Loss: 0.2095 Acc: 0.9412

Training complete in 2m 23s
Best val Acc: 0.941176
tensor(0.9412, dtype=torch.float64)


In [9]:
import  torch . nn  as  nn
from  torchvision . models  import  vgg11_bn
from  collections  import  namedtuple
from torchsummary import summary
import re
import time

def getLayer(model, child = 'classifier', layer_tp = nn.Linear):
    """Returns indexs to all the modules of type layer_tp which are under the 
    name 'child' in model's .chilren()
    Layer starts at 1 since .modules() returns ALL modules in model, the first elment of which is it's model.
    return  index against .children()[child], index against .modules()
    """
    layer = 1
    layers = []
    l = float('inf')
    assert child in model._modules.keys(), f"{child} is not a direct child of model, must be one of: {str(model._modules.keys())}"
    for c in model.named_children():
        if c[0] == child:
            l = min(l, layer)
            for m in c[1].modules():
                if isinstance(m, layer_tp):
                    layers += [layer]
                layer += 1
        else:
        #This won't get all the sub-sub modules of network, but since those wouldn't be indexed before the target layer in nn.modules don't care
        #len(module_iter(c[1])) to get all sub-sub modules 
            layer += len(list(c[1].modules()))
    return [i-l for i in layers], layers

def module_iter(model):
    "Get's the 'bottom most' modules of model"
    if not list(model.children()):#bottom node 
        return [model]
    else:
        return [mc for m in model.children() for mc in module_iter(m)]

def get_buf(buf_name, model):
    "Returns the buffer matching buf_name in model"
    buf = next(filter(lambda i: i[0] == buf_name, model.named_buffers()))
    return buf[1]

class  Vgg11bn (torch.nn.Module):
    """a Vgg11 model with batch normalization and instead of 1000 predictive classes,
    only has 2.
    Buffer's attached to module that they're used to prune, regardless of if 
    output/input to that module
    """

    def  __init__ (self, num_classes = 2, only_tune_classifier = False, numResults = batch_size):      
        super(). __init__ ()
        vgg_pre = models.vgg11_bn(pretrained=True)
        #self = models.vgg11_bn(pretrained=True, {'num_classes':num_classes})
        self.features  = copy.deepcopy(vgg_pre.features)
        # self.avgpool = copy.deepcopy(vgg_pre.avgpool)     
        self.features.add_module("AvgPool_Feat", copy.deepcopy(vgg_pre.avgpool))

        self.classifier = copy.deepcopy(vgg_pre.classifier)
        num_ftrs = vgg_pre.classifier[6].in_features
        self.classifier[6] = nn.Linear(num_ftrs,num_classes)#changes last linear layer
        del vgg_pre
        #only_tune_class when True we only update the reshaped layer params; when false we finetune the whole model, 
        self.only_tune_classifier = only_tune_classifier
        if only_tune_classifier:
            for param in self.features.parameters():
                param.requires_grad = False
        self.numResults = numResults
        #select which layers to prune
        self.layer_prune_init()
        #Make Buffer's to store info for each layer
        self.prune_buffer_init()

    def layer_prune_init(self):
        """self.classifierPruneIx is the index of the modules used for pruning;
        regardless of on input/output.
        featuresPruneIx is based on the output of the layers (convolutional)
        classifierPruneIx is based on the input to the layers (Linear Layers)
        Both are the indexs for model under the self.feature, self.classifier
        PruneIx_* is the index if used .modules(), releative to full model
        *Module*PruneIx is the index for the index within that *Module*
        self.pruneNames: names of the modules which will be pruned, [".".join(re.findall("([a-zA-z]+)(\d+)", i)[0]) for i in self.pruneNames], convert back
        """
        featurePruneIx, PruneIx_feature = \
                getLayer(self, child='features', layer_tp = nn.Conv2d)
        #getayer returns indexes for .children(), .modules(); NOT the ith matching entry
        laterFeat = lambda i: i > 5
        featurePruneIx = list(filter(laterFeat, featurePruneIx))
        PruneIx_feature = PruneIx_feature[-len(featurePruneIx):]
        classifierPruneIx, PruneIx_classifier = \
                        getLayer(self, child='classifier', layer_tp = nn.Linear)
        self.pruneLayersIx =  PruneIx_classifier#PruneIx_feature +
        self.featureInputIx = []
        self.featureOutputIx = []#[i-1 for i in featurePruneIx]
        self.classifierInputIx = [i-1 for i in classifierPruneIx]
        self.classifierOutputIx = [i-1 for i in classifierPruneIx]#, both in and output
        self.pruneModuleNames, self.pruneModules = zip(*[list(self.named_modules())[i]
                                            for i in self.pruneLayersIx])
        # self.pruneNames = [i.replace(".", "") for i in self.pruneNames]

    def prune_buffer_init(self):
        """Currently only sets linear layers as pruneable
        self.currently_pruning: dict of module name + pruning buffer type(in/out) to layer 
        contains seperate entries for each buffer attached to the same model
        """
        zero = torch.zeros(1, 3,224,224)
        self.training = False
        prune_out = self.forward(zero, ret_pruneLayers = True)
        self.currently_pruning = {}
        for ix, (name, tnsrS) in enumerate(prune_out.items()):
            for tnsr, tp in zip(tnsrS, ("in", "out")):
                if tnsr is not None:
                    shape = [self.numResults] + list(tnsr.shape)
                    module = self.pruneModules[ix]
                    module.register_buffer("prune_buf_" + tp, torch.zeros(*shape))
                    layer_ix = self.pruneLayersIx[ix]
                    is_pruning = isinstance(module, nn.Linear)#GRIBB!!!!!!
                    self.currently_pruning[name + ".prune_buf_" + tp] = [is_pruning, layer_ix]
        self.bufferCount = 0 #incremented each time buffer modified 


    def  forward (self , x, ret_pruneLayers = False):
        """propogates forward, with sideffect of storing the 'prune layers'
        ret_pruneLayers: returns intermediate pruning layers; used for init
        """
        results = []
        # pdb.set_trace()
        for  i , model  in  enumerate(self.features):
            y  =  model(x)
            if  i  in self.featureInputIx and  i  in self.featureOutputIx:
                results.append((x, y))
            elif  i  in self.featureInputIx:
                results.append((x, None))
            elif  i  in self.featureOutputIx:
                results.append((None, y))
            x = y
        x = torch.flatten(x, 1)
        for  i , model  in  enumerate(self.classifier):
            #model._buffers['weight_mask'].shape
            y  =  model(x)
            # print(ii, x.shape, y.shape)
            if  i  in self.classifierInputIx and  i in self.classifierOutputIx:
                results.append((x, y))
            elif  i  in self.classifierInputIx:
                results.append((x, None))
            elif  i  in self.classifierOutputIx:
                results.append((None, y))
            x = y
        out = {n: r for n,r in zip(self.pruneModuleNames, results)}
        if ret_pruneLayers:
            return  out
        if self.training:
             for buf_name, (is_pruning, layer_ix) in self.currently_pruning.items():
                if is_pruning:
                    buffer = get_buf(buf_name, self)
                    if buf_name[-3:] == "_in":
                        module_name = buf_name.replace(".prune_buf_in", "")
                        shape =  list(out[module_name][0].shape)
                        newShape = [shape[0]] + [1] + shape[1:]
                        buffer[:shape[0]] = out[module_name][0].view(*newShape)
                    else:
                        module_name = buf_name.replace(".prune_buf_out", "")
                        shape =  list(out[module_name][1].shape)
                        newShape = [shape[0]] + [1] + shape[1:]
                        buffer[:shape[0]] = out[module_name][1].view(*newShape)
                    #since all buffer sizes passsed in at once; 
                    #but the batch size might not evenly divide the data
                    #are setting batch values for a full 'update' cycle, 
                    #since numResults = batch_size  
                    #grib to allow larger buffers w/ intermediate pruning
             self.bufferCount += 1
             self.bufferCount %= self.numResults
        return x

    def get_params(self, buffer):
        return default_params

    def critOpt(self):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.to(device)
        criterion = nn.CrossEntropyLoss()
        optzer = optim.SGD(self.parameters(), lr=0.001, momentum=0.9)
        return criterion, optzer

class repeatedLayerOut(prune.BasePruningMethod):
    """Prune every entry that's approximitly repeated;
    if outputs are within epison across buffer at least propEqual% of the time
    
    #grib, instead of selecting epsilon region around median shift to select sort,
    get range [propEqual/2, 1-propEqual/2]? But could be clustered eg. [1,1,1,1, 5, 10] with prop = 0.3
    """
    PRUNING_TYPE = 'unstructured'
    #get's returned as "raveled" list of unmasked entries  

    def __init__(self, module, in_buffer, out_buffer, params):
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.module = module
        self.in_buffer = in_buffer #get_buf('prune_buf', module)
        self.out_buffer = out_buffer
        self.params = params

    def compute_mask(self, tnsr, default_mask):#idx, default_mask, prevResults):
        """Iteratively finds the outputs that were close in previous batches and prunes them
        epsilon: the numerical range in which values are considerd similar
        propEqual: proportion of values which are similar for output to be considered constant
        minMask: number of outputs which have constant neurons which will be pruned
        propFilter: proportion of nuerons attached to constant outputs which will be filtered
        retains all gradients
        """   
        epsilon = self.params['epsilon'] #same for all layers
        propEqual = self.params['propEqual'] #must be greater than 0.5
        propFilter = self.params['propFilter'] #randomly selects the neurons to prune
        num_results = self.params['numResults']
        in_buffer = self.in_buffer
        out_buffer = self.out_buffer
        # print("MASK\n", default_mask.shape, tnsr.shape, in_buffer.shape, out_buffer.shape)
        in_med_value, _ = torch.median(in_buffer, dim = 0)
        in_buf_list = torch.split(in_buffer, 1, dim = 0)#0 dim is where were stacked along
        in_isSimilar = torch.sum(
                        torch.stack([torch.ge(a, in_med_value-epsilon/2) 
                                    & torch.le(a, in_med_value+epsilon/2)
                                        for a in in_buf_list]),
                            0)
        in_areConst = in_isSimilar >= propEqual*num_results#outputs which are similar
        
        out_med_value, _ = torch.median(out_buffer, dim = 0)
        out_buf_list = torch.split(out_buffer, 1, dim = 0)#0 dim is where were stacked along
        out_isSimilar = torch.sum(
                        torch.stack([torch.ge(a, out_med_value-epsilon/2) 
                                    & torch.le(a, out_med_value+epsilon/2)
                                        for a in out_buf_list]),
                            0)
        out_areConst = out_isSimilar >= propEqual*num_results#outputs which are similar
        #Get from similar outputs to neurons to be pruned
        if isinstance(self.module, nn.Conv2d):
            const_neurons = torch.grad(~areConst, self, retain_graph = True)
            #if is constant neuron and rand_filter = 1, then is NOT pruned
            mask = torch.where(const_neurons == 0, 
                               rand_filter, 
                               torch.ones(*default_mask.shape, dtype=bool))
            return mask
        elif isinstance(self.module, nn.Linear):
            #have to cast to byte then back to bool since can't take bool outerproducts 
            module_mask = ~torch.ger(torch.squeeze(out_areConst.byte()),
                                     torch.squeeze(in_areConst.byte())
                                     ).bool()#doesn't overwieght intersection
            rand_filter = torch.rand(out_buffer.shape[-1],
                                     in_buffer.shape[-1]
                                     ) > propFilter
            mask = module_mask | rand_filter#, self.module._parameters['weight_orig'].shape
            if mask.shape != default_mask.shape:
                unpruned_ix = self.module.weight_mask.bool().view(-1)#Bad!!
                return mask.view(-1)[unpruned_ix]
            else:
                return mask
        else:
            print("Module Type not recognized: ", type(self.module))

def prune_buffers(model):
    "Calls pruning wrapper based on all buffers that have been accumulated"
    param_name = 'weight'
    for module_name in model.pruneModuleNames:
        in_buf_name = module_name + '.prune_buf_in'
        out_buf_name = module_name + '.prune_buf_out'
        in_buffer = None
        out_buffer = None
        if in_buf_name in model.currently_pruning:
            is_pruning_in, layer_ix = model.currently_pruning[in_buf_name]
            in_buffer = get_buf(in_buf_name, model)
        else:
            is_pruning_in = False
        if out_buf_name in model.currently_pruning:
            is_pruning_out, layer_ix = model.currently_pruning[out_buf_name]
            out_buffer = get_buf(out_buf_name, model)
        else:
            is_pruning_out = False

        if is_pruning_in or is_pruning_out:
            module = list(model.modules())[layer_ix]
            params = model.get_params(None)
            myPruner = repeatedLayerOut(module, in_buffer, out_buffer, params) 
            myPruner.apply(module, 
                           param_name,
                           module,#for init inside apply method
                           in_buffer,#apply method
                           out_buffer,
                           params
                           )
            
def fill_buffers(model, dataloaders = dataloaders_dict, num_passes = 1):
    "fill_buffers_only: returns as soon as buffers have been filled"
    for ix, (inputs, labels) in enumerate(dataloaders['train']):
        inputs = inputs.to(device)
        labels = labels.to(device)
        with torch.set_grad_enabled(True):
            # Get model outputs and calculate loss
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward
            prune_buffers(model)
        if (ix -1) >= num_passes:
            return None

def check_accuracy(model, dataloaders = dataloaders_dict, phase='test'):
    "phase in val or test"
    model.eval()
    running_corrects = 0
    for ix, (inputs, labels) in enumerate(dataloaders[phase]):
        inputs = inputs.to(device)
        labels = labels.to(device)
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            running_corrects += torch.sum(preds == labels.data)
    epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
    return epoch_acc

def total_pruned(model, print_out = False):
    total_prune = 0
    for ix, c in enumerate(model.modules()):
        if ix in model.pruneLayersIx:
            total_prune +=torch.sum(c.weight == 0)
            if print_out:
                print(ix, c, 
                    f"{100.0*torch.sum(c.weight == 0)/c.weight.nelement()}% Pruned") 
    return total_prune
    
# model = Vgg11bn()
# total_pruned(model, print_out = True)
# criterion, optimizer_ft = model.critOpt()

print(f"### Validation Accuracy\
    ### Elements Pruned\
    ### Time (Seconds)\n")

within_epsilon = [1, 2, 4, 8]
prop_equal = [0.5, 0.60, 0.7, 0.8]
prop_filter = [0.1, 0.2, 0.3]

results_untrained = torch.zeros(4,4,3)
pruned_untrained = torch.zeros(4,4,3)
time1 = torch.zeros(4,4,3)
max_acc_untrained = 0
params_untrained = None
untrained_model = None

results_trained = torch.zeros(4,4,3)
pruned_trained = torch.zeros(4,4,3)
time2 = torch.zeros(4,4,3)
max_acc_trained = 0
params_trained = None
trained_model = None

break

for ie, e in enumerate(within_epsilon):
    for ipE, pE in enumerate(prop_equal):
        for ipF, pF in enumerate(prop_filter):
            default_params  = {'epsilon': e,
                                'propEqual': pE,
                                'propFilter': pF, 
                                'numResults': batch_size}
            model = Vgg11bn()
            criterion, optimizer_ft = model.critOpt()
            a = time.time()
            results_trained[ie, ipE, ipF] = train_model(model, 
                                                        dataloaders_dict, 
                                                        criterion, 
                                                        optimizer_ft,
                                                        num_epochs=1,
                                                        is_pruning = True)
            time2[ie, ipE, ipF] = time.time() - a
            pruned_trained[ie, ipE, ipF] = total_pruned(model, print_out=True)
            if max_acc_trained < results_trained[ie, ipE, ipF]:
                max_acc_trained = results_trained[ie, ipE, ipF]
                params_trained = default_params
                trained_model = model
            print(f"### {results_trained[ie, ipE, ipF]:.2f}\
                ### {pruned_trained[ie, ipE, ipF]:.2f}\
                ### {time2[ie, ipE, ipF]:.2f}\n")
            del model

print("%%%%%%%End trained%%%%%%%\n\n\n\n$$$$$Start Untrained$$$$$")


for ie, e in enumerate(within_epsilon):
    for ipE, pE in enumerate(prop_equal):
        for ipF, pF in enumerate(prop_filter):
            default_params  = {'epsilon': e,
                            'propEqual': pE,
                            'propFilter': pF, 
                            'numResults': batch_size}
            model = Vgg11bn()
            criterion, optimizer_ft = model.critOpt()
            a = time.time()
            fill_buffers(model, num_passes=4)
            results_untrained[ie, ipE, ipF] = check_accuracy(model, phase='val')
            time1[ie, ipE, ipF] = time.time() - a
            pruned_untrained[ie, ipE, ipF] = total_pruned(model, print_out=True)

            if max_acc_untrained < results_untrained[ie, ipE, ipF]:
                max_acc_untrained = results_untrained[ie, ipE, ipF]
                params_untrained = default_params
                # untrained_model = model
            print(f"### {results_untrained[ie, ipE, ipF]:.2f}\
                    ### {pruned_untrained[ie, ipE, ipF]:.2f}\
                    ### {time1[ie, ipE, ipF]:.2f}\n\n")
            del model


### Validation Accuracy    ### Elements Pruned    ### Time (Seconds)



SyntaxError: ignored

In [0]:
#@title
s = """33 Linear(in_features=25088, out_features=4096, bias=True) 46.86307144165039% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.8585205078125% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 47.509765625% Pruned
### 0.48                    ### 56022152.00                    ### 82.85


33 Linear(in_features=25088, out_features=4096, bias=True) 73.7840576171875% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.79650115966797% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.876953125% Pruned
### 0.46                    ### 88207872.00                    ### 80.40


33 Linear(in_features=25088, out_features=4096, bias=True) 88.2318344116211% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.23563385009766% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.34228515625% Pruned
### 0.46                    ### 105478144.00                    ### 73.65


33 Linear(in_features=25088, out_features=4096, bias=True) 46.851566314697266% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.86142349243164% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.6552734375% Pruned
### 0.40                    ### 56010744.00                    ### 77.72


33 Linear(in_features=25088, out_features=4096, bias=True) 73.7934799194336% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.79026794433594% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.5107421875% Pruned
### 0.41                    ### 88216480.00                    ### 74.90


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23404693603516% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.23883819580078% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.09814453125% Pruned
### 0.54                    ### 105480944.00                    ### 70.58


33 Linear(in_features=25088, out_features=4096, bias=True) 46.853515625% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.863895416259766% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.3134765625% Pruned
### 0.62                    ### 56013128.00                    ### 76.86


33 Linear(in_features=25088, out_features=4096, bias=True) 73.78302001953125% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.78704071044922% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.2666015625% Pruned
### 0.34                    ### 88205184.00                    ### 75.37


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23873901367188% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.24845886230469% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.36669921875% Pruned
### 0.54                    ### 105487400.00                    ### 71.41


33 Linear(in_features=25088, out_features=4096, bias=True) 46.852508544921875% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.86733627319336% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.49658203125% Pruned
### 0.44                    ### 56012692.00                    ### 84.37


33 Linear(in_features=25088, out_features=4096, bias=True) 73.79354858398438% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.8038330078125% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 74.06005859375% Pruned
### 0.56                    ### 88218880.00                    ### 76.57


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23758697509766% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.23320007324219% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 87.97607421875% Pruned
### 0.45                    ### 105483616.00                    ### 71.62


33 Linear(in_features=25088, out_features=4096, bias=True) 46.85257339477539% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.87030792236328% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.533203125% Pruned
### 0.59                    ### 56013264.00                    ### 76.75


33 Linear(in_features=25088, out_features=4096, bias=True) 73.78116607666016% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.78683471679688% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 74.76806640625% Pruned
### 0.44                    ### 88203360.00                    ### 74.86


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23784637451172% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.23371887207031% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.6474609375% Pruned
### 0.54                    ### 105484024.00                    ### 70.32


33 Linear(in_features=25088, out_features=4096, bias=True) 46.85880661010742% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.84529113769531% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 47.4853515625% Pruned
### 0.42                    ### 56015548.00                    ### 76.19


33 Linear(in_features=25088, out_features=4096, bias=True) 73.7872085571289% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.77244567871094% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 74.0966796875% Pruned
### 0.54                    ### 88207096.00                    ### 74.50


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23336791992188% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.22205352783203% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.70849609375% Pruned
### 0.54                    ### 105477472.00                    ### 71.61


33 Linear(in_features=25088, out_features=4096, bias=True) 46.86458206176758% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.8434944152832% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.39892578125% Pruned
### 0.37                    ### 56021092.00                    ### 76.76


33 Linear(in_features=25088, out_features=4096, bias=True) 73.79022216796875% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.78691864013672% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.91357421875% Pruned
### 0.54                    ### 88212608.00                    ### 74.34


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23995208740234% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.22856903076172% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.73291015625% Pruned
### 0.46                    ### 105485336.00                    ### 70.56


33 Linear(in_features=25088, out_features=4096, bias=True) 46.85671615600586% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.86176300048828% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.93603515625% Pruned
### 0.45                    ### 56016116.00                    ### 77.67


33 Linear(in_features=25088, out_features=4096, bias=True) 73.79084777832031% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.7819595336914% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.47412109375% Pruned
### 0.68                    ### 88212384.00                    ### 75.16


33 Linear(in_features=25088, out_features=4096, bias=True) 88.24044036865234% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.25282287597656% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 86.80419921875% Pruned
### 0.46                    ### 105489760.00                    ### 70.89


33 Linear(in_features=25088, out_features=4096, bias=True) 46.854862213134766% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.84408187866211% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.1181640625% Pruned
### 0.53                    ### 56011176.00                    ### 76.82


33 Linear(in_features=25088, out_features=4096, bias=True) 73.78094482421875% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.78980255126953% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.79150390625% Pruned
### 0.52                    ### 88203552.00                    ### 76.14


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23804473876953% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.2342529296875% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.5498046875% Pruned
### 0.54                    ### 105484312.00                    ### 71.10


33 Linear(in_features=25088, out_features=4096, bias=True) 46.86110305786133% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.8686408996582% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.3623046875% Pruned
### 0.41                    ### 56021732.00                    ### 77.24


33 Linear(in_features=25088, out_features=4096, bias=True) 73.7739028930664% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.7862548828125% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.53515625% Pruned
### 0.48                    ### 88195696.00                    ### 74.54


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23578643798828% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.24055480957031% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.525390625% Pruned
### 0.46                    ### 105483056.00                    ### 70.59


33 Linear(in_features=25088, out_features=4096, bias=True) 46.85675811767578% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.83415222167969% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.7041015625% Pruned
### 0.55                    ### 56011508.00                    ### 76.31


33 Linear(in_features=25088, out_features=4096, bias=True) 73.7848892211914% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.78089141845703% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.388671875% Pruned
### 0.49                    ### 88206072.00                    ### 74.34


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23382568359375% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.2318344116211% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 87.8662109375% Pruned
### 0.54                    ### 105479520.00                    ### 70.11


33 Linear(in_features=25088, out_features=4096, bias=True) 46.86415481567383% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.850196838378906% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.22802734375% Pruned
### 0.56                    ### 56021760.00                    ### 77.33


33 Linear(in_features=25088, out_features=4096, bias=True) 73.78787994384766% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.78739929199219% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.193359375% Pruned
### 0.55                    ### 88210216.00                    ### 75.28


33 Linear(in_features=25088, out_features=4096, bias=True) 88.2351303100586% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.23873138427734% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 87.85400390625% Pruned
### 0.65                    ### 105482016.00                    ### 70.43


33 Linear(in_features=25088, out_features=4096, bias=True) 46.85382843017578% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.84412384033203% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 47.412109375% Pruned
### 0.31                    ### 56010228.00                    ### 76.66


33 Linear(in_features=25088, out_features=4096, bias=True) 73.78926086425781% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.78945922851562% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 72.59521484375% Pruned
### 0.45                    ### 88211944.00                    ### 75.11


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23656463623047% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.24127197265625% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 87.744140625% Pruned
### 0.55                    ### 105483904.00                    ### 71.04


33 Linear(in_features=25088, out_features=4096, bias=True) 46.85751724243164% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.860416412353516% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 47.44873046875% Pruned
### 0.45                    ### 56016752.00                    ### 76.48


33 Linear(in_features=25088, out_features=4096, bias=True) 73.78726196289062% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.7730941772461% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 73.64501953125% Pruned
### 0.48                    ### 88207224.00                    ### 75.11


33 Linear(in_features=25088, out_features=4096, bias=True) 88.23760223388672% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.23046112060547% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 87.98828125% Pruned
### 0.55                    ### 105483176.00                    ### 71.34


33 Linear(in_features=25088, out_features=4096, bias=True) 46.8482780456543% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.87440872192383% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 45.69091796875% Pruned
### 0.54                    ### 56009464.00                    ### 77.11


33 Linear(in_features=25088, out_features=4096, bias=True) 73.7865982055664% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.79535675048828% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 74.365234375% Pruned
### 0.46                    ### 88210336.00                    ### 75.35


33 Linear(in_features=25088, out_features=4096, bias=True) 88.2386703491211% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.23358154296875% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.7939453125% Pruned
### 0.46                    ### 105484864.00                    ### 70.44


33 Linear(in_features=25088, out_features=4096, bias=True) 46.859500885009766% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 46.862518310546875% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 46.6552734375% Pruned
### 0.44                    ### 56019080.00                    ### 77.07


33 Linear(in_features=25088, out_features=4096, bias=True) 73.78606414794922% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 73.77337646484375% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 74.03564453125% Pruned
### 0.46                    ### 88206072.00                    ### 75.73


33 Linear(in_features=25088, out_features=4096, bias=True) 88.2383804321289% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 88.23115539550781% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 88.85498046875% Pruned
### 0.54                    ### 105484176.00                    ### 71.29"""

s2 = """Epoch 0/0
----------
train Loss: 0.6804 Acc: 0.6140
val Loss: 0.6935 Acc: 0.4575

Training complete in 5m 2s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.28697967529297% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.28395080566406% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.5078125% Pruned
### 0.46                ### 113911144.00                ### 301.97

Epoch 0/0
----------
train Loss: 0.6965 Acc: 0.5000
val Loss: 0.6935 Acc: 0.4575

Training complete in 4m 23s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84536743164062% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84488677978516% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.84130859375% Pruned
### 0.46                ### 119360912.00                ### 262.70

Epoch 0/0
----------
train Loss: 0.6965 Acc: 0.4868
val Loss: 0.6926 Acc: 0.5425

Training complete in 4m 14s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.9967041015625% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99674987792969% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.54                ### 119541920.00                ### 253.65

Epoch 0/0
----------
train Loss: 0.6919 Acc: 0.5877
val Loss: 0.6929 Acc: 0.5425

Training complete in 4m 55s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 95.25799560546875% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.23308563232422% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.4345703125% Pruned
### 0.54                ### 113872824.00                ### 295.10

Epoch 0/0
----------
train Loss: 0.7002 Acc: 0.4737
val Loss: 0.6939 Acc: 0.4575

Training complete in 4m 26s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84182739257812% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84190368652344% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.8291015625% Pruned
### 0.46                ### 119356768.00                ### 265.86

Epoch 0/0
----------
train Loss: 0.6935 Acc: 0.4825
val Loss: 0.6936 Acc: 0.4575

Training complete in 4m 12s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99674224853516% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99671936035156% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.46                ### 119541952.00                ### 252.46

Epoch 0/0
----------
train Loss: 0.6930 Acc: 0.5132
val Loss: 0.6914 Acc: 0.5425

Training complete in 4m 59s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 94.82119750976562% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 94.88768005371094% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 94.6044921875% Pruned
### 0.54                ### 113365952.00                ### 299.17

Epoch 0/0
----------
train Loss: 0.7000 Acc: 0.4781
val Loss: 0.6937 Acc: 0.4575

Training complete in 4m 24s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.80309295654297% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.8256607055664% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.81689453125% Pruned
### 0.46                ### 119314248.00                ### 263.88

Epoch 0/0
----------
train Loss: 0.6966 Acc: 0.5263
val Loss: 0.6922 Acc: 0.5425

Training complete in 4m 12s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99495697021484% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.9966049194336% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.54                ### 119540104.00                ### 252.27

Epoch 0/0
----------
train Loss: 0.6211 Acc: 0.7368
val Loss: 0.6746 Acc: 0.5425

Training complete in 5m 20s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 90.34745025634766% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 90.93275451660156% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 89.75830078125% Pruned
### 0.54                ### 108104776.00                ### 319.68

Epoch 0/0
----------
train Loss: 0.6824 Acc: 0.5307
val Loss: 0.6928 Acc: 0.5425

Training complete in 4m 37s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.30992889404297% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.64393615722656% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.62158203125% Pruned
### 0.54                ### 118776968.00                ### 276.50

Epoch 0/0
----------
train Loss: 0.6903 Acc: 0.4781
val Loss: 0.6932 Acc: 0.4575

Training complete in 4m 21s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.93717193603516% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.98912048339844% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.96337890625% Pruned
### 0.46                ### 119479464.00                ### 260.75

Epoch 0/0
----------
train Loss: 0.6907 Acc: 0.5044
val Loss: 0.6956 Acc: 0.4575

Training complete in 4m 56s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.29124450683594% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.28609466552734% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.41015625% Pruned
### 0.46                ### 113915880.00                ### 295.60

Epoch 0/0
----------
train Loss: 0.6970 Acc: 0.4781
val Loss: 0.6936 Acc: 0.4575

Training complete in 4m 29s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84538269042969% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.8441390991211% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.76806640625% Pruned
### 0.46                ### 119360800.00                ### 268.59

Epoch 0/0
----------
train Loss: 0.6953 Acc: 0.5088
val Loss: 0.6945 Acc: 0.4575

Training complete in 4m 11s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99681854248047% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99685668945312% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.46                ### 119542064.00                ### 251.46

Epoch 0/0
----------
train Loss: 0.6896 Acc: 0.5044
val Loss: 0.6931 Acc: 0.4575

Training complete in 4m 55s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.28926086425781% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.2913818359375% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.39794921875% Pruned
### 0.46                ### 113914728.00                ### 294.74

Epoch 0/0
----------
train Loss: 0.6891 Acc: 0.5482
val Loss: 0.6939 Acc: 0.4575

Training complete in 4m 26s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84538269042969% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84619140625% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.86572265625% Pruned
### 0.46                ### 119361152.00                ### 265.82

Epoch 0/0
----------
train Loss: 0.6953 Acc: 0.4737
val Loss: 0.6938 Acc: 0.4575

Training complete in 4m 16s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99684143066406% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99657440185547% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.46                ### 119542032.00                ### 255.51

Epoch 0/0
----------
train Loss: 0.6892 Acc: 0.4956
val Loss: 0.6933 Acc: 0.4575

Training complete in 4m 54s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.26688385009766% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.26675415039062% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.2880859375% Pruned
### 0.46                ### 113887584.00                ### 293.94

Epoch 0/0
----------
train Loss: 0.6955 Acc: 0.5044
val Loss: 0.6930 Acc: 0.5425

Training complete in 4m 32s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84333038330078% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84467315673828% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.81689453125% Pruned
### 0.54                ### 119358792.00                ### 271.60

Epoch 0/0
----------
train Loss: 0.6914 Acc: 0.4430
val Loss: 0.6932 Acc: 0.4575

Training complete in 4m 17s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99665832519531% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99658203125% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.46                ### 119541856.00                ### 256.57

Epoch 0/0
----------
train Loss: 0.6860 Acc: 0.5877
val Loss: 0.6927 Acc: 0.6471

Training complete in 4m 55s
Best val Acc: 0.647059
33 Linear(in_features=25088, out_features=4096, bias=True) 94.87027740478516% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.01087951660156% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.3125% Pruned
### 0.65                ### 113437112.00                ### 294.57

Epoch 0/0
----------
train Loss: 0.6912 Acc: 0.4649
val Loss: 0.6934 Acc: 0.4575

Training complete in 4m 26s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.81127166748047% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.831298828125% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.90234375% Pruned
### 0.46                ### 119323616.00                ### 266.07

Epoch 0/0
----------
train Loss: 0.6978 Acc: 0.4781
val Loss: 0.6923 Acc: 0.5425

Training complete in 4m 13s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99501037597656% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99641418457031% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.54                ### 119540120.00                ### 252.51

Epoch 0/0
----------
train Loss: 0.6805 Acc: 0.5921
val Loss: 0.6949 Acc: 0.4575

Training complete in 4m 54s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.28924560546875% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.2918930053711% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.42236328125% Pruned
### 0.46                ### 113914792.00                ### 293.97

Epoch 0/0
----------
train Loss: 0.6973 Acc: 0.5000
val Loss: 0.6938 Acc: 0.4575

Training complete in 4m 24s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84489440917969% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84407043457031% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.8291015625% Pruned
### 0.46                ### 119360296.00                ### 263.72

Epoch 0/0
----------
train Loss: 0.6966 Acc: 0.5000
val Loss: 0.6928 Acc: 0.5425

Training complete in 4m 13s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99681854248047% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99675750732422% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.54                ### 119542048.00                ### 253.32

Epoch 0/0
----------
train Loss: 0.6882 Acc: 0.5439
val Loss: 0.6942 Acc: 0.4575

Training complete in 4m 55s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.28770446777344% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.29801177978516% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.34912109375% Pruned
### 0.46                ### 113914240.00                ### 295.37

Epoch 0/0
----------
train Loss: 0.6933 Acc: 0.5175
val Loss: 0.6936 Acc: 0.4575

Training complete in 4m 25s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.8459243774414% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84550476074219% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.8046875% Pruned
### 0.46                ### 119361592.00                ### 265.02

Epoch 0/0
----------
train Loss: 0.6944 Acc: 0.4737
val Loss: 0.6936 Acc: 0.4575

Training complete in 4m 12s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99667358398438% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.9970474243164% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.98779296875% Pruned
### 0.46                ### 119541936.00                ### 251.76

Epoch 0/0
----------
train Loss: 0.6912 Acc: 0.5614
val Loss: 0.6924 Acc: 0.5425

Training complete in 4m 55s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 95.2898178100586% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.28968048095703% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.1416015625% Pruned
### 0.54                ### 113915000.00                ### 294.65

Epoch 0/0
----------
train Loss: 0.6946 Acc: 0.5088
val Loss: 0.6938 Acc: 0.4575

Training complete in 4m 27s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84547424316406% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84436798095703% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.67041015625% Pruned
### 0.46                ### 119360928.00                ### 266.99

Epoch 0/0
----------
train Loss: 0.6919 Acc: 0.4605
val Loss: 0.6940 Acc: 0.4575

Training complete in 4m 12s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99681854248047% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99693298339844% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.46                ### 119542080.00                ### 252.31

Epoch 0/0
----------
train Loss: 0.6957 Acc: 0.4781
val Loss: 0.6943 Acc: 0.4575

Training complete in 4m 56s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.27536010742188% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.28797149658203% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 94.970703125% Pruned
### 0.46                ### 113899832.00                ### 295.58

Epoch 0/0
----------
train Loss: 0.6931 Acc: 0.4781
val Loss: 0.6938 Acc: 0.4575

Training complete in 4m 25s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84431457519531% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84562683105469% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.8779296875% Pruned
### 0.46                ### 119359960.00                ### 264.91

Epoch 0/0
----------
train Loss: 0.6944 Acc: 0.5044
val Loss: 0.6929 Acc: 0.5425

Training complete in 4m 14s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99677276611328% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.9969253540039% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.98779296875% Pruned
### 0.54                ### 119542024.00                ### 253.80

Epoch 0/0
----------
train Loss: 0.6854 Acc: 0.5658
val Loss: 0.6934 Acc: 0.4575

Training complete in 4m 59s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.28684997558594% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.28763580322266% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.30029296875% Pruned
### 0.46                ### 113911608.00                ### 298.66

Epoch 0/0
----------
train Loss: 0.6892 Acc: 0.5263
val Loss: 0.6926 Acc: 0.5425

Training complete in 4m 28s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84467315673828% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84490203857422% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.81689453125% Pruned
### 0.54                ### 119360216.00                ### 268.38

Epoch 0/0
----------
train Loss: 0.6927 Acc: 0.5000
val Loss: 0.6926 Acc: 0.5425

Training complete in 4m 14s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99671173095703% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99666595458984% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.54                ### 119541920.00                ### 253.83

Epoch 0/0
----------
train Loss: 0.6859 Acc: 0.5219
val Loss: 0.6934 Acc: 0.4575

Training complete in 4m 56s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 95.29088592529297% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.29882049560547% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.03173828125% Pruned
### 0.46                ### 113917616.00                ### 296.34

Epoch 0/0
----------
train Loss: 0.6977 Acc: 0.4737
val Loss: 0.6931 Acc: 0.5425

Training complete in 4m 37s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84465789794922% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84480285644531% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.8046875% Pruned
### 0.54                ### 119360176.00                ### 276.91

Epoch 0/0
----------
train Loss: 0.6919 Acc: 0.4868
val Loss: 0.6935 Acc: 0.4575

Training complete in 4m 14s
Best val Acc: 0.457516
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99674224853516% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99674224853516% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.98779296875% Pruned
### 0.46                ### 119541968.00                ### 253.97

Epoch 0/0
----------
train Loss: 0.6909 Acc: 0.5965
val Loss: 0.6924 Acc: 0.5425

Training complete in 4m 54s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 95.28970336914062% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.28330993652344% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.66650390625% Pruned
### 0.54                ### 113913840.00                ### 294.41

Epoch 0/0
----------
train Loss: 0.6921 Acc: 0.4912
val Loss: 0.6928 Acc: 0.5425

Training complete in 4m 29s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84536743164062% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.84416198730469% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.8291015625% Pruned
### 0.54                ### 119360792.00                ### 268.70

Epoch 0/0
----------
train Loss: 0.7026 Acc: 0.4737
val Loss: 0.6926 Acc: 0.5425

Training complete in 4m 15s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.99681091308594% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99693298339844% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 100.0% Pruned
### 0.54                ### 119542064.00                ### 254.89

Epoch 0/0
----------
train Loss: 0.6875 Acc: 0.5263
val Loss: 0.6925 Acc: 0.5425

Training complete in 4m 55s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 95.2888412475586% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 95.2926254272461% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 95.556640625% Pruned
### 0.54                ### 113914520.00                ### 295.08

Epoch 0/0
----------
train Loss: 0.6926 Acc: 0.5044
val Loss: 0.6927 Acc: 0.5425

Training complete in 4m 28s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.84564208984375% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.8451156616211% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.8046875% Pruned
### 0.54                ### 119361240.00                ### 268.18

Epoch 0/0
----------
train Loss: 0.6893 Acc: 0.5570
val Loss: 0.6930 Acc: 0.5425

Training complete in 4m 34s
Best val Acc: 0.542484
33 Linear(in_features=25088, out_features=4096, bias=True) 99.9967041015625% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 99.99678802490234% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 99.98779296875% Pruned
### 0.54                ### 119541920.00                ### 274.26"""


In [0]:

# results_untrained = torch.zeros(4,4,3)
# pruned_untrained = torch.zeros(4,4,3)
# time1 = torch.zeros(4,4,3)
# results_trained = torch.zeros(4,4,3)
# pruned_trained = torch.zeros(4,4,3)
# time2 = torch.zeros(4,4,3)

# max_acc_untrained = 0
# params_untrained = None
# untrained_model = None

sl = s.split("\n\n\n")

i = 0 
levels = []
for ie, e in enumerate(within_epsilon):
    # i = ie*12
    for ipE, pE in enumerate(prop_equal):
        for ipF, pF in enumerate(prop_filter):
            sub = sl[i].split("\n")[-1]
            sb = re.findall("(\d+\.\d*)", sub)
            results_untrained[ie, ipE, ipF] = float(sb[0])
            pruned_untrained[ie, ipE, ipF] = float(sb[1])
            time1[ie, ipE, ipF] = float(sb[2])
            levels.append([float(i) for i in re.findall("""(\d*\.\d*)\% Pruned""", sl[i])])
            i += 1
a,b,c = list(zip(*levels))
print(np.corrcoef(a,b), np.corrcoef(b,c), sep='\n')

ix = results_untrained.view(-1).argmax()
print(results_untrained.view(-1)[ix], within_epsilon[ix//12], prop_equal[(ix%12)//3], prop_filter[ix%3])

# print(pruned_untrained.view(-1)[ix]/(4096*25088 + 4096*4096 + 4096*2 + 2))
# print(pruned_untrained.view(-1)[ix]/128780034)
# summary(model, (3,224,224))

In [7]:
import matplotlib.pyplot as plt
import numpy as np
cord_list = (within_epsilon, prop_equal, prop_filter)
cord_tensor = torch.tensor(np.meshgrid(*cord_list))
cordinates = [cord_tensor[i] for i in range(3)]
axis_names = ["Epsilon", "Proportion Equal",  "Proportion Filtered"]
(results_trained, pruned_trained, time2) = (results_untrained, pruned_untrained, time1)
                 
for title, params, i in zip(("Untrained Pruning", 
                          "Trained Pruning"), 
                         ((results_untrained, pruned_untrained, time1), 
                          (results_trained, pruned_trained, time2)),
                          (0,1)
                         ):
    fig1 = plt.figure(figsize=(15,20))
    # fig1, axes = plt.subplots(nrows = 2, ncols=3,figsize=(30,20))
    fig1.tight_layout()
    ttl = fig1.suptitle(title, size = 20)
    ttl.set_position([.5, 0.9])
    for c, j, t in zip(params, (0,1,2), ("Accuracy", "% Pruned", "Time")):
        # ax1 = axes[i,j]#fig1.add_subplot(s, projection='3d')
        ax1 = fig1.add_subplot(3, 1, j+1,  projection='3d')
        a = ax1.scatter(*cordinates, c=c.view(-1), s=24)
        fig1.colorbar(a)
        ax1.set_xlabel("Epsilon")
        ax1.set_ylabel("Proportion Equal")
        ax1.set_zlabel("Proportion Filtered")
        ax1.set_title(t)
    plt.show()


for title, params in zip(("Results for Untrained Pruning", 
                          "Results for Trained Pruning"), 
                         ((results_untrained, pruned_untrained), 
                          (results_trained, pruned_trained))
                         ):
    fig1 = plt.figure(figsize=(10,10))
    fig1.suptitle(title)
    for c, s2, y_name in zip(params, (0, 10), ("Accuracy", "% Pruned")):
        for axis_cord, s in zip(cordinates, (311, 312, 313)): 
            dim = (s - 311)
            s += s2
            ax1 = fig1.add_subplot(s)
            # axis_sz = len(axis_cord)
            # num_rep = c.nelements()//axis_sz
            m_view = [len(i) for i in cordinates]
            m_view[dim] = -1
            # cor = torch.tensor(axis_cord).view(-1,-1,-1)
            # cor = cor.expand(*m_view)
            ax1.scatter(cordinates[dim], c.view(-1))
            ax1.set_xlabel(axis_names[dim])
            ax1.set_ylabel(y_name)
            # ax1.set_xticklabel([f"" for i in cordinates[dim])
        plt.show()

NameError: ignored

In [0]:

within_epsilon = [1, 2, 4, 8]
prop_equal = [0.5, 0.60, 0.7, 0.8]
prop_filter = [0.1, 0.2, 0.3]

results_untrained = torch.zeros(4,4,3)
pruned_untrained = torch.zeros(4,4,3)
time1 = torch.zeros(4,4,3)
max_acc_untrained = 0
params_untrained = None
untrained_model = None

sl = s.split("\n\n\n")
i = 0 
levels = []
for ie, e in enumerate(within_epsilon):
    # i = ie*12
    for ipE, pE in enumerate(prop_equal):
        for ipF, pF in enumerate(prop_filter):
            sub = sl[i].split("\n")[-1]
            sb = re.findall("(\d+\.\d*)", sub)
            results_untrained[ie, ipE, ipF] = float(sb[0])
            pruned_untrained[ie, ipE, ipF] = float(sb[1])
            time1[ie, ipE, ipF] = float(sb[2])
            levels.append([float(i) for i in re.findall("""(\d*\.\d*)\% Pruned""", sl[i])])
            i += 1
a,b,c = list(zip(*levels))
print(np.corrcoef(a,b), np.corrcoef(b,c), sep='\n')

ix = results_untrained.view(-1).argmax()
print(results_untrained.view(-1)[ix], within_epsilon[ix//12], prop_equal[(ix%12)//3], prop_filter[ix%3])

# print(pruned_untrained.view(-1)[ix]/(4096*25088 + 4096*4096 + 4096*2 + 2))
# print(pruned_untrained.view(-1)[ix]/128780034)
# summary(model, (3,224,224))

In [183]:
model = Vgg11bn()
print("Unpruned model Accuracy: ", check_accuracy(model, phase = 'test'))

e, pE, pF = within_epsilon[ix//12], prop_equal[(ix%12)//3], prop_filter[ix%3]
default_params  = {'epsilon': e,
                'propEqual': pE,
                'propFilter': pF, 
                'numResults': batch_size}
fill_buffers(model, num_passes=4)
prune_buffers(model)
print("Pruned model Accuracy: ", check_accuracy(model, phase = 'test'))


total_pruned(model, print_out = True)
criterion, optimizer_ft = model.critOpt()
train_model(model, dataloaders_dict, criterion, optimizer_ft, num_epochs=3, is_pruning=False)

33 Linear(in_features=25088, out_features=4096, bias=True) 79.03739929199219% Pruned
36 Linear(in_features=4096, out_features=4096, bias=True) 79.0262451171875% Pruned
39 Linear(in_features=4096, out_features=2, bias=True) 79.248046875% Pruned
Epoch 0/1
----------
train Loss: 0.6891 Acc: 0.5175
val Loss: 0.6764 Acc: 0.9085

Epoch 1/1
----------
train Loss: 0.6760 Acc: 0.7061
val Loss: 0.6645 Acc: 0.9281

Training complete in 5m 26s
Best val Acc: 0.928105


tensor(0.9281, dtype=torch.float64)