In [2]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.autograd import Variable
from torch import optim
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchvision import models
from torchsummary import summary
import copy
# from summary import summary

### Network parameters: ###

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.fc1 = nn.Linear(400,120)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(120,84)
        self.relu1 = nn.ReLU()
        self.fc3 = nn.Linear(84,10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu1(out)
        out = self.fc3(out)
        return out

model = LeNet().to(device=device)

### Loading Mnist dataset, training method, and testing method ###

In [4]:
# Define relevant variables for the ML task
batch_size = 100
num_classes = 10
learning_rate = 0.001

#Loading the dataset and preprocessing
train_dataset = torchvision.datasets.MNIST(root = '/Users/dimademler/Desktop/me-Programming/pytorch',
                                           train = True,
                                           transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1307,), std = (0.3081,))]),
                                           download = True)


test_dataset = torchvision.datasets.MNIST(root = '/Users/dimademler/Desktop/me-Programming/pytorch',
                                          train = False,
                                          transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1325,), std = (0.3105,))]),
                                          download=True)


train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)


test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

#Setting the loss function
cost = nn.CrossEntropyLoss()

#this is defined to print how many steps are remaining when training
total_step = len(train_loader)

def train(model,train_loader,test_loader,num_epochs,optimizer):
  total_step = len(train_loader)
  for epoch in range(num_epochs):
      for i, (images, labels) in enumerate(train_loader):  
          images = images.to(device)
          labels = labels.to(device)
          
          #Forward pass
          outputs = model(images)
          loss = cost(outputs, labels)
            
          # Backward and optimize
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()
              
          if (i+1) % 400 == 0:
              print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                        .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
  
def test(model, test_loader):
    # Test the model
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for i, (images, labels) in enumerate(test_loader):  
            test_output = model(images)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == labels).sum().item() / float(labels.size(0))

    print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
    return accuracy


## saving initial model weights ##

In [5]:
initial_state = model.state_dict()
torch.save(initial_state, "initial_model_state.pth")

## Training Network ##

In [6]:

num_epochs = 6

model = LeNet()



optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


train(model, train_loader,test_loader,num_epochs, optimizer = optimizer)
# modeloriginal= copy.deepcopy(model)


Epoch [1/6], Step [400/600], Loss: 0.0829
Epoch [2/6], Step [400/600], Loss: 0.0844
Epoch [3/6], Step [400/600], Loss: 0.0146
Epoch [4/6], Step [400/600], Loss: 0.0182
Epoch [5/6], Step [400/600], Loss: 0.0054
Epoch [6/6], Step [400/600], Loss: 0.0045


## Attempt 2 at making iterative pruning ##

In [None]:
num_timespruned=30
num_epochs=10
k= 15 # number of epochs trained before resetting


# First training
train(model, train_loader,test_loader,num_epochs=k, optimizer = optimizer)

parameters_to_prune2 = (
    (model.fc1, 'weight'),
    (model.fc2, 'weight'),
)

prev_state = model.state_dict()
torch.save(prev_state, "initial_model_state.pth")

prune.global_unstructured(parameters_to_prune2,prune.L1Unstructured, importance_scores=None, amount=0.2)

for i in range(num_timespruned):
    
    
    # model.load_state_dict(initial_state)

    # Saving masks method from Luke
    masks = []
    for module, _ in parameters_to_prune2:
      masks.append(module.get_buffer('weight_mask').data)
    model.load_state_dict(torch.load(prev_state))
    for i, (module, _) in enumerate(parameters_to_prune2):
      prune.custom_from_mask(module,'weight',masks[i])
    
    prune.global_unstructured(parameters_to_prune2,prune.L1Unstructured, importance_scores=None, amount=0.2)

    train(model, train_loader,test_loader,num_epochs = num_epochs, optimizer = optimizer)    










In [None]:
def find_ticket(model, parameters_to_prune, name, train_loader, test_loader, start_iter = 0, end_iter = 30, num_epochs = 4, learning_rate = .001, prune_amount = .2, k = 1):
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  train(model, train_loader,test_loader,num_epochs = k, optimizer = optimizer) #Kth epoch save weights
  torch.save(model.state_dict(), 'K_iterweights.pth')
  train(model, train_loader,test_loader,num_epochs = num_epochs - k, optimizer = optimizer) #Pretrain

  accuracy = []

  for i in range(start_iter, end_iter):  
    #Prune
    prune.global_unstructured(parameters_to_prune,pruning_method=prune.L1Unstructured,amount=prune_amount,)

    #Save Masks, Rewind Weights, then place Masks back
    masks = []
    for module, _ in parameters_to_prune:
      masks.append(module.get_buffer('weight_mask').data)
    model.load_state_dict(torch.load('K_iterweights.pth'))
    for i, (module, _) in enumerate(parameters_to_prune):
      prune.custom_from_mask(module,'weight',masks[i])

    #Train
    train(model, train_loader,test_loader,num_epochs = num_epochs, optimizer = optimizer)
    accuracy.append(test(model, test_loader))

    plt.plot(np.arange(len(accuracy)), accuracy)
    plt.show()
    print('Saving iteration ', str(i+1))
    torch.save(model.state_dict(), name + '_iter' + str(i+1)) 

In [13]:
model = LeNet()
parameters_to_prune3 = (
    (model.fc1, 'weight'),
    (model.fc2, 'weight'),
    (model.fc3, 'weight'),
)
find_ticket(model, parameters_to_prune3, 'MNIST', train_loader, test_loader)

Epoch [1/1], Step [400/600], Loss: 0.0625
Epoch [1/9], Step [400/600], Loss: 0.0608
Epoch [2/9], Step [400/600], Loss: 0.0336
Epoch [3/9], Step [400/600], Loss: 0.0066
Epoch [4/9], Step [400/600], Loss: 0.0478
Epoch [5/9], Step [400/600], Loss: 0.0011
Epoch [6/9], Step [400/600], Loss: 0.0072
Epoch [7/9], Step [400/600], Loss: 0.0017
Epoch [8/9], Step [400/600], Loss: 0.0202
Epoch [9/9], Step [400/600], Loss: 0.0055


RuntimeError: Error(s) in loading state_dict for LeNet:
	Missing key(s) in state_dict: "fc1.weight_orig", "fc1.weight_mask", "fc2.weight_orig", "fc2.weight_mask", "fc3.weight_orig", "fc3.weight_mask". 
	Unexpected key(s) in state_dict: "fc1.weight", "fc2.weight", "fc3.weight". 

In [None]:
# Make a list of 10 friends and print them

## Attempting iterative pruning ##

In [23]:
# modelcopy2= copy.deepcopy(model)




num_timespruned=30

parameters_to_prune2 = (
    (model.fc1, 'weight'),
    (model.fc2, 'weight'),
)
prune.global_unstructured(
    parameters_to_prune2,
    pruning_method=prune.L1Unstructured,
    amount=0.1
)
# run pruning once
# prune.global_unstructured.pruning_method
  # torch.nn.utils.prune.random_unstructured(module, name, num_prunedfloat)

# torch.nn.utils.prune.global_unstructured(parameters_to_prune2, )

print("Testing after global pruning: ")
test(model,test_loader)




for i in range(num_timespruned):
    
    prune.global_unstructured(parameters_to_prune2,prune.L1Unstructured, importance_scores=None, amount=0.2)
    model.load_state_dict(initial_state)
    
    # Need to save masks somehow and load them in
    train(model, train_loader,test_loader,num_epochs, optimizer = optimizer)
    print("pruning number: ", i)
    test(model,test_loader)
    



    

    # maskweights_fc1=model.fc1.named_buffers()

#   modelretrain=  LeNet().to(device=device)
#   modelretrain.load_state_dict(torch.load("modelcopy2_state.pth"))


# fc1mask=modelcopy2.fc1
# fc2mask=modelcopy2.fc2





Testing after global pruning: 
Test Accuracy of the model on the 10000 test images: 0.20


0.2

In [20]:
modelcopy= copy.deepcopy(model)

# max_growth = 30
num_prunedfloat=0.2


parameters_to_prune = (
    (modelcopy.fc1, 'weight'),
    (modelcopy.fc2, 'weight'),
)

for module,name in parameters_to_prune:
  torch.nn.utils.prune.random_unstructured(module, name, num_prunedfloat)


print("Testing after pruning: ")
test(modelcopy,test_loader)

module2 = modelcopy.fc2
# print(list(module2.named_buffers()))
# print(list(module2.weight))
# vgg = models.vgg16()
# summary(vgg, (3, 224, 224))




Testing after pruning: 
Test Accuracy of the model on the 10000 test images: 1.00
[('weight_mask', tensor([[1., 1., 1.,  ..., 1., 0., 0.],
        [1., 1., 1.,  ..., 1., 0., 1.],
        [1., 0., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 0., 1.,  ..., 1., 1., 1.],
        [1., 1., 0.,  ..., 1., 0., 1.],
        [0., 1., 1.,  ..., 1., 1., 0.]]))]


In [8]:
max_growth = 20
num_epochs = 10
gamma = 10 #num new nodes on first growth

model = LeNet(max_growth = max_growth)
baseline = LeNet(max_growth = 0)

parameters_to_prune = (
#    (model.conv1, 'weight'),
#    (model.conv2, 'weight'),
    (model.fc1, 'weight'),
    (model.fc2, 'weight'),
#    (model.fc3, 'weight'),
)
for module,name in parameters_to_prune:
  torch.nn.utils.prune.random_structured(module, name, max_growth,  0)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
baseline_optim = torch.optim.Adam(baseline.parameters(), lr=learning_rate)

model_acc = []
baseline_acc = []
for i in range(5):
  #grow 1/i*growth_factor params total:
  
  #for module,name in parameters_to_prune:
    #grow
  
    

  


  train(model, train_loader,test_loader,num_epochs = 5, optimizer = optimizer)
  # train(baseline, train_loader,test_loader,num_epochs = 5, optimizer = baseline_optim)
  # model_acc.append(test(model, test_loader))
  baseline_acc.append(test(baseline, test_loader))

  plt.plot(np.arange(len(model_acc)), model_acc, label = 'Adaptive Model')
  plt.plot(np.arange(len(baseline_acc)), baseline_acc, label = 'Baseline')
  plt.show()

  prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.10+(-1*i*.01),
  )

Epoch [1/5], Step [400/600], Loss: 0.0537
Epoch [2/5], Step [400/600], Loss: 0.0773
Epoch [3/5], Step [400/600], Loss: 0.0305
Epoch [4/5], Step [400/600], Loss: 0.0182


KeyboardInterrupt: 