In [1]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.autograd import Variable
from torch import optim
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchvision import models
from torchsummary import summary
import copy
# from summary import summary

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

class LeNet(nn.Module):
    """LeNet-style CNN whose two hidden fully connected layers are widened by ``max_growth``.

    Two conv stages (5x5 conv -> batch norm -> ReLU -> 2x2 max-pool) feed three
    linear layers. The first linear layer takes 400 features, which is what the
    conv stages produce (16 channels x 5 x 5) for a single-channel 32x32 input.

    Args:
        max_growth: extra units added on top of the base hidden widths of 120
            (fc1 output) and 84 (fc2 output).
    """

    def __init__(self, max_growth):
        super().__init__()
        hidden1 = 120 + max_growth
        hidden2 = 84 + max_growth

        # Attribute names fix the state_dict keys; creation order fixes the
        # sequence in which random initial weights are drawn.
        self.layer1 = self._conv_stage(1, 6)
        self.layer2 = self._conv_stage(6, 16)
        self.fc1 = nn.Linear(400, hidden1)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.relu1 = nn.ReLU()
        self.fc3 = nn.Linear(hidden2, 10)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one conv(5x5, no padding) -> batch norm -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Return the raw 10-way class scores (no softmax applied) for batch ``x``."""
        features = self.layer2(self.layer1(x))
        # Keep the batch dimension and flatten everything else for the linear layers.
        flat = features.reshape(features.size(0), -1)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu1(self.fc2(hidden))
        return self.fc3(hidden)

# Network with 20 extra units in each hidden FC layer, placed on the selected device.
model = LeNet(max_growth=20).to(device)

In [3]:
# Hyperparameters for the experiments in this notebook
batch_size = 100        # samples per mini-batch handed to both DataLoaders
num_classes = 10        # not referenced by the visible cells
learning_rate = 1e-3    # step size passed to the Adam optimizers

# Loading the dataset and preprocessing

# Single place to change where MNIST is stored / downloaded.
# NOTE(review): this is a machine-specific absolute path; point it at a local
# directory before running the notebook on another machine.
data_root = '/Users/dimademler/Desktop/me-Programming/pytorch'

# One preprocessing pipeline shared by BOTH splits: upscale 28x28 digits to the
# 32x32 input LeNet expects, convert to tensors, then standardize.
# The test split must be normalized with the same (training-set) statistics as
# the training split so the model sees inputs on the scale it was trained on.
mnist_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

train_dataset = torchvision.datasets.MNIST(root=data_root,
                                           train=True,
                                           transform=mnist_transform,
                                           download=True)


test_dataset = torchvision.datasets.MNIST(root=data_root,
                                          train=False,
                                          transform=mnist_transform,
                                          download=True)


# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)


# Evaluation does not benefit from shuffling; a fixed order keeps per-batch
# results reproducible from run to run.
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                          batch_size = batch_size,
                                          shuffle = False)

# Number of mini-batches in one pass over the training set (used for progress output)
total_step = len(train_loader)

# Loss function read by train(): cross-entropy on the raw class scores
cost = torch.nn.CrossEntropyLoss()

def train(model,train_loader,test_loader,num_epochs,optimizer):
    """Train ``model`` in place for ``num_epochs`` passes over ``train_loader``.

    Uses the notebook-level ``cost`` loss and moves every batch to the
    notebook-level ``device``. Prints the current loss every 400 steps.

    Args:
        model: network to optimize; expected to already live on ``device``.
        train_loader: iterable yielding ``(images, labels)`` batches.
        test_loader: accepted for call compatibility; not used here.
        num_epochs: number of full passes over ``train_loader``.
        optimizer: optimizer already bound to ``model``'s parameters.
    """
    # test() leaves the model in eval mode; switch back so BatchNorm uses batch
    # statistics and keeps updating its running averages while training.
    model.train()

    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = cost(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 400 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                        .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
  
def test(model, test_loader):
    # Test the model
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for i, (images, labels) in enumerate(test_loader):  
            test_output = model(images)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == labels).sum().item() / float(labels.size(0))

    print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
    return accuracy


In [4]:
# Experiment settings: extra hidden units per FC layer, and the epoch budget.
# Both names are reassigned by the following cell.
max_growth, num_epochs = 20, 10

# Disabled run:
# train(model, train_loader,test_loader,num_epochs = 5, optimizer = optimizer)
# test(model,test_loader)


In [5]:
max_growth = 30
num_epochs = 6

# Build the widened network on the same device train() moves each batch to;
# leaving it on the CPU would raise a device-mismatch error on a CUDA machine.
model = LeNet(max_growth = max_growth).to(device)
# baseline = LeNet(max_growth = 0)

# (module, parameter-name) pairs selected for pruning; not used within this cell.
parameters_to_prune = (
    (model.fc1, 'weight'),
    (model.fc2, 'weight'),
)



# Created after the model is on its final device so the optimizer tracks the
# parameters that are actually trained.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# baseline_optim = torch.optim.Adam(baseline.parameters(), lr=learning_rate)

model_acc = []
baseline_acc = []

train(model, train_loader,test_loader,num_epochs, optimizer = optimizer)


Epoch [1/6], Step [400/600], Loss: 0.0163
Epoch [2/6], Step [400/600], Loss: 0.0580
Epoch [3/6], Step [400/600], Loss: 0.0425
Epoch [4/6], Step [400/600], Loss: 0.0245
Epoch [5/6], Step [400/600], Loss: 0.0380
Epoch [6/6], Step [400/600], Loss: 0.0034


In [6]:
# Reference measurement: accuracy of the trained network before any pruning.
print("Testing before pruning: ")

# print(LeNet)
test(model=model, test_loader=test_loader)

# NOTE(review): this VGG16 instance is not used by any visible cell (its only
# consumer, the summary call below, is commented out) -- confirm it is still needed.
vgg = torchvision.models.vgg16()
# summary(vgg, (3, 224, 224))




Testing before pruning: 
Test Accuracy of the model on the 10000 test images: 1.00


In [10]:
# Prune a deep copy so the trained `model` keeps its dense weights.
modelcopy = copy.deepcopy(model)

# max_growth = 30
# Fraction of output rows (dim 0) to mask in each selected weight matrix.
num_prunedfloat = 0.7

parameters_to_prune = (
    (modelcopy.fc1, 'weight'),
    (modelcopy.fc2, 'weight'),
)

# Randomly mask whole rows of each listed parameter; this re-registers the
# parameter as `<name>_orig` plus a `<name>_mask` buffer on the module.
for module, name in parameters_to_prune:
    prune.random_structured(module, name=name, amount=num_prunedfloat, dim=0)

print("Testing after pruning: ")
test(modelcopy, test_loader)
# vgg = models.vgg16()
# summary(vgg, (3, 224, 224))

# print(model.state_dict())
# List every state_dict entry with its shape to show the *_orig / *_mask pairs.
pruned_state = modelcopy.state_dict()
for param_tensor in pruned_state:
    print(param_tensor, "\t", pruned_state[param_tensor].size())



Testing after pruning: 
Test Accuracy of the model on the 10000 test images: 0.62
layer1.0.weight 	 torch.Size([6, 1, 5, 5])
layer1.0.bias 	 torch.Size([6])
layer1.1.weight 	 torch.Size([6])
layer1.1.bias 	 torch.Size([6])
layer1.1.running_mean 	 torch.Size([6])
layer1.1.running_var 	 torch.Size([6])
layer1.1.num_batches_tracked 	 torch.Size([])
layer2.0.weight 	 torch.Size([16, 6, 5, 5])
layer2.0.bias 	 torch.Size([16])
layer2.1.weight 	 torch.Size([16])
layer2.1.bias 	 torch.Size([16])
layer2.1.running_mean 	 torch.Size([16])
layer2.1.running_var 	 torch.Size([16])
layer2.1.num_batches_tracked 	 torch.Size([])
fc1.bias 	 torch.Size([150])
fc1.weight_orig 	 torch.Size([150, 400])
fc1.weight_mask 	 torch.Size([150, 400])
fc2.bias 	 torch.Size([114])
fc2.weight_orig 	 torch.Size([114, 150])
fc2.weight_mask 	 torch.Size([114, 150])
fc3.weight 	 torch.Size([10, 114])
fc3.bias 	 torch.Size([10])


In [None]:
# Show the pruned copy's module structure.
modelcopy

In [8]:
max_growth = 20
num_epochs = 10  # NOTE(review): not read by the loop below, which uses epochs_per_round
gamma = 10 #num new nodes on first growth

num_rounds = 5         # train -> evaluate -> plot -> prune cycles
epochs_per_round = 5   # training epochs inside each cycle

# Both networks must live on the device train() moves the batches to.
model = LeNet(max_growth = max_growth).to(device)
baseline = LeNet(max_growth = 0).to(device)

parameters_to_prune = (
#    (model.conv1, 'weight'),
#    (model.conv2, 'weight'),
    (model.fc1, 'weight'),
    (model.fc2, 'weight'),
#    (model.fc3, 'weight'),
)
# An integer amount masks that many whole output rows (dim 0): the widened
# model starts with max_growth randomly chosen units per layer switched off.
for module,name in parameters_to_prune:
    torch.nn.utils.prune.random_structured(module, name, max_growth,  0)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
baseline_optim = torch.optim.Adam(baseline.parameters(), lr=learning_rate)

model_acc = []
baseline_acc = []
for i in range(num_rounds):
    # TODO: grow 1/i*growth_factor params total (growth step not implemented yet):
    #for module,name in parameters_to_prune:
        #grow

    # test() leaves a network in eval mode, so switch back to training mode
    # before every round; otherwise BatchNorm would train on frozen statistics.
    model.train()
    train(model, train_loader,test_loader,num_epochs = epochs_per_round, optimizer = optimizer)
    baseline.train()
    train(baseline, train_loader,test_loader,num_epochs = epochs_per_round, optimizer = baseline_optim)

    # Evaluate both networks so the two curves below are comparable.
    model_acc.append(test(model, test_loader))
    baseline_acc.append(test(baseline, test_loader))

    plt.plot(np.arange(len(model_acc)), model_acc, label = 'Adaptive Model')
    plt.plot(np.arange(len(baseline_acc)), baseline_acc, label = 'Baseline')
    plt.xlabel('Round')
    plt.ylabel('Test accuracy')
    plt.legend()  # without this the labels above are never displayed
    plt.show()

    # Magnitude-prune across fc1 and fc2 jointly; the fraction shrinks by one
    # percentage point each round (0.10, 0.09, ...).
    prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=0.10 - 0.01 * i,
    )

Epoch [1/5], Step [400/600], Loss: 0.0537
Epoch [2/5], Step [400/600], Loss: 0.0773
Epoch [3/5], Step [400/600], Loss: 0.0305
Epoch [4/5], Step [400/600], Loss: 0.0182


KeyboardInterrupt: 