In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
from torchvision import models

# --- Data -------------------------------------------------------------------
# Convert images to tensors, then normalise every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

# --- Model ------------------------------------------------------------------
# Randomly initialised VGG16 whose last classifier layer is swapped for a
# 10-way linear layer (CIFAR-10 has 10 classes).
vgg = models.vgg16(pretrained=False)
print([*vgg.classifier.children()])

num_features = vgg.classifier[6].in_features
features = [*vgg.classifier.children()][:-1]
features.append(nn.Linear(num_features, 10))
vgg.classifier = nn.Sequential(*features)

# --- Optimisation -----------------------------------------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg.parameters(), lr=0.001, momentum=0.9)

# Use the first GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
vgg.to(device)
# Train for 50 epochs, taking one SGD step per mini-batch.
# Switch to training mode explicitly so Dropout is active even if an earlier
# cell left the model in eval mode.
vgg.train()
for i in range(50):
    print(i)  # epoch counter, used as a coarse progress indicator
    for data in trainloader:
        # `inputs`/`labels` rather than `input`/`label`: binding `input` at
        # notebook scope would shadow the builtin for every later cell.
        inputs = data[0].to(device)
        labels = data[1].to(device)
        optimizer.zero_grad()
        output = vgg(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

# Measure top-1 accuracy on the test loader.
# The model must be in eval mode here: VGG's classifier contains Dropout
# layers, which would otherwise randomly zero activations during inference.
vgg.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs = data[0].to(device)
        labels = data[1].to(device)
        outputs = vgg(inputs)
        # Index of the largest logit along the class dimension.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy : {100 * correct / total}%')


Files already downloaded and verified
Files already downloaded and verified




[Linear(in_features=25088, out_features=4096, bias=True), ReLU(inplace=True), Dropout(p=0.5, inplace=False), Linear(in_features=4096, out_features=4096, bias=True), ReLU(inplace=True), Dropout(p=0.5, inplace=False), Linear(in_features=4096, out_features=1000, bias=True)]
0


KeyboardInterrupt: 

In [None]:
# Map each module's qualified name (as yielded by named_modules()) to the
# module object, then print the mapping.
layers = {name: module for name, module in vgg.named_modules()}
print(layers)

In [None]:
from copy import deepcopy
import numpy as np
def get_size(parameter):
    """Return the number of bytes taken by the elements of ``parameter``.

    Computed as bytes-per-element times element count.
    """
    bytes_per_element = parameter.element_size()
    element_count = parameter.nelement()
    return bytes_per_element * element_count
def prune_layer(model, layer_name, p):
    """Magnitude-prune one layer of ``model`` in place.

    The ``int(n * p)`` weights with the smallest absolute value are set to
    zero, where ``n`` is the number of weights in the layer, so ``p`` is the
    fraction of weights removed (the layer's target sparsity).

    Args:
        model: module containing the layer.
        layer_name: qualified name of the layer as produced by
            ``model.named_modules()``; the layer must expose ``.weight``.
        p: fraction of weights to zero, in ``[0, 1]``.

    Raises:
        KeyError: if ``layer_name`` is not a submodule of ``model``.
        ValueError: if ``p`` is outside ``[0, 1]``.
    """
    if not 0.0 <= p <= 1.0:
        raise ValueError(f"p must be in [0, 1], got {p}")

    layer = dict(model.named_modules())[layer_name]
    # The layer is pruned on whatever device it already lives on; moving a
    # single submodule elsewhere would leave the model split across devices.
    weight = layer.weight

    num_weights = weight.numel()
    num_to_prune = min(int(num_weights * p), num_weights)
    if num_to_prune == 0:
        return

    with torch.no_grad():
        magnitudes = weight.abs().flatten()
        # Indices of the smallest magnitudes. Selecting by index (rather than
        # by a threshold comparison) zeroes exactly `num_to_prune` weights
        # even when several weights share the same magnitude.
        _, prune_idx = torch.topk(magnitudes, num_to_prune, largest=False)
        drop = torch.zeros_like(magnitudes, dtype=torch.bool)
        drop[prune_idx] = True
        weight.masked_fill_(drop.view_as(weight), 0)
def sensitivity_scan(model, layers, sparsity_range, loader=None):
    """Prune each Conv2d layer in isolation and record the resulting accuracy.

    For every ``nn.Conv2d`` entry in ``layers`` and every value in
    ``sparsity_range``, a fresh deep copy of ``model`` is made, that single
    layer is pruned with ``prune_layer`` and the copy is scored with
    ``evaluate_model``. ``model`` itself is never modified.

    Args:
        model: the reference (unpruned) model.
        layers: mapping of qualified layer name -> module, e.g.
            ``dict(model.named_modules())``.
        sparsity_range: iterable of values forwarded to ``prune_layer``.
        loader: data loader used for evaluation; defaults to the
            notebook-level ``testloader``.

    Returns:
        ``(acc, size)``: two dicts keyed by conv layer name. ``acc[name][k]``
        is the accuracy (percent) and ``size[name][k]`` the size in MiB of
        the non-zero weights left in that layer for the k-th sparsity value.
    """
    if loader is None:
        loader = testloader
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    acc = {}
    size = {}
    for i, (layer_name, layer) in enumerate(layers.items()):
        print(i)  # progress: index of the module currently being visited
        if not isinstance(layer, nn.Conv2d):
            continue
        acc[layer_name] = []
        size[layer_name] = []
        for sparsity in sparsity_range:
            # Work on a copy so every measurement starts from the same
            # unpruned weights.
            pmodel = deepcopy(model).to(device)
            prune_layer(pmodel, layer_name, sparsity)
            acc[layer_name].append(evaluate_model(pmodel, loader))

            # Measure the *pruned* copy: a dense byte count of the original
            # layer would be identical for every sparsity level.
            pruned_weight = dict(pmodel.named_modules())[layer_name].weight
            nonzero_count = int((pruned_weight != 0).sum().item())
            size_bytes = nonzero_count * pruned_weight.element_size()
            size[layer_name].append(size_bytes / (1024 * 1024))

            del pmodel  # release the copy before building the next one
    return acc, size
def evaluate_model(model, testloader):
    """Return the top-1 accuracy (in percent) of ``model`` on ``testloader``.

    Batches are moved to the device of the model's first parameter, so the
    function works wherever the model currently lives. The model is put in
    eval mode for the duration of the call and its previous train/eval mode
    is restored afterwards, so a later training loop is not silently run
    with Dropout disabled.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        testloader: iterable of batches where ``batch[0]`` holds the inputs
            and ``batch[1]`` the integer class labels.

    Returns:
        Accuracy as a float in ``[0, 100]``; ``0.0`` if the loader yields no
        samples.
    """
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        # Parameter-free model: fall back to the notebook-wide convention.
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for data in testloader:
                images = data[0].to(device)
                labels = data[1].to(device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * correct / total

# Run the per-layer sensitivity scan and print one line per
# (conv layer, sparsity) pair.
layers = dict(vgg.named_modules())
perc = np.arange(0.1, 0.9, 0.1)
accuracies, sizes = sensitivity_scan(vgg, layers, perc)
for layer_name, layer in layers.items():
    if not isinstance(layer, nn.Conv2d):
        continue
    print(f"Layer: {layer_name}")
    # The three sequences are index-aligned: entry k of each list belongs to
    # perc[k].
    for p, acc, size in zip(perc, accuracies[layer_name], sizes[layer_name]):
        print(f"Sparsity: {p:.1f}, Accuracy: {acc:.2f}%, Size: {size:.2f} MBs")
    print()



In [None]:
# Value handed to prune_layer for each conv layer of the VGG16 feature
# extractor (presumably picked from the sensitivity scan -- TODO confirm).
target_sparsity = {
    'features.0': 0.8,
    'features.2': 0.8,
    'features.5': 0.8,
    'features.7': 0.5,
    'features.10': 0.6,
    'features.12': 0.6,
    'features.14': 0.8,
    'features.17': 0.6,
    'features.19': 0.5,
    'features.21': 0.6,
    'features.24': 0.2,
    'features.26': 0.4,
    'features.28': 0.2,
}

# Same device selection as the rest of the notebook, so this cell also runs
# on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

pruned_model = deepcopy(vgg).to(device)
# `sparsity` (not `perc`) so the sparsity array defined by the scan cell is
# not overwritten by a scalar.
for layer_name, sparsity in target_sparsity.items():
    prune_layer(pruned_model, layer_name, sparsity)

# Remember which weights survived pruning. Without re-applying these masks,
# SGD would move the zeroed weights away from zero and the model would be
# dense again after fine-tuning.
pruned_layers = dict(pruned_model.named_modules())
masks = {name: pruned_layers[name].weight != 0 for name in target_sparsity}

acc = evaluate_model(pruned_model, testloader)
print(f"Accuracy before fine-tuning: {acc:.2f}%")

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(pruned_model.parameters(), lr=0.001, momentum=0.9)

# Evaluation may have left the model in eval mode; fine-tuning needs Dropout.
pruned_model.train()
for epoch in range(10):
    running_loss = 0.0
    for data in trainloader:
        inputs = data[0].to(device)
        labels = data[1].to(device)
        optimizer.zero_grad()
        outputs = pruned_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Re-zero pruned weights after the step; masking the gradient alone
        # is not enough because momentum can still move those weights.
        with torch.no_grad():
            for name, mask in masks.items():
                pruned_layers[name].weight.mul_(mask)
        running_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(trainloader)}')

acc = evaluate_model(pruned_model, testloader)
print(f"Accuracy after fine-tuning: {acc:.2f}%")


In [None]:
import matplotlib.pyplot as plt
def plot_weight_distribution(model, layer_names):
    """Show one histogram of weight values per named layer of ``model``.

    Args:
        model: module whose layers are plotted.
        layer_names: iterable of qualified layer names as produced by
            ``model.named_modules()``; each layer must expose ``.weight``.

    Raises:
        KeyError: if a name in ``layer_names`` is not a submodule of
            ``model``.
    """
    # Build the name -> module lookup once instead of once per layer.
    modules = dict(model.named_modules())
    for layer_name in layer_names:
        weights = modules[layer_name].weight.detach().flatten().cpu().numpy()
        fig, ax = plt.subplots(figsize=(8, 6))
        # The label gives the legend an entry; a legend with no labelled
        # artists only triggers a warning and draws nothing.
        ax.hist(weights, bins=50, alpha=0.7, color='b', label='weights')
        ax.set_title(f'Layer: {layer_name}')
        ax.set_xlabel('Value')
        ax.set_ylabel('Frequency')
        ax.legend()
        ax.grid(True)
        plt.show()
        # Release the figure so plotting many layers does not accumulate
        # open figures.
        plt.close(fig)
# Plot the weight histograms of every conv layer, first for the original
# model and then for the pruned one.
layers = dict(vgg.named_modules())
# Named `conv_layer_names` rather than `list`: rebinding the builtin `list`
# at notebook scope breaks every later `list(...)` call in the kernel,
# including the one in the model-setup cell when it is re-run.
conv_layer_names = [
    layer_name
    for layer_name, layer in layers.items()
    if isinstance(layer, nn.Conv2d)
]
plot_weight_distribution(vgg, conv_layer_names)
plot_weight_distribution(pruned_model, conv_layer_names)