In [1]:
import os
import random

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms

import time
import copy

import numpy as np

import sklearn.metrics


In [2]:
def set_random_seeds(random_seed=0):
    """Seed Python's ``random``, NumPy and PyTorch with one shared value.

    Besides seeding the three generators, this sets the cuDNN
    ``deterministic`` flag to ``True`` and the ``benchmark`` flag to ``False``.

    Args:
        random_seed: Seed handed to every generator (default ``0``).
    """
    # Random number generators.
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # cuDNN flags.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [3]:
!nvidia-smi

Sat Mar 30 12:24:41 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0              27W / 250W |      0MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
#torch.cuda.empty_cache()

In [5]:
#!nvidia-smi

In [6]:
#import os
#import time
import math
#import random
#import numpy as np
import pandas as pd
from pathlib import Path
import glob

import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance, ImageOps

from tqdm import tqdm, tqdm_notebook

import torch
from torch import nn, cuda
from torch.autograd import Variable 
import torch.nn.functional as F
import torchvision as vision
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, SGD, Optimizer
from torch.optim.lr_scheduler import _LRScheduler, CosineAnnealingLR, ReduceLROnPlateau

from sklearn.metrics import f1_score

class CIFAR10Policy(object):
    """AutoAugment for CIFAR10: apply one randomly chosen sub-policy per image.

    The policy holds 25 ``SubPolicy`` objects; every call picks one of them
    uniformly at random and applies it to the image.

        Usage:
        >>> policy = CIFAR10Policy()
        >>> transformed = policy(image)
        Usage inside a PyTorch transform pipeline:
        >>> transform=transforms.Compose([
        >>>     transforms.Resize(256),
        >>>     CIFAR10Policy(),
        >>>     transforms.ToTensor()])
    """
    def __init__(self, fillcolor=(128, 128, 128)):
        # One row per sub-policy:
        # (p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2)
        sub_policy_specs = [
            (0.1, "invert", 7, 0.2, "contrast", 6),
            (0.7, "rotate", 2, 0.3, "translateX", 9),
            (0.8, "sharpness", 1, 0.9, "sharpness", 3),
            (0.5, "shearY", 8, 0.7, "translateY", 9),
            (0.5, "autocontrast", 8, 0.9, "equalize", 2),

            (0.2, "shearY", 7, 0.3, "posterize", 7),
            (0.4, "color", 3, 0.6, "brightness", 7),
            (0.3, "sharpness", 9, 0.7, "brightness", 9),
            (0.6, "equalize", 5, 0.5, "equalize", 1),
            (0.6, "contrast", 7, 0.6, "sharpness", 5),

            (0.7, "color", 7, 0.5, "translateX", 8),
            (0.3, "equalize", 7, 0.4, "autocontrast", 8),
            (0.4, "translateY", 3, 0.2, "sharpness", 6),
            (0.9, "brightness", 6, 0.2, "color", 8),
            (0.5, "solarize", 2, 0.0, "invert", 3),

            (0.2, "equalize", 0, 0.6, "autocontrast", 0),
            (0.2, "equalize", 8, 0.8, "equalize", 4),
            (0.9, "color", 9, 0.6, "equalize", 6),
            (0.8, "autocontrast", 4, 0.2, "solarize", 8),
            (0.1, "brightness", 3, 0.7, "color", 0),

            (0.4, "solarize", 5, 0.9, "autocontrast", 3),
            (0.9, "translateY", 9, 0.7, "translateY", 9),
            (0.9, "autocontrast", 2, 0.8, "solarize", 3),
            (0.8, "equalize", 8, 0.1, "invert", 3),
            (0.7, "translateY", 9, 0.9, "autocontrast", 1),
        ]
        # The same fill colour is forwarded to every sub-policy.
        self.policies = [SubPolicy(*spec, fillcolor) for spec in sub_policy_specs]

    def __call__(self, img):
        """Apply one uniformly drawn sub-policy to ``img`` and return the result."""
        chosen = self.policies[random.randrange(len(self.policies))]
        return chosen(img)

    def __repr__(self):
        return "AutoAugment CIFAR10 Policy"


class SubPolicy(object):
    """Two image operations, each applied with its own probability.

    Args:
        p1: Probability of applying ``operation1``.
        operation1: Name of the first operation (a key of the ``func`` table
            built in ``__init__``, e.g. ``"rotate"`` or ``"equalize"``).
        magnitude_idx1: Index (0-9) into the magnitude range of ``operation1``.
        p2: Probability of applying ``operation2``.
        operation2: Name of the second operation.
        magnitude_idx2: Index (0-9) into the magnitude range of ``operation2``.
        fillcolor: Colour used for the pixels uncovered by the geometric
            operations (shear, translate and rotate).

    Raises:
        KeyError: If an operation name is unknown.
        IndexError: If a magnitude index is outside its range table.
    """
    def __init__(self, p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2, fillcolor=(128, 128, 128)):
        # Ten magnitude levels per operation; the *_idx arguments select one.
        ranges = {
            "shearX": np.linspace(0, 0.3, 10),
            "shearY": np.linspace(0, 0.3, 10),
            "translateX": np.linspace(0, 150 / 331, 10),
            "translateY": np.linspace(0, 150 / 331, 10),
            "rotate": np.linspace(0, 30, 10),
            "color": np.linspace(0.0, 0.9, 10),
            "posterize": np.round(np.linspace(8, 4, 10), 0).astype(int),
            "solarize": np.linspace(256, 0, 10),
            "contrast": np.linspace(0.0, 0.9, 10),
            "sharpness": np.linspace(0.0, 0.9, 10),
            "brightness": np.linspace(0.0, 0.9, 10),
            # The last three operations take no magnitude; the value is unused.
            "autocontrast": [0] * 10,
            "equalize": [0] * 10,
            "invert": [0] * 10
        }

        # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand
        def rotate_with_fill(img, magnitude):
            # Rotate in RGBA so the uncovered corners become transparent, then
            # composite over a solid background. The background now honours
            # ``fillcolor`` (it used to be hard-coded grey), matching the
            # shear/translate operations; the default colour gives the same
            # pixels as before.
            rot = img.convert("RGBA").rotate(magnitude)
            background = Image.new(img.mode, rot.size, fillcolor).convert("RGBA")
            return Image.composite(rot, background, rot).convert(img.mode)

        # Operation name -> callable(img, magnitude). Where a sign applies it
        # is drawn at call time, so the direction varies from image to image.
        func = {
            "shearX": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0),
                Image.BICUBIC, fillcolor=fillcolor),
            "shearY": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0),
                Image.BICUBIC, fillcolor=fillcolor),
            "translateX": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0),
                fillcolor=fillcolor),
            "translateY": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1])),
                fillcolor=fillcolor),
            # NOTE(review): unlike the other geometric operations, rotate does
            # not randomise its sign (see the alternative below) - confirm
            # this is intended.
            "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude),
            # "rotate": lambda img, magnitude: img.rotate(magnitude * random.choice([-1, 1])),
            "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])),
            # The range table stores numpy integers; hand Pillow a plain int.
            "posterize": lambda img, magnitude: ImageOps.posterize(img, int(magnitude)),
            "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude),
            "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img),
            "equalize": lambda img, magnitude: ImageOps.equalize(img),
            "invert": lambda img, magnitude: ImageOps.invert(img)
        }

        # self.name = "{}_{:.2f}_and_{}_{:.2f}".format(
        #     operation1, ranges[operation1][magnitude_idx1],
        #     operation2, ranges[operation2][magnitude_idx2])
        self.p1 = p1
        self.operation1 = func[operation1]
        self.magnitude1 = ranges[operation1][magnitude_idx1]
        self.p2 = p2
        self.operation2 = func[operation2]
        self.magnitude2 = ranges[operation2][magnitude_idx2]


    def __call__(self, img):
        """Apply each operation with its own probability and return the image."""
        if random.random() < self.p1:
            img = self.operation1(img, self.magnitude1)
        if random.random() < self.p2:
            img = self.operation2(img, self.magnitude2)
        return img
  

class TestDataset(Dataset):
    """Dataset that serves RGB images listed in ``df``, without labels.

    Args:
        df: Indexable collection of image file names; ``df[idx]`` is joined
            onto ``TEST_IMAGE_PATH``.
        mode: Key used to select the transform from ``transforms``.
        transforms: Optional mapping from mode name to a callable applied to
            each image. ``None`` (the default) means "no transform"; this used
            to raise ``TypeError`` on ``transforms[self.mode]``.

    Raises:
        KeyError: If ``transforms`` is given but has no entry for ``mode``.
    """
    def __init__(self, df, mode='test', transforms=None):
        self.df = df
        self.mode = mode
        self.transform = transforms[self.mode] if transforms is not None else None

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # NOTE(review): TEST_IMAGE_PATH is not defined in the visible part of
        # the notebook; it is presumably a pathlib.Path set elsewhere.
        # The context manager closes the underlying file once the pixel data
        # has been copied by convert().
        with Image.open(TEST_IMAGE_PATH / self.df[idx]) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image

In [7]:
"""
train_dataset = torchvision.datasets.CIFAR10(root="data",
                                             train=True,
                                             download=True,
                                             transform=torchvision.transforms.Compose([
        # Resize step is required as we will use a ResNet model, which accepts at leats 224x224 images
        torchvision.transforms.Resize((224,224)),  
        torchvision.transforms.ToTensor(),
    ]))

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=512, shuffle=False, num_workers=2, pin_memory=False)

means = []
stdevs = []
for X, _ in train_dataloader:
    # Dimensions 0,2,3 are respectively the batch, height and width dimensions
    means.append(X.mean(dim=(0,2,3)))
    stdevs.append(X.std(dim=(0,2,3)))

mean = torch.stack(means, dim=0).mean(dim=0)
stdev = torch.stack(stdevs, dim=0).mean(dim=0)
print(mean, stdev)
"""

'\ntrain_dataset = torchvision.datasets.CIFAR10(root="data",\n                                             train=True,\n                                             download=True,\n                                             transform=torchvision.transforms.Compose([\n        # Resize step is required as we will use a ResNet model, which accepts at leats 224x224 images\n        torchvision.transforms.Resize((224,224)),  \n        torchvision.transforms.ToTensor(),\n    ]))\n\ntrain_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=512, shuffle=False, num_workers=2, pin_memory=False)\n\nmeans = []\nstdevs = []\nfor X, _ in train_dataloader:\n    # Dimensions 0,2,3 are respectively the batch, height and width dimensions\n    means.append(X.mean(dim=(0,2,3)))\n    stdevs.append(X.std(dim=(0,2,3)))\n\nmean = torch.stack(means, dim=0).mean(dim=0)\nstdev = torch.stack(stdevs, dim=0).mean(dim=0)\nprint(mean, stdev)\n'

In [8]:
#!nvidia-smi

In [9]:
def prepare_dataloader(num_workers=0,
                       train_batch_size=128,
                       eval_batch_size=256,
                       mean=(0.4914, 0.4822, 0.4466),
                       stdev=(0.2412, 0.2377, 0.2563)):
    """Build the CIFAR10 training and test data loaders.

    Both splits are resized to 224x224, converted to tensors and normalised
    with ``mean``/``stdev``; the training split additionally goes through
    ``CIFAR10Policy`` (AutoAugment) before tensor conversion. The dataset is
    downloaded into ``data/`` when it is not already there.

    Args:
        num_workers: Worker processes used by each ``DataLoader``.
        train_batch_size: Batch size of the (randomly sampled) training loader.
        eval_batch_size: Batch size of the (sequential) test loader.
        mean: Per-channel mean used for normalisation.
        stdev: Per-channel standard deviation used for normalisation.

    Returns:
        ``(train_loader, test_loader, classes)`` where ``classes`` is the
        class list exposed by the training set.
    """
    # Both pipelines now receive the normalisation statistics in the same
    # form (the test pipeline used to wrap ``mean`` in a tensor while the
    # training pipeline passed the raw sequence).
    train_transform = transforms.Compose([
        torchvision.transforms.Resize((224, 224)),
        CIFAR10Policy(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=mean, std=stdev)
    ])

    test_transform = transforms.Compose([
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=mean, std=stdev)
    ])

    train_set = torchvision.datasets.CIFAR10(root="data",
                                             train=True,
                                             download=True,
                                             transform=train_transform)

    test_set = torchvision.datasets.CIFAR10(root="data",
                                            train=False,
                                            download=True,
                                            transform=test_transform)

    # Explicit samplers: shuffled for training, fixed order for evaluation.
    train_sampler = torch.utils.data.RandomSampler(train_set)
    test_sampler = torch.utils.data.SequentialSampler(test_set)

    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=train_batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=eval_batch_size,
                                              sampler=test_sampler,
                                              num_workers=num_workers,
                                              pin_memory=True)

    classes = train_set.classes

    return train_loader, test_loader, classes

In [10]:
def _masked_weight_l1(model, device):
    """Return the summed L1 norm of ``weight_mask * weight_orig`` over ``model``.

    Only modules that directly own both a ``weight_mask`` buffer and a
    ``weight_orig`` parameter contribute. The result is a scalar tensor on
    ``device``.
    """
    penalty = torch.tensor(0.).to(device)
    for module in model.modules():
        # recurse=False: model.modules() already visits every sub-module, so
        # only the tensors owned directly by ``module`` need to be inspected.
        mask = dict(module.named_buffers(recurse=False)).get("weight_mask")
        weight = dict(module.named_parameters(recurse=False)).get("weight_orig")
        if mask is not None and weight is not None:
            penalty = penalty + torch.norm(mask * weight, 1)
    return penalty


def train_model(model,
                train_loader,
                test_loader,
                device,
                model_dir,
                model_filename,
                l1_regularization_strength=0,
                l2_regularization_strength=0,
                weight_decay=5e-4,
                learning_rate=1e-4,
                num_epochs=200
                ):
    """Train ``model`` with Adam and a three-step learning-rate schedule.

    The model is evaluated once before training and after every epoch, and a
    checkpoint named ``"<model_filename>_epoch<N>"`` is written to
    ``model_dir`` every 40 epochs (starting with epoch 0).

    Args:
        model: Network to train; moved to ``device``.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        test_loader: Loader used for the per-epoch evaluation.
        device: Device on which training and evaluation run.
        model_dir: Directory receiving the periodic checkpoints.
        model_filename: Prefix of the checkpoint file names.
        l1_regularization_strength: Weight of the L1 penalty on the masked
            weights of pruned modules; ``0`` disables the penalty.
        l2_regularization_strength: Accepted for interface compatibility; it
            is not used by the Adam optimizer configured here.
        weight_decay: Weight decay passed to Adam.
        learning_rate: Initial learning rate.
        num_epochs: Number of epochs; non-integer values are truncated.

    Returns:
        The trained ``model`` (the same object that was passed in).
    """
    # Callers may pass a float (e.g. ``n * 3 / 2``); range() needs an int.
    num_epochs = int(num_epochs)

    criterion = nn.CrossEntropyLoss()

    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Decay the learning rate by 10x at 3/7, 4.5/7 and 6/7 of the run.
    # The middle milestone used to be ``num_epochs*45/7`` (beyond the end of
    # training, so never reached) and the milestones were floats, which only
    # match an epoch index when they happen to be whole numbers.
    # NOTE(review): 4.5/7 is the assumed intent of the ``45/7`` typo - confirm.
    milestones = sorted(
        step for step in (num_epochs * 3 // 7,
                          num_epochs * 9 // 14,
                          num_epochs * 6 // 7)
        if step > 0)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=milestones,
                                                     gamma=0.1,
                                                     last_epoch=-1)

    # Baseline evaluation before any training step.
    model.eval()
    eval_loss, eval_accuracy = evaluate_model(model=model,
                                              test_loader=test_loader,
                                              device=device,
                                              criterion=criterion)
    print("Epoch: {:03d} Eval Loss: {:.3f} Eval Acc: {:.3f}".format(
        0, eval_loss, eval_accuracy))

    for epoch in range(num_epochs):

        # Training
        model.train()

        running_loss = 0
        running_corrects = 0

        for inputs, labels in train_loader:

            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Skip the module scan entirely when the penalty is switched off.
            if l1_regularization_strength:
                loss = loss + l1_regularization_strength * _masked_weight_l1(model, device)

            loss.backward()
            optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        train_loss = running_loss / len(train_loader.dataset)
        train_accuracy = running_corrects / len(train_loader.dataset)

        # Evaluation
        model.eval()
        eval_loss, eval_accuracy = evaluate_model(model=model,
                                                  test_loader=test_loader,
                                                  device=device,
                                                  criterion=criterion)
        if epoch % 40 == 0:
            save_model(model=model, model_dir=model_dir, model_filename="{}_epoch{}".format(model_filename, epoch))

        scheduler.step()

        print(
            "Epoch: {:03d} Train Loss: {:.3f} Train Acc: {:.3f} Eval Loss: {:.3f} Eval Acc: {:.3f}"
            .format(epoch + 1, train_loss, train_accuracy, eval_loss,
                    eval_accuracy))

    return model

In [11]:
def measure_module_sparsity(module, weight=True, bias=False, use_mask=False):
    """Count the zero entries in a module's weights and/or biases.

    Args:
        module: ``torch.nn.Module`` to inspect.
        weight: Include tensors whose name contains ``"weight"`` (or
            ``"weight_mask"`` when ``use_mask`` is set).
        bias: Include tensors whose name contains ``"bias"`` (or
            ``"bias_mask"`` when ``use_mask`` is set).
        use_mask: Count zeros in the pruning-mask buffers instead of the
            parameters.

    Returns:
        ``(num_zeros, num_elements, sparsity)``. ``sparsity`` is ``0.0`` when
        no tensor matched (for example ``use_mask=True`` on a module that was
        never pruned); this case used to raise ``ZeroDivisionError``.
    """
    if use_mask:
        named_tensors = module.named_buffers()
        weight_key, bias_key = "weight_mask", "bias_mask"
    else:
        named_tensors = module.named_parameters()
        weight_key, bias_key = "weight", "bias"

    num_zeros = 0
    num_elements = 0

    for tensor_name, tensor in named_tensors:
        if weight and weight_key in tensor_name:
            num_zeros += torch.sum(tensor == 0).item()
            num_elements += tensor.nelement()
        if bias and bias_key in tensor_name:
            num_zeros += torch.sum(tensor == 0).item()
            num_elements += tensor.nelement()

    # Nothing matched -> report zero sparsity rather than dividing by zero.
    sparsity = num_zeros / num_elements if num_elements else 0.0

    return num_zeros, num_elements, sparsity

In [12]:
def measure_global_sparsity(model,
                            weight=True,
                            bias=False,
                            conv2d_use_mask=False,
                            linear_use_mask=False):
    """Aggregate the sparsity of every ``Conv2d`` and ``Linear`` module.

    Args:
        model: Network whose modules are scanned.
        weight: Forwarded to ``measure_module_sparsity``.
        bias: Forwarded to ``measure_module_sparsity``.
        conv2d_use_mask: Measure ``Conv2d`` modules through their pruning masks.
        linear_use_mask: Measure ``Linear`` modules through their pruning masks.

    Returns:
        ``(num_zeros, num_elements, sparsity)`` summed over all matching
        modules. ``sparsity`` is ``0.0`` when nothing was counted; this case
        used to raise ``ZeroDivisionError``.
    """
    num_zeros = 0
    num_elements = 0

    for module_name, module in model.named_modules():

        if isinstance(module, torch.nn.Conv2d):
            use_mask = conv2d_use_mask
        elif isinstance(module, torch.nn.Linear):
            use_mask = linear_use_mask
        else:
            # Other layer types do not take part in the statistic.
            continue

        module_num_zeros, module_num_elements, _ = measure_module_sparsity(
            module, weight=weight, bias=bias, use_mask=use_mask)
        num_zeros += module_num_zeros
        num_elements += module_num_elements

    sparsity = num_zeros / num_elements if num_elements else 0.0

    return num_zeros, num_elements, sparsity

In [13]:
def evaluate_model(model, test_loader, device, criterion=None):
    """Compute the average loss and accuracy of ``model`` on ``test_loader``.

    Args:
        model: Network to evaluate; switched to eval mode and moved to
            ``device``.
        test_loader: Loader yielding ``(inputs, labels)`` batches and exposing
            a ``dataset`` attribute whose length is the sample count.
        device: Device on which the forward passes run.
        criterion: Optional loss function; when ``None`` the reported loss
            is 0.

    Returns:
        ``(eval_loss, eval_accuracy)``: the sample-weighted mean loss and the
        fraction of correctly classified samples.
    """
    model.eval()
    model.to(device)

    running_loss = 0
    running_corrects = 0

    # Inference only: without no_grad() every forward pass would record an
    # autograd graph that is never used.
    with torch.no_grad():
        for inputs, labels in test_loader:

            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            if criterion is not None:
                loss = criterion(outputs, labels).item()
            else:
                loss = 0

            # statistics (loss is a per-batch mean, so weight by batch size)
            running_loss += loss * inputs.size(0)
            running_corrects += torch.sum(preds == labels)

    eval_loss = running_loss / len(test_loader.dataset)
    eval_accuracy = running_corrects / len(test_loader.dataset)

    return eval_loss, eval_accuracy

In [14]:
def create_classification_report(model, device, test_loader):
    """Build scikit-learn's text classification report for ``model``.

    Args:
        model: Network to evaluate; switched to eval mode and moved to
            ``device``.
        device: Device on which the forward passes run.
        test_loader: Loader whose batches hold the inputs at index 0 and the
            integer labels at index 1.

    Returns:
        The value returned by ``sklearn.metrics.classification_report`` for
        the collected labels and predictions.
    """
    model.eval()
    model.to(device)

    y_pred = []
    y_true = []

    with torch.no_grad():
        for batch in test_loader:
            # Labels are only needed as Python values, so they are not copied
            # to ``device`` (the old code transferred and then discarded them).
            y_true += batch[1].numpy().tolist()
            images = batch[0].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            y_pred += predicted.cpu().numpy().tolist()

    report = sklearn.metrics.classification_report(
        y_true=y_true, y_pred=y_pred)

    return report

In [15]:
def save_model(model, model_dir, model_filename):
    """Write ``model.state_dict()`` to ``model_dir/model_filename``.

    Args:
        model: Module whose state dict is saved.
        model_dir: Target directory, created (with parents) when missing. An
            empty string means the current working directory.
        model_filename: File name of the checkpoint inside ``model_dir``.
    """
    # exist_ok avoids the check-then-create race; an empty directory name is
    # skipped because os.makedirs("") raises FileNotFoundError.
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    model_filepath = os.path.join(model_dir, model_filename)
    torch.save(model.state_dict(), model_filepath)


In [16]:
def load_model(model, model_filepath, device):
    """Load a saved state dict from ``model_filepath`` into ``model``.

    Args:
        model: Module whose parameters and buffers are overwritten.
        model_filepath: Path of a checkpoint holding a state dict.
        device: ``map_location`` used while deserialising the checkpoint.

    Returns:
        The same ``model`` object, now holding the loaded weights.
    """
    # NOTE: torch.load unpickles the file - only load checkpoints you trust.
    state_dict = torch.load(model_filepath, map_location=device)
    model.load_state_dict(state_dict)
    return model

In [17]:
def create_model(num_classes=10, model_func=torchvision.models.resnet34):
    """Instantiate an untrained classification network.

    Args:
        num_classes: Number of output classes passed to ``model_func``.
        model_func: Model constructor accepting ``num_classes`` and
            ``pretrained`` keyword arguments (torchvision's ``resnet34`` by
            default).

    Returns:
        The model built by ``model_func`` with ``pretrained=False``.
    """
    return model_func(pretrained=False, num_classes=num_classes)

In [18]:
def _report_pruned_model(model, test_loader, device):
    """Print test accuracy, classification report and global sparsity of ``model``."""
    _, eval_accuracy = evaluate_model(model=model,
                                      test_loader=test_loader,
                                      device=device,
                                      criterion=None)

    classification_report = create_classification_report(
        model=model, test_loader=test_loader, device=device)

    # Read Linear sparsity from the masks only when every Linear layer has
    # one; otherwise fall back to the parameters. The flag used to be fixed
    # to False, so pruned Linear layers were reported from the unmasked
    # ``weight_orig`` values.
    linear_layers = [m for m in model.modules() if isinstance(m, torch.nn.Linear)]
    linear_use_mask = all(hasattr(m, "weight_mask") for m in linear_layers)

    num_zeros, num_elements, sparsity = measure_global_sparsity(
        model,
        weight=True,
        bias=False,
        conv2d_use_mask=True,
        linear_use_mask=linear_use_mask)

    print("Test Accuracy: {:.3f}".format(eval_accuracy))
    print("Classification Report:")
    print(classification_report)
    print("Global Sparsity:")
    print("{:.2f}".format(sparsity))


def iterative_pruning_finetuning(model,
                                 train_loader,
                                 test_loader,
                                 device,
                                 learning_rate,
                                 l1_regularization_strength=0,
                                 l2_regularization_strength=0,
                                 weight_decay=5e-4,
                                 learning_rate_decay=0.6,
                                 conv2d_prune_amount=0.4,
                                 linear_prune_amount=0.2,
                                 num_iterations=10,
                                 num_epochs_per_iteration=10,
                                 model_filename_prefix="pruned_model",
                                 model_dir="saved_models",
                                 grouped_pruning=False):
    """Alternate L1 magnitude pruning and fine-tuning for several rounds.

    Each round prunes the model, prints its metrics, fine-tunes it with
    ``train_model``, prints the metrics again and saves a checkpoint named
    ``"<model_filename_prefix>_<round>.pt"`` in ``model_dir``.

    Args:
        model: Network to prune; modified in place.
        train_loader: Loader used for fine-tuning.
        test_loader: Loader used for evaluation.
        device: Device on which training and evaluation run.
        learning_rate: Fine-tuning learning rate of the first round.
        l1_regularization_strength: Forwarded to ``train_model``.
        l2_regularization_strength: Forwarded to ``train_model``.
        weight_decay: Forwarded to ``train_model``.
        learning_rate_decay: Factor applied to the learning rate after each
            round (round ``i`` uses ``learning_rate * learning_rate_decay**i``).
        conv2d_prune_amount: Total fraction of ``Conv2d`` weights to prune
            across all rounds.
        linear_prune_amount: Total fraction of ``Linear`` weights to prune
            across all rounds. Only used when ``grouped_pruning`` is False.
        num_iterations: Number of prune/fine-tune rounds. Values <= 0 return
            the model untouched.
        num_epochs_per_iteration: Fine-tuning epochs per round; the last third
            of the rounds uses 1.5x as many when there are at least 3 rounds.
        model_filename_prefix: Prefix of the checkpoint file names.
        model_dir: Directory receiving the checkpoints.
        grouped_pruning: When True, prune all ``Conv2d`` weights jointly with
            one global L1 threshold and leave ``Linear`` layers untouched;
            otherwise prune every ``Conv2d``/``Linear`` layer on its own.

    Returns:
        The pruned and fine-tuned ``model``.
    """
    # No rounds requested: nothing to do (and 1/num_iterations below would
    # divide by zero for num_iterations == 0).
    if num_iterations <= 0:
        return model

    # Per-round fraction chosen so that removing it from the surviving
    # weights each round reaches the requested total after all rounds.
    conv2d_one_iter_prune_amount = 1 - (1 - conv2d_prune_amount)**(1/num_iterations)
    linear_one_iter_prune_amount = 1 - (1 - linear_prune_amount)**(1/num_iterations)

    for i in range(num_iterations):

        print("Pruning and Finetuning {}/{}".format(i + 1, num_iterations))

        print("Pruning...")

        if grouped_pruning:
            parameters_to_prune = [
                (module, "weight")
                for module in model.modules()
                if isinstance(module, torch.nn.Conv2d)
            ]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=conv2d_one_iter_prune_amount,
            )
        else:
            for module in model.modules():
                if isinstance(module, torch.nn.Conv2d):
                    prune.l1_unstructured(module,
                                          name="weight",
                                          amount=conv2d_one_iter_prune_amount)
                elif isinstance(module, torch.nn.Linear):
                    prune.l1_unstructured(module,
                                          name="weight",
                                          amount=linear_one_iter_prune_amount)

        # Metrics right after pruning, before any recovery training.
        _report_pruned_model(model, test_loader, device)

        # The last third of the rounds trains 1.5x longer. The epoch count
        # must be an int: the plain ``* 3/2`` produced a float that
        # range() in train_model cannot consume.
        if (i >= (num_iterations * 2/3)) and (num_iterations >= 3):
            cur_num_epochs_per_iter = int(num_epochs_per_iteration * 3 / 2)
        else:
            cur_num_epochs_per_iter = num_epochs_per_iteration

        print("Fine-tuning...")

        train_model(model=model,
                    train_loader=train_loader,
                    test_loader=test_loader,
                    device=device,
                    model_dir=model_dir,
                    model_filename="{}_iter{}".format(model_filename_prefix, i + 1),
                    l1_regularization_strength=l1_regularization_strength,
                    l2_regularization_strength=l2_regularization_strength,
                    weight_decay=weight_decay,
                    learning_rate=learning_rate * (learning_rate_decay**i),
                    num_epochs=cur_num_epochs_per_iter)

        # Metrics after fine-tuning.
        _report_pruned_model(model, test_loader, device)

        model_filename = "{}_{}.pt".format(model_filename_prefix, i + 1)
        model_filepath = os.path.join(model_dir, model_filename)
        save_model(model=model,
                   model_dir=model_dir,
                   model_filename=model_filename)

        # Reload the checkpoint that was just written.
        model = load_model(model=model,
                           model_filepath=model_filepath,
                           device=device)
        torch.cuda.empty_cache()

    return model




In [19]:
def remove_parameters(model):
    """Remove the pruning re-parametrisation from ``Conv2d``/``Linear`` modules.

    ``prune.remove`` is called for the ``weight`` and ``bias`` of every
    ``Conv2d`` and ``Linear`` module; parameters that were never pruned are
    skipped.

    Args:
        model: Network to clean up; modified in place.

    Returns:
        The same ``model`` object.
    """
    for module_name, module in model.named_modules():
        if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
            for param_name in ("weight", "bias"):
                try:
                    prune.remove(module, param_name)
                except ValueError:
                    # prune.remove raises ValueError when the parameter was
                    # not pruned; that is expected here. Any other error is
                    # no longer swallowed.
                    pass

    return model

In [20]:
import torch.nn.utils.prune as prune

In [23]:
# Output locations for the checkpoints written during pruning/fine-tuning.
model_dir = "saved_models"
model_filename_prefix = "pruned_model"
# NOTE(review): the two names below are not read by any cell in the visible
# part of the notebook - presumably intended for a final export step.
pruned_model_filename = "resnet34_acc0.96_prunedto0.95_cifar10.pt"
pruned_model_filepath = os.path.join(model_dir, pruned_model_filename)

In [24]:
# Checkpoint with the weights that are loaded into the model before pruning.
# The path is specific to the Kaggle input mount; adjust it elsewhere.
model_filepath = "/kaggle/input/resnet34-for-cifar10/pytorch/resnet34_accuracy0.96/1/resnet34_96_epoch20 (1)"

In [25]:
# Hyper-parameters for the pruning / fine-tuning run.
num_classes = 10
random_seed = 1
# 0 switches off the L1 penalty on the masked weights in train_model.
l1_regularization_strength = 0
# Passed through to train_model, which does not use it with its Adam optimizer.
l2_regularization_strength = 0
weight_decay = 5e-4
learning_rate = 3e-4
# Round i fine-tunes with learning_rate * learning_rate_decay**i, so 1 keeps
# the learning rate constant across rounds.
learning_rate_decay = 1

In [26]:
# Per-channel statistics passed to prepare_dataloader for input normalisation.
mean = (0.4914, 0.4822, 0.4466) # CIFAR10 train mean
std = (0.2412, 0.2377, 0.2563) # CIFAR10 train std

In [27]:
# Device handles; the pruning run uses cuda_device.
cuda_device = torch.device("cuda:0")
# NOTE(review): cpu_device is not referenced in the visible cells.
cpu_device = torch.device("cpu:0")

In [28]:
torch.cuda.is_available()

True

In [29]:
set_random_seeds(random_seed=random_seed)

In [30]:
# Build an untrained ResNet34 (create_model's default) with 10 output classes.
model = create_model(num_classes=num_classes)


# Fill it with the weights stored at model_filepath. `device` is only used as
# torch.load's map_location; a later cell's is_cuda check prints False, so the
# copy has to be moved to the GPU explicitly.
model = load_model(model=model,
                    model_filepath=model_filepath,
                    device=cuda_device) # cuda_device!!!



In [31]:
#torch.cuda.empty_cache()

In [32]:
#!nvidia-smi

In [33]:
# Build the CIFAR10 loaders (the dataset is downloaded into data/ when missing).
# Both loaders use a batch size of 128 and load data in the main process.
train_loader, test_loader, classes = prepare_dataloader(
        num_workers=0, train_batch_size=128, eval_batch_size=128, mean=mean, stdev=std)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 28831750.52it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [34]:
!nvidia-smi

Sat Mar 30 12:26:10 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0              32W / 250W |    364MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [35]:
#torch.cuda.empty_cache()

In [36]:
#!nvidia-smi

In [37]:
"""
_, eval_accuracy = evaluate_model(model=model,
                                    test_loader=test_loader,
                                    device=cuda_device,
                                    criterion=None)
"""

'\n_, eval_accuracy = evaluate_model(model=model,\n                                    test_loader=test_loader,\n                                    device=cuda_device,\n                                    criterion=None)\n'

In [38]:
"""
classification_report = create_classification_report(
        model=model, test_loader=test_loader, device=cuda_device)
"""

'\nclassification_report = create_classification_report(\n        model=model, test_loader=test_loader, device=cuda_device)\n'

In [39]:
"""
num_zeros, num_elements, sparsity = measure_global_sparsity(model)

print("Test Accuracy: {:.3f}".format(eval_accuracy))
print("Classification Report:")
print(classification_report)
print("Global Sparsity:")
print("{:.2f}".format(sparsity))
"""

'\nnum_zeros, num_elements, sparsity = measure_global_sparsity(model)\n\nprint("Test Accuracy: {:.3f}".format(eval_accuracy))\nprint("Classification Report:")\nprint(classification_report)\nprint("Global Sparsity:")\nprint("{:.2f}".format(sparsity))\n'

In [40]:
# Prune an independent copy so the loaded `model` keeps its dense weights.
pruned_model = copy.deepcopy(model)

In [41]:
# Sanity check before pruning: the dense copy should report zero sparsity.
num_zeros, num_elements, sparsity = measure_global_sparsity(pruned_model)
print(sparsity)

0.0


In [42]:
!nvidia-smi

Sat Mar 30 12:26:32 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0              32W / 250W |    364MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [43]:
#torch.cuda.empty_cache()

In [44]:
#!nvidia-smi

In [45]:
# Probe the first parameter to see whether the copied model lives on a CUDA
# device yet.
next(pruned_model.parameters()).is_cuda

False

In [46]:
# Move the copy to `cuda_device`; the trailing ';' suppresses the module repr
# that would otherwise be echoed as the cell output.
pruned_model.to(cuda_device);

In [47]:
# Re-check the first parameter after the `.to(cuda_device)` call above.
next(pruned_model.parameters()).is_cuda

True

In [48]:
# GPU memory snapshot taken after the copied model was moved to the device.
!nvidia-smi

Sat Mar 30 12:26:46 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0              32W / 250W |    364MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [49]:
print("Pruning...")
iterative_pruning_finetuning(
    # What to prune and the data used to fine-tune / evaluate it.
    model=pruned_model,
    train_loader=train_loader,
    test_loader=test_loader,
    device=cuda_device,
    # Optimisation settings, taken from the notebook-level configuration.
    learning_rate=learning_rate,
    learning_rate_decay=learning_rate_decay,
    l1_regularization_strength=l1_regularization_strength,
    l2_regularization_strength=l2_regularization_strength,
    weight_decay=weight_decay,
    # Pruning schedule: one round, Conv2d layers only (Linear amount is 0),
    # followed by 70 fine-tuning epochs.
    conv2d_prune_amount=0.95,
    linear_prune_amount=0,
    num_iterations=1,
    num_epochs_per_iteration=70,
    grouped_pruning=True,
    # Where checkpoints are written.
    model_filename_prefix=model_filename_prefix,
    model_dir=model_dir,
)

Pruning...
Pruning and Finetuning 1/1
Pruning...


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Accuracy: 0.100
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1000
           1       0.00      0.00      0.00      1000
           2       0.00      0.00      0.00      1000
           3       0.10      1.00      0.18      1000
           4       0.00      0.00      0.00      1000
           5       0.00      0.00      0.00      1000
           6       0.00      0.00      0.00      1000
           7       0.00      0.00      0.00      1000
           8       0.00      0.00      0.00      1000
           9       0.00      0.00      0.00      1000

    accuracy                           0.10     10000
   macro avg       0.01      0.10      0.02     10000
weighted avg       0.01      0.10      0.02     10000

Global Sparsity:
0.95
Fine-tuning...
Epoch: 000 Eval Loss: 20.971 Eval Acc: 0.100
Epoch: 001 Train Loss: 0.541 Train Acc: 0.820 Eval Loss: 0.248 Eval Acc: 0.916
Epoch: 002 Train Loss: 0.291 Train A

KeyboardInterrupt: 

In [52]:
# GPU memory snapshot following the pruning / fine-tuning run.
!nvidia-smi


Sat Mar 30 16:01:07 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0              34W / 250W |   6838MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [51]:
# Ask PyTorch's caching allocator to hand unused cached GPU memory back to
# the driver (memory still referenced by live tensors is not affected).
torch.cuda.empty_cache()

In [None]:
# Echo the pruned model so its module structure is shown as the cell output.
pruned_model

In [55]:
# Checkpoint the pruned model as it currently stands.
# NOTE(review): the "with_masks" suffix suggests the pruning
# re-parametrisation is still attached when saving - confirm.
save_model(
    model=pruned_model,
    model_dir="saved_models",
    model_filename="trained_prune_rate0.95_accuracy0.961_with_masks",
)

In [53]:
# Build the report for the pruned model on the test loader and show it.
classification_report = create_classification_report(
    model=pruned_model,
    test_loader=test_loader,
    device=cuda_device,
)
print(classification_report)

              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1000
           1       0.97      0.98      0.98      1000
           2       0.96      0.95      0.95      1000
           3       0.93      0.90      0.92      1000
           4       0.96      0.97      0.97      1000
           5       0.92      0.94      0.93      1000
           6       0.96      0.99      0.97      1000
           7       0.98      0.98      0.98      1000
           8       0.98      0.98      0.98      1000
           9       0.98      0.97      0.97      1000

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000



In [54]:
# NOTE(review): conv2d_use_mask=True presumably makes the helper count zeros
# through the Conv2d pruning masks - confirm against measure_global_sparsity.
num_zeros, num_elements, sparsity = measure_global_sparsity(
    pruned_model,
    conv2d_use_mask=True,
)

#print("Test Accuracy: {:.3f}".format(eval_accuracy))
# One print with a newline separator emits the same four lines as four
# separate print calls would.
print(
    "Classification Report:",
    classification_report,
    "Global Sparsity:",
    "{:.4f}".format(sparsity),
    sep="\n",
)

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1000
           1       0.97      0.98      0.98      1000
           2       0.96      0.95      0.95      1000
           3       0.93      0.90      0.92      1000
           4       0.96      0.97      0.97      1000
           5       0.92      0.94      0.93      1000
           6       0.96      0.99      0.97      1000
           7       0.98      0.98      0.98      1000
           8       0.98      0.98      0.98      1000
           9       0.98      0.97      0.97      1000

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

Global Sparsity:
0.9498


In [56]:

def _masked_l1_penalty(model, device):
    """Sum the L1 norms of the masked weights of every pruned module.

    A module contributes only when it directly owns both a ``weight_mask``
    buffer and a ``weight_orig`` parameter; every other module is skipped.

    Args:
        model: network whose modules are scanned.
        device: device on which the accumulator tensor is created.

    Returns:
        Scalar tensor on ``device`` equal to the sum over contributing
        modules of ``||weight_mask * weight_orig||_1``.
    """
    penalty = torch.tensor(0.).to(device)
    for module in model.modules():
        # recurse=False: only entries owned by this very module carry the
        # bare names "weight_mask" / "weight_orig"; entries of nested modules
        # are name-prefixed and are handled when model.modules() yields them.
        mask = dict(module.named_buffers(recurse=False)).get("weight_mask")
        weight = dict(module.named_parameters(recurse=False)).get("weight_orig")
        if mask is not None and weight is not None:
            penalty = penalty + torch.norm(mask * weight, 1)
    return penalty


def train_model_another_mils(model,
                train_loader,
                test_loader,
                device,
                model_dir,
                model_filename,
                l1_regularization_strength=0,
                l2_regularization_strength=0,
                weight_decay=5e-4,
                learning_rate=1e-4,
                num_epochs=200
                ):
    """Train ``model`` with Adam and a two-milestone step learning-rate decay.

    The model is evaluated once before training and after every epoch, and a
    progress line is printed each time. A checkpoint is written through
    ``save_model`` whenever the zero-based epoch index is a multiple of 40.

    Args:
        model: network to train; moved to ``device`` in place.
        train_loader: iterable of ``(inputs, labels)`` training batches; its
            ``.dataset`` length normalises the running loss and accuracy.
        test_loader: loader handed to ``evaluate_model``.
        device: device used for the model and for every batch.
        model_dir: directory passed to ``save_model``.
        model_filename: checkpoint name stem; ``"_epoch{N}"`` is appended.
        l1_regularization_strength: weight of the L1 penalty on masked
            (pruned) weights; ``0`` disables the penalty.
        l2_regularization_strength: accepted for signature compatibility but
            not used by this function (Adam's ``weight_decay`` is used).
        weight_decay: ``weight_decay`` passed to Adam.
        learning_rate: initial Adam learning rate.
        num_epochs: number of training epochs.

    Returns:
        The same ``model`` object after training.
    """
    criterion = nn.CrossEntropyLoss()

    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # MultiStepLR compares milestones against its integer epoch counter, so
    # fractional values such as 200 / 6 never match and the learning rate
    # would silently never decay. Floor division yields the same values as
    # before whenever num_epochs is a multiple of 6; clamping to >= 1 keeps
    # very short runs from decaying at construction time (milestone 0).
    milestones = [max(1, num_epochs // 6), max(1, (num_epochs * 2) // 3)]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=milestones,
                                                     gamma=0.1,
                                                     last_epoch=-1)

    # Baseline evaluation before any parameter update.
    model.eval()
    eval_loss, eval_accuracy = evaluate_model(model=model,
                                              test_loader=test_loader,
                                              device=device,
                                              criterion=criterion)
    print("Epoch: {:03d} Eval Loss: {:.3f} Eval Acc: {:.3f}".format(
        0, eval_loss, eval_accuracy))

    for epoch in range(num_epochs):

        # Training
        model.train()

        running_loss = 0
        running_corrects = 0

        for inputs, labels in train_loader:

            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # The penalty walks every module, so skip it entirely when its
            # weight is zero and it could not change the loss anyway.
            if l1_regularization_strength != 0:
                loss = loss + l1_regularization_strength * _masked_l1_penalty(model, device)

            loss.backward()
            optimizer.step()

            # statistics (loss is a per-batch mean, hence the re-weighting)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)

        train_loss = running_loss / len(train_loader.dataset)
        train_accuracy = running_corrects / len(train_loader.dataset)

        # Evaluation
        model.eval()
        eval_loss, eval_accuracy = evaluate_model(model=model,
                                                  test_loader=test_loader,
                                                  device=device,
                                                  criterion=criterion)
        # Periodic checkpoint: epochs 0, 40, 80, ... (zero-based index).
        if epoch % 40 == 0:
            save_model(model=model, model_dir=model_dir, model_filename="{}_epoch{}".format(model_filename, epoch))

        scheduler.step()

        print(
            "Epoch: {:03d} Train Loss: {:.3f} Train Acc: {:.3f} Eval Loss: {:.3f} Eval Acc: {:.3f}"
            .format(epoch + 1, train_loss, train_accuracy, eval_loss,
                    eval_accuracy))

    return model

In [57]:
# Short extra fine-tuning pass at a tenth of the base learning rate.
# NOTE(review): this invokes `train_model`, not the `train_model_another_mils`
# variant defined in the cell just above - confirm which one is intended.
train_model(
    model=pruned_model,
    train_loader=train_loader,
    test_loader=test_loader,
    device=cuda_device,
    model_dir="saved_models",
    model_filename="sparsity0.95_from46ep.pt",
    l1_regularization_strength=0,
    l2_regularization_strength=0,
    weight_decay=weight_decay,
    learning_rate=learning_rate*0.1,
    num_epochs=30,
)

Epoch: 000 Eval Loss: 0.124 Eval Acc: 0.962
Epoch: 001 Train Loss: 0.051 Train Acc: 0.983 Eval Loss: 0.124 Eval Acc: 0.961
Epoch: 002 Train Loss: 0.050 Train Acc: 0.983 Eval Loss: 0.124 Eval Acc: 0.961
Epoch: 003 Train Loss: 0.052 Train Acc: 0.983 Eval Loss: 0.126 Eval Acc: 0.961
Epoch: 004 Train Loss: 0.051 Train Acc: 0.983 Eval Loss: 0.127 Eval Acc: 0.961
Epoch: 005 Train Loss: 0.052 Train Acc: 0.983 Eval Loss: 0.122 Eval Acc: 0.962
Epoch: 006 Train Loss: 0.052 Train Acc: 0.983 Eval Loss: 0.129 Eval Acc: 0.959
Epoch: 007 Train Loss: 0.050 Train Acc: 0.984 Eval Loss: 0.123 Eval Acc: 0.961
Epoch: 008 Train Loss: 0.050 Train Acc: 0.984 Eval Loss: 0.128 Eval Acc: 0.960
Epoch: 009 Train Loss: 0.051 Train Acc: 0.983 Eval Loss: 0.123 Eval Acc: 0.961
Epoch: 010 Train Loss: 0.050 Train Acc: 0.984 Eval Loss: 0.122 Eval Acc: 0.961
Epoch: 011 Train Loss: 0.050 Train Acc: 0.984 Eval Loss: 0.123 Eval Acc: 0.962
Epoch: 012 Train Loss: 0.047 Train Acc: 0.985 Eval Loss: 0.121 Eval Acc: 0.961
Epoch: 0

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [58]:
# Rebuild the report for the pruned model after the extra fine-tuning pass.
classification_report = create_classification_report(
    model=pruned_model,
    test_loader=test_loader,
    device=cuda_device,
)
print(classification_report)

              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1000
           1       0.97      0.98      0.98      1000
           2       0.96      0.95      0.95      1000
           3       0.93      0.90      0.91      1000
           4       0.95      0.98      0.97      1000
           5       0.93      0.94      0.93      1000
           6       0.96      0.98      0.97      1000
           7       0.98      0.97      0.98      1000
           8       0.98      0.98      0.98      1000
           9       0.97      0.97      0.97      1000

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000



In [59]:
# NOTE(review): conv2d_use_mask=True presumably makes the helper count zeros
# through the Conv2d pruning masks - confirm against measure_global_sparsity.
num_zeros, num_elements, sparsity = measure_global_sparsity(
    pruned_model,
    conv2d_use_mask=True,
)

#print("Test Accuracy: {:.3f}".format(eval_accuracy))
# One print with a newline separator emits the same four lines as four
# separate print calls would.
print(
    "Classification Report:",
    classification_report,
    "Global Sparsity:",
    "{:.4f}".format(sparsity),
    sep="\n",
)

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1000
           1       0.97      0.98      0.98      1000
           2       0.96      0.95      0.95      1000
           3       0.93      0.90      0.91      1000
           4       0.95      0.98      0.97      1000
           5       0.93      0.94      0.93      1000
           6       0.96      0.98      0.97      1000
           7       0.98      0.97      0.98      1000
           8       0.98      0.98      0.98      1000
           9       0.97      0.97      0.97      1000

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

Global Sparsity:
0.9498


In [61]:
# Final checkpoint of the pruned model. Arguments are kept positional,
# exactly as in the original call.
save_model(
    pruned_model,
    "saved_models",
    "sparsity0.95_final_acc0.96.pt",
)

In [None]:
"""
iterative_pruning_finetuning(
        model=pruned_model,
        train_loader=train_loader,
        test_loader=test_loader,
        device=cuda_device,
        learning_rate=learning_rate,
        learning_rate_decay=learning_rate_decay,
        l1_regularization_strength=l1_regularization_strength,
        l2_regularization_strength=l2_regularization_strength,
        conv2d_prune_amount=0.725, # 0.725^5 = 0.2
        linear_prune_amount=0,
        num_iterations=5,
        num_epochs_per_iteration=100,
        model_filename_prefix=model_filename_prefix,
        model_dir=model_dir,
        grouped_pruning=True)
"""