In [1]:
import gc
import json

from copy import deepcopy
from timeit import timeit
from flopco import FlopCo
from tqdm import tqdm

import torch
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader

from src.model_compressor.model_compressor import compress_model

from torchvision.models import (resnet18, ResNet18_Weights,
                                resnet34, ResNet34_Weights,
                                resnet50, ResNet50_Weights,
                                resnet101, ResNet101_Weights,
                                resnet152, ResNet152_Weights,
                                vgg11, VGG11_Weights,
                                vgg13, VGG13_Weights,
                                vgg16, VGG16_Weights,
                                vgg19, VGG19_Weights,
                                mobilenet_v2, MobileNet_V2_Weights,
                                mobilenet_v3_large, MobileNet_V3_Large_Weights,
                                mobilenet_v3_small, MobileNet_V3_Small_Weights,
                                efficientnet_b0, EfficientNet_B0_Weights,
                                efficientnet_b1, EfficientNet_B1_Weights,
                                efficientnet_b2, EfficientNet_B2_Weights,
                                efficientnet_b3, EfficientNet_B3_Weights,
                                efficientnet_b4, EfficientNet_B4_Weights
)

In [2]:
import os
import csv
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

def load_synset_mapping(mapping_file):
    """
    Load the synset mapping from LOC_synset_mapping.txt.
    Each line should be formatted as:
        n01440764 tench, Tinca tinca
    The order of lines defines the class index.
    Returns a dict mapping synset id to an integer index (zero-indexed).
    """
    synset2idx = {}
    with open(mapping_file, 'r') as f:
        for idx, line in enumerate(f):
            parts = line.strip().split()
            if parts:
                synset = parts[0]
                synset2idx[synset] = idx
    return synset2idx

def load_annotations(csv_file, synset2idx):
    """
    Load annotations from a CSV file (either LOC_train_solution.csv or LOC_val_solution.csv).
    Each row should contain:
         ImageId, PredictionString
    The first token in PredictionString is assumed to be the synset id.
    Returns a dictionary mapping the image id (without extension) to its integer label.
    """
    annotations = {}
    with open(csv_file, newline='') as f:
        reader = csv.reader(f)
        header = next(reader)
        # If the header isn't as expected, rewind
        if header[0] != "ImageId":
            f.seek(0)
            reader = csv.reader(f)
        for row in reader:
            if len(row) < 2:
                continue
            image_id = row[0].strip()
            pred_tokens = row[1].split()
            if pred_tokens:
                synset = pred_tokens[0].strip()
                if synset in synset2idx:
                    annotations[image_id] = synset2idx[synset]
    return annotations

class ImageNetCLS(Dataset):
    def __init__(self, root, split='train', transform=None):
        """
        Args:
            root (str): Root directory containing ILSVRC, e.g. '/path/to/ILSVRC'
            split (str): One of 'train', 'val', or 'test'
            transform (callable, optional): Transformations to apply to the images.
        """
        self.root = root
        self.split = split.lower()
        self.transform = transform if transform is not None else transforms.ToTensor()

        # Load the synset mapping file
        mapping_path = os.path.join(root, "LOC_synset_mapping.txt")
        self.synset2idx = load_synset_mapping(mapping_path)

        # Define the base folder for images
        base_img_folder = os.path.join(root, "Data", "CLS-LOC", self.split)
        if not os.path.isdir(base_img_folder):
            raise RuntimeError(f"Expected folder {base_img_folder} not found.")

        self.samples = []  # List of (image_path, label) tuples

        if self.split == 'train':
            # In train, images are stored in subfolders named after the synset.
            for synset_folder in os.listdir(base_img_folder):
                synset_path = os.path.join(base_img_folder, synset_folder)
                if not os.path.isdir(synset_path):
                    continue
                # Skip folders not in the synset mapping
                if synset_folder not in self.synset2idx:
                    continue
                label = self.synset2idx[synset_folder]
                for fname in os.listdir(synset_path):
                    if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
                        img_path = os.path.join(synset_path, fname)
                        self.samples.append((img_path, label))
        elif self.split in ['val', 'test']:
            # For val and test, images are directly in the folder.
            # For validation, we use the corresponding CSV to obtain labels.
            annotations = {}
            if self.split == 'val':
                csv_file = os.path.join(root, "LOC_val_solution.csv")
                annotations = load_annotations(csv_file, self.synset2idx)
            for fname in os.listdir(base_img_folder):
                if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
                    base_name, _ = os.path.splitext(fname)
                    img_path = os.path.join(base_img_folder, fname)
                    # For validation, only include images that have an annotation.
                    if self.split == 'val':
                        if base_name in annotations:
                            label = annotations[base_name]
                        else:
                            continue
                    else:
                        # For test, no label is available. Use a dummy label.
                        label = -1
                    self.samples.append((img_path, label))
        else:
            raise ValueError("split must be one of 'train', 'val', or 'test'.")

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        img_path, label = self.samples[index]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# Define any transforms (resize, normalization, etc.)
transform = transforms.Compose([
    transforms.Resize((256, 256), interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.256, 0.232, 0.181),(0.458, 0.448, 0.45))
])
# Path to your ILSVRC directory
root_dir = '/home/aleksandr-vashchenko/Desktop/Thesis/tensor-compression-methods/experiments/Evaluation/archive/ILSVRC'
# Training dataset
train_dataset = ImageNetCLS(root=root_dir, split='train', transform=transform)
print("Number of training samples:", len(train_dataset))
# Validation dataset
val_dataset = ImageNetCLS(root=root_dir, split='val', transform=transform)
print("Number of validation samples:", len(val_dataset))
# Test dataset (without labels)
test_dataset = ImageNetCLS(root=root_dir, split='test', transform=transform)
print("Number of test samples:", len(test_dataset))
# Retrieve a sample from the training set


Number of training samples: 1281167
Number of validation samples: 50000
Number of test samples: 100000


In [3]:
sample_img, sample_label = train_dataset[0]
print("Sample image size:", sample_img.size())
print("Sample label:", sample_label)
sample_img, sample_label = val_dataset[0]
print("Sample image size:", sample_img.size())
print("Sample label:", sample_label)

Sample image size: torch.Size([3, 224, 224])
Sample label: 984
Sample image size: torch.Size([3, 224, 224])
Sample label: 786


In [4]:
# Set the device to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Parameters for compression
compression_params = {
    'layers': [torch.nn.Conv2d, torch.nn.ConvTranspose2d],
    'conv_compression_method': 'TKD',
    'conv_transpose_compression_method': 'TKD',
    'finetune': False,
    'optimizer': AdamW,
    'data_size': [3, 224, 224],
    'lr': 0.001,
    'loss_function': CrossEntropyLoss,
    'finetune_device': device,
    'task': 'classification',
}

# Functions to load models
models = [
    (resnet18, ResNet18_Weights.IMAGENET1K_V1),
    # (resnet34, ResNet34_Weights.IMAGENET1K_V1),
    # (resnet50, ResNet50_Weights.IMAGENET1K_V1),
    # (resnet101, ResNet101_Weights.IMAGENET1K_V1),
    # (resnet152, ResNet152_Weights.IMAGENET1K_V1),
    # (vgg11, VGG11_Weights.IMAGENET1K_V1),
    # (vgg13, VGG13_Weights.IMAGENET1K_V1),
    # (vgg16, VGG16_Weights.IMAGENET1K_V1),
    # (vgg19, VGG19_Weights.IMAGENET1K_V1),
    # (mobilenet_v2, MobileNet_V2_Weights.IMAGENET1K_V1),
    # (mobilenet_v3_large, MobileNet_V3_Large_Weights.IMAGENET1K_V1),
    # (mobilenet_v3_small, MobileNet_V3_Small_Weights.IMAGENET1K_V1),
    # (efficientnet_b0, EfficientNet_B0_Weights.IMAGENET1K_V1),
    # (efficientnet_b1, EfficientNet_B1_Weights.IMAGENET1K_V1),
    # (efficientnet_b2, EfficientNet_B2_Weights.IMAGENET1K_V1),
    # (efficientnet_b3, EfficientNet_B3_Weights.IMAGENET1K_V1),
    # (efficientnet_b4, EfficientNet_B4_Weights.IMAGENET1K_V1),
]

In [5]:
def evaluate_top1_top5(model, dataloader, device):
    """
    Evaluates the model on the provided dataloader and computes top-1 and top-5 accuracy.

    Args:
        model (torch.nn.Module): The trained PyTorch model.
        dataloader (torch.utils.data.DataLoader): DataLoader for the validation dataset.
        device (torch.device): The device on which to run the evaluation (e.g., "cuda" or "cpu").

    Returns:
        (top1_acc, top5_acc): Tuple of top-1 and top-5 accuracy in percentage.
    """
    model.eval()
    top1_correct = 0
    top5_correct = 0
    total_samples = 0

    with torch.no_grad():
        for images, labels in tqdm(dataloader):
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)  # Shape: (batch_size, num_classes)

            # Get top 1 prediction.
            _, pred_top1 = outputs.topk(1, dim=1, largest=True, sorted=True)
            # Get top 5 predictions.
            _, pred_top5 = outputs.topk(5, dim=1, largest=True, sorted=True)

            total_samples += labels.size(0)

            # Check top-1: squeeze pred_top1 from (batch_size, 1) to (batch_size)
            top1_correct += (pred_top1.squeeze(1) == labels).sum().item()

            # For top-5, check whether the true label is among the top-5 predictions for each sample.
            # pred_top5: shape (batch_size, 5)
            # labels: shape (batch_size,) -> unsqueeze to (batch_size, 1) to compare.
            top5_correct += (pred_top5.eq(labels.view(-1, 1)).sum(dim=1) > 0).sum().item()

    top1_acc = 100.0 * top1_correct / total_samples
    top5_acc = 100.0 * top5_correct / total_samples
    return top1_acc, top5_acc

In [6]:
def performance_test(model, input_tensor):
    with torch.no_grad():
        with torch.amp.autocast(device_type='cuda', enabled=True):
            model(input_tensor)

def evaluate_compressed_model(original_model, compressed_model):
    # Set models to evaluation mode
    model1 = original_model.to(device).eval()
    model2 = compressed_model.to(device).eval()

    val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=16, pin_memory=True)

    top1_acc_orig, top5_acc_orig = evaluate_top1_top5(model1, val_dataloader, device)
    top1_acc_comp, top5_acc_comp = evaluate_top1_top5(model2, val_dataloader, device)

    print(f"Top-1 Accuracy (original): {top1_acc_orig:.2f}%")
    print(f"Top-5 Accuracy (original): {top5_acc_orig:.2f}%")
    print(f"Top-1 Accuracy (compressed): {top1_acc_comp:.2f}%")
    print(f"Top-5 Accuracy (compressed): {top5_acc_comp:.2f}%")
    flopco_stats_original = FlopCo(model1, device=device)
    flopco_stats_compressed = FlopCo(model2, device=device)

    print(f'FLOPs: {flopco_stats_original.total_flops}')
    print(f'Parameters: {flopco_stats_original.total_params}')
    print(f'Total macs: {flopco_stats_original.total_macs}')

    print(f'FLOPs (compressed): {flopco_stats_compressed.total_flops}')
    print(f'Parameters (compressed): {flopco_stats_compressed.total_params}')
    print(f'Total macs (compressed): {flopco_stats_compressed.total_macs}')

    print(f'Compression Ratio: {flopco_stats_original.total_params / flopco_stats_compressed.total_params:.2f}')
    print(f'Compression Ratio (FLOPs): {flopco_stats_original.total_flops / flopco_stats_compressed.total_flops:.2f}')
    print(f'Compression Ratio (macs): {flopco_stats_original.total_macs / flopco_stats_compressed.total_macs:.2f}')


    # Measure performance
    input_tensor = torch.randn(64, 3, 224, 224).to(device)
    time_original = timeit(lambda: performance_test(model1, input_tensor), number=1000)
    time_compressed = timeit(lambda: performance_test(model2, input_tensor), number=1000)

    return top1_acc_orig, top5_acc_orig, top1_acc_comp, top5_acc_comp, flopco_stats_original, flopco_stats_compressed, time_original, time_compressed

In [7]:
def finetune(model):
    model.to(device)
    model.train()
    optimizer = AdamW(model.parameters(), lr=0.01)
    criterion = CrossEntropyLoss()
    train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=16, pin_memory=True)

    # Initialize variables for loss tracking
    running_loss = 0.0
    batch_count = 0

    # Create tqdm progress bar
    pbar = tqdm(train_dataloader)

    for images, labels in pbar:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        with torch.amp.autocast(enabled=True, device_type='cuda', dtype=torch.bfloat16):
            outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Update loss statistics
        current_loss = loss.item()
        running_loss += current_loss
        batch_count += 1
        avg_loss = running_loss / batch_count

        # Update tqdm with current and average loss
        pbar.set_postfix({'loss': f'{current_loss:.4f}', 'avg_loss': f'{avg_loss:.4f}'})

    return model

In [10]:
for model_func, weights in models:
    print(f"Loading model: {model_func.__name__}")
    original_model = model_func(weights=weights)
    compressed_model = deepcopy(original_model)
    compress_model(compressed_model, **compression_params)

    # compressed_model = finetune(compressed_model)

    top1_acc_orig, top5_acc_orig, top1_acc_comp, top5_acc_comp, stats_original, stats_compressed, time_original, time_compressed = evaluate_compressed_model(original_model, compressed_model)
    results = {
        'model': model_func.__name__,
        'top1_acc_orig': top1_acc_orig,
        'top5_acc_orig': top5_acc_orig,
        'top1_acc_comp': top1_acc_comp,
        'top5_acc_comp': top5_acc_comp,
        'time_original': time_original,
        'time_compressed': time_compressed,
        'flopco_stats_original': {
            'total_flops': stats_original.total_flops,
            'total_params': stats_original.total_params,
            'total_macs': stats_original.total_macs,
            'flops': stats_original.flops,
            'macs': stats_original.macs,
            'params': stats_original.params,
            'relative_flops': stats_original.relative_flops,
            'relative_macs': stats_original.relative_macs,
            'relative_params': stats_original.relative_params,
        },
        'flopco_stats_compressed': {
            'total_flops': stats_compressed.total_flops,
            'total_params': stats_compressed.total_params,
            'total_macs': stats_compressed.total_macs,
            'flops': stats_compressed.flops,
            'macs': stats_compressed.macs,
            'params': stats_compressed.params,
            'relative_flops': stats_compressed.relative_flops,
            'relative_macs': stats_compressed.relative_macs,
            'relative_params': stats_compressed.relative_params,
        },
        'compression_ratio': {
            'params': stats_original.total_params / stats_compressed.total_params,
            'flops': stats_original.total_flops / stats_compressed.total_flops,
            'macs': stats_original.total_macs / stats_compressed.total_macs,
        }
    }

    # Append results to a JSON file
    try:
        with open('./results4.json', 'r') as f:
            existing_results = json.load(f)
    except FileNotFoundError:
        existing_results = []

    existing_results.append(results)

    with open('./results4.json', 'w+') as f:
        json.dump(existing_results, f, indent=4)

    torch.save(compressed_model, f"./models/{model_func.__name__}.pt")

    del compressed_model, original_model
    gc.collect()
    torch.cuda.empty_cache()

Loading model: resnet18
differential_evolution step 1: f(x)= 0.1024243990231649
differential_evolution step 2: f(x)= 0.10055490507503358
differential_evolution step 3: f(x)= 0.10055490507503358
Polishing solution with 'L-BFGS-B'
differential_evolution step 1: f(x)= 0.21672756172293045
differential_evolution step 2: f(x)= 0.21672756172293045
differential_evolution step 3: f(x)= 0.21384909107197783
differential_evolution step 4: f(x)= 0.2137928952114772
differential_evolution step 5: f(x)= 0.2112575950891102
differential_evolution step 6: f(x)= 0.2112575950891102
Polishing solution with 'L-BFGS-B'
differential_evolution step 1: f(x)= 0.32625396735710693
differential_evolution step 2: f(x)= 0.32625396735710693
differential_evolution step 3: f(x)= 0.32284515316194556
differential_evolution step 4: f(x)= 0.32210522404514313
differential_evolution step 5: f(x)= 0.32210522404514313
Polishing solution with 'L-BFGS-B'
differential_evolution step 1: f(x)= 0.3351041842044772
differential_evolutio

100%|██████████| 782/782 [00:29<00:00, 26.58it/s]
100%|██████████| 782/782 [00:34<00:00, 22.68it/s]


Top-1 Accuracy (original): 62.22%
Top-5 Accuracy (original): 84.41%
Top-1 Accuracy (compressed): 41.40%
Top-5 Accuracy (compressed): 66.99%
FLOPs: 3628147200
Parameters: 11679912
Total macs: 1814073344
FLOPs (compressed): 2531191256
Parameters (compressed): 8137570
Total macs (compressed): 1265595372
Compression Ratio: 1.44
Compression Ratio (FLOPs): 1.43
Compression Ratio (macs): 1.43


In [52]:
compressed_model = torch.load("./models/resnet18.pt")
original_model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1).to(device)
data = val_dataset[0][0].to(device).unsqueeze(0)

  compressed_model = torch.load("./models/resnet18.pt")


In [53]:
a = deepcopy(original_model.conv1).cpu()

In [54]:
b = compress_model(a, conv_compression_method="TKDCPD")

differential_evolution step 1: f(x)= 0.14084371329445
differential_evolution step 2: f(x)= 0.10055490507503358
differential_evolution step 3: f(x)= 0.10055490507503358
differential_evolution step 4: f(x)= 0.10055490507503358
differential_evolution step 5: f(x)= 0.10055490507503358
Polishing solution with 'L-BFGS-B'


In [55]:
a_result = a.to(device)(data)

In [56]:
b_result = b.to(device)(data)

In [57]:
torch.mean(torch.abs(a_result - b_result))

tensor(0.1434, device='cuda:0', grad_fn=<MeanBackward0>)

In [58]:
torch.mean(a_result)

tensor(-0.0009, device='cuda:0', grad_fn=<MeanBackward0>)

In [59]:
torch.mean(b_result)

tensor(0.0052, device='cuda:0', grad_fn=<MeanBackward0>)

In [60]:
a_result

tensor([[[[ 8.9331e-01,  1.1938e+00,  1.1811e+00,  ...,  5.9311e-01,
            5.1131e-01,  4.1525e-01],
          [ 1.0666e-01,  9.5170e-02, -1.6279e-02,  ...,  3.7675e-02,
            1.3672e-02, -8.3489e-04],
          [-5.1655e-02, -4.4190e-02, -4.9251e-02,  ...,  3.2046e-02,
            8.4421e-03, -2.0382e-02],
          ...,
          [-9.7974e-02, -6.0040e-02, -9.7559e-02,  ..., -1.5516e-01,
           -1.1645e-01, -4.2491e-02],
          [-1.5003e-02, -1.9886e-02, -1.0525e-01,  ...,  3.8734e-02,
            5.5388e-02,  1.0540e-02],
          [ 1.2019e+00,  1.2985e+00,  1.2867e+00,  ...,  1.0002e+00,
            1.1296e+00,  6.1471e-01]],

         [[ 1.1954e+00,  1.8428e+00,  1.8801e+00,  ...,  1.0175e+00,
            9.3251e-01,  7.1561e-01],
          [-1.7345e-01,  3.1996e-01,  3.6642e-01,  ...,  2.6724e-01,
            2.1353e-01,  3.0953e-01],
          [-2.3716e-01,  1.9480e-01,  2.8675e-01,  ..., -8.6663e-02,
            1.4780e-01,  3.0459e-01],
          ...,
     

In [61]:
b_result

tensor([[[[ 8.9827e-01,  1.1844e+00,  1.2007e+00,  ...,  5.9341e-01,
            5.1509e-01,  4.2156e-01],
          [ 1.0942e-01,  9.3683e-02,  9.7763e-03,  ...,  3.9626e-02,
            1.9725e-02,  1.3329e-02],
          [-4.6906e-02, -5.0772e-02, -3.3241e-02,  ...,  2.7200e-02,
            7.0475e-03, -5.5926e-03],
          ...,
          [-9.9068e-02, -6.9433e-02, -8.3131e-02,  ..., -1.5844e-01,
           -1.1299e-01, -1.2135e-02],
          [-1.5742e-02, -2.9500e-02, -9.0643e-02,  ...,  4.4668e-02,
            6.1293e-02,  4.4004e-02],
          [ 1.1875e+00,  1.2869e+00,  1.2806e+00,  ...,  9.8769e-01,
            1.1155e+00,  6.4490e-01]],

         [[ 1.1355e+00,  1.7511e+00,  1.8490e+00,  ...,  9.8566e-01,
            9.1478e-01,  7.2496e-01],
          [-2.4801e-01,  1.6266e-01,  2.5740e-01,  ...,  2.0460e-01,
            1.6210e-01,  2.9766e-01],
          [-3.5896e-01, -3.5087e-02,  1.0346e-01,  ..., -1.5719e-01,
            7.8994e-02,  2.8144e-01],
          ...,
     