In [1]:
import gc
import json

from copy import deepcopy
from timeit import timeit
from flopco import FlopCo
from tqdm import tqdm

import torch
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss

from src.model_compressor.model_compressor import compress_model

from torchvision.models import (resnet18, ResNet18_Weights,
                                resnet34, ResNet34_Weights,
                                resnet50, ResNet50_Weights,
                                resnet101, ResNet101_Weights,
                                resnet152, ResNet152_Weights,
                                vgg11, VGG11_Weights,
                                vgg13, VGG13_Weights,
                                vgg16, VGG16_Weights,
                                vgg19, VGG19_Weights,
                                mobilenet_v2, MobileNet_V2_Weights,
                                mobilenet_v3_large, MobileNet_V3_Large_Weights,
                                mobilenet_v3_small, MobileNet_V3_Small_Weights,
                                efficientnet_b0, EfficientNet_B0_Weights,
                                efficientnet_b1, EfficientNet_B1_Weights,
                                efficientnet_b2, EfficientNet_B2_Weights,
                                efficientnet_b3, EfficientNet_B3_Weights,
                                efficientnet_b4, EfficientNet_B4_Weights
)

In [2]:
# Prefer the first CUDA device; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Keyword arguments forwarded unchanged to `compress_model` for every benchmarked model.
compression_params = dict(
    conv_compression_method='TKDCPD',
    conv_transpose_compression_method='TKDCPD',
    finetune=True,
    optimizer=AdamW,
    data_size=[3, 224, 224],
    lr=0.001,
    loss_function=CrossEntropyLoss,
    finetune_device=device,
    task='classification',
)

# (constructor, pretrained weights) pairs to benchmark; every entry uses the
# IMAGENET1K_V1 member of its torchvision weights enum.
models = [
    (builder, weights_enum.IMAGENET1K_V1)
    for builder, weights_enum in (
        (resnet18, ResNet18_Weights),
        (resnet34, ResNet34_Weights),
        (resnet50, ResNet50_Weights),
        (resnet101, ResNet101_Weights),
        (resnet152, ResNet152_Weights),
        (vgg11, VGG11_Weights),
        (vgg13, VGG13_Weights),
        (vgg16, VGG16_Weights),
        (vgg19, VGG19_Weights),
        (mobilenet_v2, MobileNet_V2_Weights),
        (mobilenet_v3_large, MobileNet_V3_Large_Weights),
        (mobilenet_v3_small, MobileNet_V3_Small_Weights),
        (efficientnet_b0, EfficientNet_B0_Weights),
        (efficientnet_b1, EfficientNet_B1_Weights),
        (efficientnet_b2, EfficientNet_B2_Weights),
        (efficientnet_b3, EfficientNet_B3_Weights),
        (efficientnet_b4, EfficientNet_B4_Weights),
    )
]

In [3]:
def performance_test(model, input_tensor):
    """Run one gradient-free forward pass of ``model`` on ``input_tensor``.

    Intended as the callable body for ``timeit``. The model output is discarded.

    Args:
        model: Callable module to execute.
        input_tensor: Batch fed to ``model``; its device decides whether CUDA
            autocast and CUDA synchronisation are used.

    Returns:
        None.
    """
    use_cuda = input_tensor.device.type == 'cuda'
    # Autocast remains a CUDA-only optimisation, exactly as before. Requesting the
    # 'cuda' autocast context for CPU inputs only produced a
    # "CUDA is not available" warning on every call, so it is now disabled explicitly.
    autocast_device = 'cuda' if use_cuda else 'cpu'
    with torch.no_grad():
        with torch.amp.autocast(device_type=autocast_device, enabled=use_cuda):
            model(input_tensor)
    if use_cuda:
        # CUDA kernels are launched asynchronously; wait for them so that callers
        # timing this function measure execution rather than kernel launch.
        torch.cuda.synchronize(input_tensor.device)

def evaluate_compressed_model(original_model, compressed_model,
                              num_batches=1000, batch_size=64, timing_runs=1000):
    """Compare a compressed model against its original on synthetic inputs.

    Three measurements are taken on the notebook-level ``device``:

    1. Agreement: both models classify the same batches of Gaussian noise
       (shape ``(batch_size, 3, 224, 224)``). The reported "accuracy" is the
       percentage of samples on which their top-1 predictions coincide. It is
       NOT accuracy against ground-truth labels.
    2. Complexity: ``FlopCo`` statistics (FLOPs, MACs, parameters) for each
       model, printed together with original/compressed ratios.
    3. Latency: total wall-clock seconds for ``timing_runs`` forward passes of
       each model through ``performance_test``.

    Both models are moved to ``device`` and switched to eval mode.

    Args:
        original_model: Reference model.
        compressed_model: Compressed counterpart of ``original_model``.
        num_batches: Number of random batches used for the agreement score.
        batch_size: Samples per batch (agreement and timing).
        timing_runs: Forward passes timed per model.

    Returns:
        Tuple ``(accuracy, flopco_stats_original, flopco_stats_compressed,
        time_original, time_compressed)``.

    Raises:
        ValueError: If ``num_batches``, ``batch_size`` or ``timing_runs`` is
            smaller than 1.
    """
    if num_batches < 1 or batch_size < 1 or timing_runs < 1:
        raise ValueError("num_batches, batch_size and timing_runs must all be >= 1")

    # Set models to evaluation mode
    model1 = original_model.to(device).eval()
    model2 = compressed_model.to(device).eval()

    input_shape = (batch_size, 3, 224, 224)

    # Initialize counters
    correct = 0
    total = 0

    with torch.no_grad():
        for _ in tqdm(range(num_batches), desc="Evaluating models"):
            # Allocate directly on the target device instead of CPU + copy.
            images = torch.randn(input_shape, device=device)

            # Top-1 class index predicted by each model
            preds1 = model1(images).argmax(dim=1)
            preds2 = model2(images).argmax(dim=1)

            correct += (preds1 == preds2).sum().item()
            total += preds1.size(0)

    accuracy = 100 * correct / total

    print(f'Accuracy: {accuracy:.2f}%')
    flopco_stats_original = FlopCo(model1, device=device)
    flopco_stats_compressed = FlopCo(model2, device=device)

    print(f'FLOPs: {flopco_stats_original.total_flops}')
    print(f'Parameters: {flopco_stats_original.total_params}')
    print(f'Total macs: {flopco_stats_original.total_macs}')

    print(f'FLOPs (compressed): {flopco_stats_compressed.total_flops}')
    print(f'Parameters (compressed): {flopco_stats_compressed.total_params}')
    print(f'Total macs (compressed): {flopco_stats_compressed.total_macs}')

    print(f'Compression Ratio: {flopco_stats_original.total_params / flopco_stats_compressed.total_params:.2f}')
    print(f'Compression Ratio (FLOPs): {flopco_stats_original.total_flops / flopco_stats_compressed.total_flops:.2f}')
    print(f'Compression Ratio (macs): {flopco_stats_original.total_macs / flopco_stats_compressed.total_macs:.2f}')

    # Measure performance
    input_tensor = torch.randn(input_shape, device=device)

    def _timed_forward(model):
        # One forward pass that is guaranteed to have finished on return:
        # CUDA work is asynchronous, so wait for it before the timer stops.
        performance_test(model, input_tensor)
        if input_tensor.is_cuda:
            torch.cuda.synchronize()

    # Warm-up pass so one-time costs are not attributed to either model's timing.
    for model in (model1, model2):
        _timed_forward(model)

    time_original = timeit(lambda: _timed_forward(model1), number=timing_runs)
    time_compressed = timeit(lambda: _timed_forward(model2), number=timing_runs)

    return accuracy, flopco_stats_original, flopco_stats_compressed, time_original, time_compressed

In [5]:
# File that accumulates one JSON record per benchmarked model.
RESULTS_PATH = './results2.json'


def _flopco_summary(stats):
    """Return the FlopCo attributes persisted for one model as a plain dict."""
    fields = (
        'total_flops', 'total_params', 'total_macs',
        'flops', 'macs', 'params',
        'relative_flops', 'relative_macs', 'relative_params',
    )
    return {name: getattr(stats, name) for name in fields}


# For every (constructor, weights) pair: load the pretrained model, compress a
# deep copy of it, compare the two, and append the measurements to RESULTS_PATH.
for model_func, weights in models:
    print(f"Loading model: {model_func.__name__}")
    original_model = model_func(weights=weights)
    # Compress a copy so the untouched original stays available as the baseline.
    compressed_model = deepcopy(original_model)
    compress_model(compressed_model, **compression_params)

    accuracy, stats_original, stats_compressed, time_original, time_compressed = evaluate_compressed_model(original_model, compressed_model)
    results = {
        'model': model_func.__name__,
        'accuracy': accuracy,
        'time_original': time_original,
        'time_compressed': time_compressed,
        'flopco_stats_original': _flopco_summary(stats_original),
        'flopco_stats_compressed': _flopco_summary(stats_compressed),
        'compression_ratio': {
            'params': stats_original.total_params / stats_compressed.total_params,
            'flops': stats_original.total_flops / stats_compressed.total_flops,
            'macs': stats_original.total_macs / stats_compressed.total_macs,
        }
    }

    # Append results to a JSON file
    try:
        with open(RESULTS_PATH, 'r') as f:
            existing_results = json.load(f)
    except FileNotFoundError:
        existing_results = []

    existing_results.append(results)

    # Serialise BEFORE opening the file for writing: opening with 'w' truncates
    # it, so a serialisation error raised afterwards would wipe earlier results.
    serialized = json.dumps(existing_results, indent=4)
    with open(RESULTS_PATH, 'w') as f:
        f.write(serialized)

    # Drop both models and release cached GPU memory before the next model.
    del compressed_model, original_model
    gc.collect()
    torch.cuda.empty_cache()

Loading model: mobilenet_v3_large
differential_evolution step 1: f(x)= 0.18263319027386715
differential_evolution step 2: f(x)= 0.18263319027386715
differential_evolution step 3: f(x)= 0.18263319027386715
differential_evolution step 4: f(x)= 0.18263319027386715
differential_evolution step 5: f(x)= 0.18263319027386715
differential_evolution step 6: f(x)= 0.18263319027386715
differential_evolution step 7: f(x)= 0.18263319027386715
Polishing solution with 'L-BFGS-B'


  warn("Depthwise convolution is not supported. This layer will not be compressed.")


differential_evolution step 1: f(x)= 0.5754669308662415
differential_evolution step 2: f(x)= 0.5754669308662415
differential_evolution step 3: f(x)= 0.5754669308662415
differential_evolution step 4: f(x)= 0.5754669308662415
differential_evolution step 5: f(x)= 0.5754669308662415
Polishing solution with 'L-BFGS-B'
differential_evolution step 1: f(x)= 0.6084460020065308
differential_evolution step 2: f(x)= 0.5342286825180054
differential_evolution step 3: f(x)= 0.5342286825180054
differential_evolution step 4: f(x)= 0.5342286825180054
differential_evolution step 5: f(x)= 0.5342286825180054
differential_evolution step 6: f(x)= 0.5342286825180054
Polishing solution with 'L-BFGS-B'
differential_evolution step 1: f(x)= 0.6732354164123535
differential_evolution step 2: f(x)= 0.6732354164123535
differential_evolution step 3: f(x)= 0.6259453892707825
differential_evolution step 4: f(x)= 0.6259453892707825
differential_evolution step 5: f(x)= 0.6259453892707825
differential_evolution step 6: f(x

Evaluating models: 100%|██████████| 1000/1000 [01:45<00:00,  9.50it/s]


Accuracy: 45.16%
FLOPs: 433186840
Parameters: 5458632
Total macs: 216589760
FLOPs (compressed): 526543384
Parameters (compressed): 6105181
Total macs (compressed): 263267008
Compression Ratio: 0.89
Compression Ratio (FLOPs): 0.82
Compression Ratio (macs): 0.82
Loading model: mobilenet_v3_small
differential_evolution step 1: f(x)= 0.2091097671981912
differential_evolution step 2: f(x)= 0.2091097671981912
differential_evolution step 3: f(x)= 0.2091097671981912
differential_evolution step 4: f(x)= 0.2091097671981912
differential_evolution step 5: f(x)= 0.2091097671981912
differential_evolution step 6: f(x)= 0.2091097671981912
Polishing solution with 'L-BFGS-B'
differential_evolution step 1: f(x)= 0.0024475371824337344
differential_evolution step 2: f(x)= 0.0024475371824337344
differential_evolution step 3: f(x)= 0.0024475371824337344
differential_evolution step 4: f(x)= 0.0024475371824337344
differential_evolution step 5: f(x)= 0.0024475371824337344
differential_evolution step 6: f(x)= 0.

KeyboardInterrupt: 

In [5]:
# Show the layer structure of the most recently compressed model.
# NOTE(review): `compressed_model` is deleted at the end of every iteration of the
# benchmark loop, so this cell only works when that loop was interrupted
# mid-iteration (as in the recorded run); after a complete run it raises NameError.
print(compressed_model)

MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Sequential(
        (0): Conv2d(3, 3, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): Sequential(
          (0): Conv2d(3, 3, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): Conv2d(3, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=3, bias=False)
          (2): Conv2d(3, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1):