In [1]:
import sys
sys.path.append("../scr/utils")


from copy import deepcopy
import time
import platform
from tqdm import tqdm

import torch
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
import torch.nn.utils.prune as prune

from scr.utils.create_dataset import BirdDataset
from scr.utils.metrics import validation_epoch_end

In [3]:
# Информация о железе, на котором тестируется модель

processor_info = platform.processor()
print("Информация о процессоре:", processor_info)

if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print("Доступно GPU:", num_gpus)
    for i in range(num_gpus):
        gpu = torch.cuda.get_device_name(i)
        print(f"GPU {i + 1}: {gpu}")   
else:
    print("GPU недоступны на данной системе.")

Информация о процессоре: arm
GPU недоступны на данной системе.


In [4]:
df = pd.read_csv("../data/data.csv")
df_test = df[df.fold == 3].sample(n=100, random_state=42).reset_index(drop=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_base = torch.load("../experiment/14_September_2023_16_29/model_tf_efficientnet_b0_last_version.pt", map_location=device).to(device)
dataset_test = BirdDataset(df=df_test, path_to_folder_with_audio="../data")
valid_loader = DataLoader(dataset_test, batch_size=4)
model_base.eval()
device

device(type='cpu')

## Проверим работу базовой модели

In [5]:
model = deepcopy(model_base)

In [10]:
def get_model_size(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024**2
    print('model size: {:.3f}MB'.format(size_all_mb))

In [11]:
def count_zero_weights(model):
    num_zero_weights = 0
    total_weights = 0

    for name, param in model.named_parameters():
        if 'weight' in name:
            num_zero_weights += torch.sum(param == 0).item()
            total_weights += param.numel()

    sparsity = num_zero_weights / total_weights
    print(f"Разреженность весов: {sparsity * 100:.2f}%")
    return sparsity

In [12]:
def get_metric_score(model):
    predicted_labels_list = None
    true_labels_list = None
    metric = validation_epoch_end
    start_time = time.time()


    with torch.no_grad():
        for batch in tqdm(valid_loader):
            X_batch = batch[0].to(device)
            y_batch = batch[1].to(device)
            res = model.forward(X_batch)

            res = res.detach().sigmoid().cpu().numpy()
            y_batch_onehot = y_batch
            y_batch_onehot = y_batch_onehot.unsqueeze(1).detach().cpu().numpy()
            y_batch_onehot = y_batch_onehot.squeeze()

            if predicted_labels_list is None:
                predicted_labels_list = res
                true_labels_list = y_batch_onehot
            else:
                predicted_labels_list = np.concatenate([predicted_labels_list, res], axis=0)
                true_labels_list = np.concatenate([true_labels_list, y_batch_onehot], axis=0)

            del batch, res
        torch.cuda.empty_cache()

    end_time = time.time()


    all_predicted_labels = np.vstack(predicted_labels_list)
    all_true_labels = np.vstack(true_labels_list)
    all_true_labels = np.squeeze(all_true_labels)
    mask = (all_true_labels > 0) & (all_true_labels < 1)
    all_true_labels[mask] = 0
    avg_metric = metric(all_true_labels, all_predicted_labels)
    t = end_time - start_time

    # Мы понимаем, что ко времени работы модели добавляем время обработки батчей и добавления аугментаций к стартовым данным

    print("Наши метрики на нашей базовой модели:")
    print(f"Время работы модели на всем батче {t:.<2g} сек.")
    print(f"Время работы модели на одном сэмпле (AVG) {round(t/ len(dataset_test), 3) } сек.")
    print("Метрики качества:")
    for m in avg_metric:
        print(f"metric {m} : {avg_metric[m]:.<5g}")

In [13]:
count_zero_weights(model=model)

Разреженность весов: 0.00%


0.0

In [14]:
get_model_size(model=model)

model size: 16.738MB


In [15]:
get_metric_score(model=model)

100%|██████████| 25/25 [00:26<00:00,  1.05s/it]

Наши метрики на нашей базовой модели:
Время работы модели на всем батче 26.7708 сек.
Время работы модели на одном сэмпле (AVG) 0.268 сек.
Метрики качества:
metric val_RMAP : 0.699263





## PTDQ  fp32 -> qint8

In [16]:
model = deepcopy(model_base)
torch.backends.quantized.engine = 'qnnpack'
model_int8 = torch.ao.quantization.quantize_dynamic(
    model,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8  # the target dtype for quantized weights
)

In [20]:
count_zero_weights(model=model_int8)

Разреженность весов: 0.00%


0.0

In [18]:
get_model_size(model=model_int8)

model size: 15.448MB


In [19]:
get_metric_score(model=model_int8)

100%|██████████| 25/25 [00:25<00:00,  1.00s/it]

Наши метрики на нашей базовой модели:
Время работы модели на всем батче 25.1171 сек.
Время работы модели на одном сэмпле (AVG) 0.251 сек.
Метрики качества:
metric val_RMAP : 0.719203





#### Незначительное уменьшение веса модели и метрики, на 5 секунд уменьшилось время на батче

## PTDS  fp32 -> qint8

In [37]:
model = deepcopy(model_base)
model.eval()
model.fuse_model()

AttributeError: 'EfficientNet' object has no attribute 'fuse_model'

In [34]:
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
torch.quantization.prepare(model, inplace=True);

In [15]:
sample = iter(valid_loader).__next__()[0]
res = model(sample);

In [16]:
torch.quantization.convert(model, inplace=True);

In [17]:
get_model_size(model=model)

model size: 0.414MB


In [18]:
get_metric_score(model=model);

  0%|          | 0/25 [00:00<?, ?it/s]


NotImplementedError: Could not run 'quantized::conv2d.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d.new' is only available for these backends: [MPS, QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PythonDispatcher].

MPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:39 [backend fallback]
QuantizedCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/quantized/cpu/qconv.cpp:1555 [kernel]
BackendSelect: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:144 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:491 [backend fallback]
Functionalize: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/FunctionalizeFallbackKernel.cpp:280 [backend fallback]
Named: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/NegateFallback.cpp:19 [backend fallback]
ZeroTensor: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]
AutogradOther: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:30 [backend fallback]
AutogradCPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:34 [backend fallback]
AutogradCUDA: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:42 [backend fallback]
AutogradXLA: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:46 [backend fallback]
AutogradMPS: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:54 [backend fallback]
AutogradXPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:38 [backend fallback]
AutogradHPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]
AutogradLazy: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:50 [backend fallback]
AutogradMeta: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:58 [backend fallback]
Tracer: registered at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:487 [backend fallback]
AutocastCUDA: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:354 [backend fallback]
FuncTorchBatched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:815 [backend fallback]
FuncTorchVmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/VmapModeRegistrations.cpp:28 [backend fallback]
Batched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/LegacyBatchingRegistrations.cpp:1073 [backend fallback]
VmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/TensorWrapper.cpp:210 [backend fallback]
PythonTLSSnapshot: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:152 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:487 [backend fallback]
PythonDispatcher: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:148 [backend fallback]


## Global Pruning

In [52]:
model = deepcopy(model_base)
def global_prune(model, pruning_method, amount):
    prunable_parameters = []
    for name, module in model.named_modules():
        if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
            prunable_parameters.append((module, 'weight'))

    pruned_parameters = [(module, param_name) for module, param_name in prunable_parameters]
    prune.global_unstructured(pruned_parameters, pruning_method=pruning_method, amount=amount)

    return pruned_parameters, model

pruned_parameters, model = global_prune(model, prune.L1Unstructured, 0.05)
for module, param_name in pruned_parameters:
    prune.remove(module, "weight")

In [53]:
count_zero_weights(model=model)

Разреженность весов: 4.98%


0.049756669571182265

In [54]:
get_model_size(model=model)

model size: 16.738MB


In [55]:
get_metric_score(model=model);

100%|██████████| 25/25 [00:27<00:00,  1.09s/it]

Наши метрики на нашей базовой модели:
Время работы модели на всем батче 27.2565 сек.
Время работы модели на одном сэмпле (AVG) 0.273 сек.
Метрики качества:
metric val_RMAP : 0.696653





#### Незначительное уменьшение метрики, на 5 секунд уменьшилось время на батче

## Iterative Pruning

In [56]:
model = deepcopy(model_base)
def structured_prune(model):
    prunable_parameters = []
    for name, module in model.named_modules():
        if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
            prunable_parameters.append((module, 'weight'))
            prune.ln_structured(module, name="weight", amount=0.05, n=2, dim=0)
    pruned_parameters = [(module, param_name) for module, param_name in prunable_parameters]
    return pruned_parameters, model

pruned_parameters, model = structured_prune(model)
for module, param_name in pruned_parameters:
    prune.remove(module, "weight")

In [57]:
count_zero_weights(model=model)

Разреженность весов: 4.97%


0.04968598787519235

In [58]:
get_model_size(model=model)

model size: 16.738MB


In [59]:
get_metric_score(model=model);

100%|██████████| 25/25 [00:24<00:00,  1.03it/s]

Наши метрики на нашей базовой модели:
Время работы модели на всем батче 24.3499 сек.
Время работы модели на одном сэмпле (AVG) 0.243 сек.
Метрики качества:
metric val_RMAP : 0.375434





## Random Pruning

In [61]:
model = deepcopy(model_base)
def random_prune(model):
    prunable_parameters = []
    for name, module in model.named_modules():
        if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
            prunable_parameters.append((module, 'weight'))
            prune.random_unstructured(module, name="weight", amount=0.05)
    pruned_parameters = [(module, param_name) for module, param_name in prunable_parameters]
    return pruned_parameters, model

pruned_parameters, model = random_prune(model)
for module, param_name in pruned_parameters:
    prune.remove(module, "weight")

In [62]:
count_zero_weights(model=model)

Разреженность весов: 4.98%


0.04975643782791672

In [63]:
get_model_size(model=model)

model size: 16.738MB


In [64]:
get_metric_score(model=model);

100%|██████████| 25/25 [00:23<00:00,  1.05it/s]

Наши метрики на нашей базовой модели:
Время работы модели на всем батче 23.7246 сек.
Время работы модели на одном сэмпле (AVG) 0.237 сек.
Метрики качества:
metric val_RMAP : 0.013238



