In [1]:
import os
import sys
import random

sys.path.append("../../../")

import yaml
import torch
import argparse
import numpy as np
from typing import Dict
from termcolor import colored
from accelerate import Accelerator
from losses.losses import build_loss_fn
from optimizers.optimizers import build_optimizer
from optimizers.schedulers import build_scheduler
from train_scripts.trainer_ddp import Segmentation_Trainer
from architectures.build_architecture import build_architecture
from dataloaders.build_dataset import build_dataset, build_dataloader


from sklearn.metrics import (
    jaccard_score,
    accuracy_score,
    f1_score,
    recall_score,
    precision_score,
    confusion_matrix,
)

In [2]:
def load_config(config_path: str) -> Dict:
    """Load a YAML configuration file.

    Args:
        config_path (str): path of the YAML file to read.

    Returns:
        Dict: the parsed document exactly as returned by ``yaml.safe_load``
            (``None`` when the file contains no document).

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the contents cannot be parsed as YAML.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(config_path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)
    return config

In [3]:
# Parse the experiment settings; the relative path is resolved against the
# notebook's current working directory.
config = load_config("config.yaml")

In [4]:
# build validation dataset & validation data loader
# Both are configured from the "dataset_parameters" section of the config.
valset = build_dataset(
    dataset_type=config["dataset_parameters"]["dataset_type"],
    dataset_args=config["dataset_parameters"]["val_dataset_args"],
)
# train=False is forwarded to build_dataloader; presumably it selects the
# evaluation behaviour of the loader — TODO confirm against build_dataloader.
valloader = build_dataloader(
    dataset=valset,
    dataloader_args=config["dataset_parameters"]["val_dataloader_args"],
    config=config,
    train=False,
)

In [10]:
# Instantiate the network from the config, place it on "cuda:2" and switch it
# to evaluation mode in a single chained expression.
model = build_architecture(config).to("cuda:2").eval()

In [11]:
import torchvision.models as models
import torch
from ptflops import get_model_complexity_info

# Report the model's computational cost (MACs) and parameter count via ptflops.
# NOTE(review): an earlier cell moved the model to "cuda:2", while this context
# manager selects CUDA device 0 — confirm which device is intended.
with torch.cuda.device(0):
    net = model
    # (4, 128, 128, 128) is the input resolution handed to ptflops; presumably
    # channels followed by the three spatial sizes, without a batch dimension
    # — TODO confirm. as_strings=True asks for formatted strings, not numbers.
    macs, params = get_model_complexity_info(
        net, (4, 128, 128, 128), as_strings=True, print_per_layer_stat=False, verbose=False
    )
    # Left-align the label in 30 columns and the value in 8 columns.
    print("{:<30}  {:<8}".format("Computational complexity: ", macs))
    print("{:<30}  {:<8}".format("Number of parameters: ", params))

Computational complexity:       12.8 GMac
Number of parameters:           4.51 M  


In [12]:
import torch
from torch.profiler import profile, record_function, ProfilerActivity

In [13]:
# Random dummy tensor of shape (1, 4, 128, 128, 128), created on the CPU; it is
# the model input used by the memory-estimation and profiling cells below.
inputs = torch.randn(1, 4, 128, 128, 128)

# Count only the trainable parameters (those with requires_grad=True).
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("\nmodel parameter count = ", pytorch_total_params)


model parameter count =  4511939


In [14]:
def estimate_memory_inference(
    model, sample_input, batch_size=1, use_amp=False, device=0
):
    """Measure the CUDA memory occupied by the model's weights on ``device``.

    The model is first moved to the CPU and then to ``device``; the growth of
    ``torch.cuda.memory_allocated(device)`` across that move is returned.
    One forward pass on ``sample_input`` is also executed, but the memory used
    by its activations is NOT part of the returned figure.

    Args:
        model (nn.Module): the neural network model. It is left on ``device``
            when the function returns.
        sample_input (torch.Tensor): input handed to the model as-is (it is not
            replicated), so it must already have the batch dimension the model
            expects.
        batch_size (int): currently unused; kept for interface compatibility.
        use_amp (bool): currently unused; kept for interface compatibility.
        device (int | str | torch.device): the device to measure on.

    Returns:
        int: number of bytes allocated on ``device`` by moving the model there.
    """
    # Start from the CPU so that the move below is what allocates device memory.
    model.cpu()
    allocated_before = torch.cuda.memory_allocated(device)
    model.to(device)
    allocated_after = torch.cuda.memory_allocated(device)
    model_memory = allocated_after - allocated_before

    # Smoke-test forward pass. This is inference only, so gradients are
    # disabled: no autograd graph is built or kept alive for the discarded
    # output.
    with torch.no_grad():
        model(sample_input.to(device))

    return model_memory


# Bare last expression, so the notebook displays the returned byte count.
# The call relies on the default device=0, which means the model is moved to
# CUDA device 0 as a side effect.
estimate_memory_inference(model, inputs)

18072064

In [19]:
# Profile a single CPU forward pass and list the ten operators with the
# largest total CPU time.
model.cpu()
# Make sure the input lives on the CPU as well, so the cell still works when
# it is re-run after `inputs` has been placed on a GPU (no-op if already there).
inputs = inputs.cpu()

# Inference only: disable autograd so graph bookkeeping is not part of the
# measured time.
with torch.no_grad(), profile(
    activities=[ProfilerActivity.CPU], record_shapes=True
) as prof:
    with record_function("model_inference"):
        model(inputs)

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

STAGE:2024-01-17 05:53:07 119795:119795 ActivityProfilerController.cpp:312] Completed Stage: Warm Up


--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 model_inference        39.36%     348.252ms       100.00%     884.701ms     884.701ms             1  
                    aten::conv3d         0.20%       1.735ms        23.09%     204.275ms      11.349ms            18  
               aten::convolution         0.05%     430.000us        23.07%     204.130ms      11.341ms            18  
              aten::_convolution         0.03%     296.000us        23.02%     203.700ms      11.317ms            18  
        aten::mkldnn_convolution        22.67%     200.561ms        22.71%     200.906ms      14.350ms            14  
                     aten::copy_        13.06%  

STAGE:2024-01-17 05:53:08 119795:119795 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-01-17 05:53:08 119795:119795 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [25]:
# Profile a single GPU forward pass (CPU and CUDA activity) and list the ten
# operators with the largest total CUDA time.
model = model.to("cuda:2")
# Create the dummy input directly on the target device instead of allocating
# it on the host and copying it over.
inputs = torch.randn(1, 4, 128, 128, 128, device="cuda:2")

# Inference only: disable autograd so graph bookkeeping is not part of the
# measured time.
with torch.no_grad(), profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True
) as prof:
    with record_function("model_inference"):
        model(inputs)

print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

STAGE:2024-01-17 05:54:34 119795:119795 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2024-01-17 05:54:34 119795:119795 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-01-17 05:54:34 119795:119795 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference        26.86%       4.067ms        99.83%      15.114ms      15.114ms       0.000us         0.00%       7.423ms       7.423ms             1  
                                      aten::convolution         1.41%     214.000us        20.34%       3.080ms     171.111us       0.000us         0.00%       1.993ms     110.722us            18  
         