<a href="https://colab.research.google.com/github/StefanoGiacomelli/e2panns/blob/main/E2PANNs_Model_Profiling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only step: mount Google Drive under /content/drive (force_remount=True asks for a fresh mount).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

#%cd /content/drive/MyDrive/Stefano_Giacomelli/Tecnojest/

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1T5OivhNEVmsivuZiIvY5-RrXh2JfbBM2/Tecnojest


In [None]:
!pip install epanns-inference

Collecting epanns-inference
  Downloading epanns_inference-0.1.1-py3-none-any.whl.metadata (725 bytes)
Collecting torchlibrosa (from epanns-inference)
  Downloading torchlibrosa-0.1.0-py3-none-any.whl.metadata (3.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->epanns-inference)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->epanns-inference)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->epanns-inference)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->epanns-inference)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->epanns-inference)
  Downloading nvidia_cub

In [None]:
import os
import sys
import shutil
import subprocess
import glob
import platform
import signal
import psutil
import tracemalloc
from queue import Queue
import threading
from concurrent.futures import ThreadPoolExecutor
import time
from tqdm import tqdm
import random
import logging
import json
import csv
import numpy as np
import torch
from scipy.stats import iqr, skew, kurtosis

# Profiling functions

(This section replicates the profiling environment used on local hardware, with minor adjustments.)

## Inference

In [None]:
# Install the pinned CodeCarbon release before importing from it.
!pip install codecarbon==2.4.2

# EmissionsTracker is the tracker instantiated by energy_co2() further down.
from codecarbon import EmissionsTracker

Collecting codecarbon==2.4.2
  Downloading codecarbon-2.4.2-py3-none-any.whl.metadata (8.4 kB)
Collecting arrow (from codecarbon==2.4.2)
  Downloading arrow-1.3.0-py3-none-any.whl.metadata (7.5 kB)
Collecting rapidfuzz (from codecarbon==2.4.2)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting types-python-dateutil>=2.8.10 (from arrow->codecarbon==2.4.2)
  Downloading types_python_dateutil-2.9.0.20241206-py3-none-any.whl.metadata (2.1 kB)
Downloading codecarbon-2.4.2-py3-none-any.whl (494 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.9/494.9 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading arrow-1.3.0-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
def set_seeds(seed=42):
    """
    Seed every random number generator used by the notebook and pin the cuDNN flags.

    Seeds Python's `random`, NumPy's global generator and PyTorch (CPU generator plus the CUDA
    seeding calls), stores the seed in the PYTHONHASHSEED environment variable, and sets
    cuDNN to deterministic mode with benchmarking disabled.

    :param seed: Integer seed applied to all generators. Default 42.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU, current CUDA device, all CUDA devices)
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # cuDNN flags
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Hardware Profiling ------------------------------------------------------------------------------------------
def your_gpu(verbose=True, save_path=None):
    """
    Detect CUDA devices through PyTorch and build a report about them.

    For every CUDA device the name and the free/total memory (in GB, 1 GB = 1e9 bytes) are collected.
    The driver version is scraped from the `nvidia-smi` output and the CUDA compiler release from
    `nvcc --version`, each only when the executable is found on PATH. Any exception raised during
    detection makes the function fall back to the "cpu" device string.

    :param verbose: Print the collected information and any failure message.
    :param save_path: Optional path of a JSON file receiving {"device": ..., "gpu_info": ...}.
    :return: Tuple (device_string, gpu_details) where device_string is "cuda:<index>" or "cpu".
    """

    def say(message):
        # Single switch for all console output of this function
        if verbose:
            print(message)

    def describe_device(index):
        # Name first, then memory figures; memory lookup failures are recorded as None
        record = {"id": index, "name": torch.cuda.get_device_name(index)}
        try:
            free_bytes, total_bytes = torch.cuda.mem_get_info(f"cuda:{index}")
            record["total_memory_gb"] = total_bytes * 1e-9
            record["free_memory_gb"] = free_bytes * 1e-9
            say(f"[GPU-{index}] Name: {record['name']}, Free: {record['free_memory_gb']:.2f} GB, Total: {record['total_memory_gb']:.2f} GB")
        except Exception as err:
            record["total_memory_gb"] = None
            record["free_memory_gb"] = None
            say(f"[GPU-{index}] Memory info retrieval failed: {err}")
        return record

    def read_driver_version():
        # The token following the first "Version:" on the first line mentioning "Driver Version"
        listing = subprocess.check_output(["nvidia-smi"], encoding="utf-8")
        header = next((row for row in listing.splitlines() if "Driver Version" in row), None)
        if header is None:
            return None
        tokens = header.split()
        if "Version:" not in tokens:
            return None
        position = tokens.index("Version:") + 1
        return tokens[position] if position < len(tokens) else None

    def read_compiler_release():
        # Text between the first "release" and the next comma on the first line containing "release"
        listing = subprocess.check_output(["nvcc", "--version"], encoding="utf-8")
        for row in listing.splitlines():
            if "release" in row:
                _, _, tail = row.partition("release")
                return tail.split(",")[0].strip()
        return None

    report = {"gpu_driver_version": None,
              "cuda_compiler_version": None,
              "count": 0,
              "devices": []}

    try:
        if not torch.cuda.is_available():
            device_string = "cpu"
            say("No GPU detected. Using CPU.")
            say(f"PyTorch Version: {torch.__version__}")
        else:
            device_string = f"cuda:{torch.cuda.current_device()}"
            report["count"] = torch.cuda.device_count()

            # One worker task per device
            with ThreadPoolExecutor() as pool:
                report["devices"] = list(pool.map(describe_device, range(report["count"])))

            if shutil.which("nvidia-smi") is None:
                say("'nvidia-smi' not found.")
            else:
                try:
                    report["gpu_driver_version"] = read_driver_version()
                    say(f"Driver Version: {report['gpu_driver_version']}")
                except Exception as err:
                    say(f"nvidia-smi output parsing failed: {err}")

            if shutil.which("nvcc") is None:
                say("'nvcc' not found.")
            else:
                try:
                    report["cuda_compiler_version"] = read_compiler_release()
                    say(f"CUDA Compiler Version: {report['cuda_compiler_version']}")
                except Exception as err:
                    say(f"nvcc output parsing failed: {err}")

            say(f"PyTorch Version: {torch.__version__}")

    except Exception as err:
        device_string = "cpu"
        say(f"Error during GPU detection: {err}")

    # Save GPU info to JSON
    if save_path is not None:
        try:
            with open(save_path, "w") as handle:
                json.dump({"device": device_string, "gpu_info": report}, handle, indent=4)
            say(f"Saved GPU info JSON to: {save_path}")
        except Exception as err:
            say(f"Failed to save GPU info JSON: {err}")

    return device_string, report


def your_hardware(verbose=True, save_path=None):
    """
    Collect CPU, RAM and disk details of the current machine.

    - CPU: only gathered on Linux, by parsing /proc/cpuinfo; on any other system the "cpu" entry is an
      empty dict. Per-core maximum frequencies and the physical core count are requested from psutil.
    - RAM: total/available/used (GB, 1 GB = 1e9 bytes) and percent used, from psutil.
    - Disks: the rows of `df -h` on Linux/macOS (keyed by the whitespace-split header tokens, so the
      mount point ends up under the key "Mounted"), or psutil partition usage on Windows.

    Every section is best-effort: a failure yields an empty dict/list for that section.

    :param verbose: Print a JSON summary and any failure message.
    :param save_path: Optional path of a JSON file receiving the collected dict.
    :return: Dict with the keys "cpu", "ram" and "disks".
    """
    hardware_info = {}

    # CPU Info
    def get_cpu_info():
        if platform.system() == "Linux":
            try:
                # Read the pseudo-file directly; no need to spawn `cat` for it
                with open("/proc/cpuinfo", "r", encoding="utf-8") as f:
                    output = f.read()
                if verbose:
                    print("CPU Info retrieved from /proc/cpuinfo")
                return parse_cpu_info_linux(output)
            except Exception as e:
                if verbose:
                    print(f"Failed to retrieve CPU info: {e}")
        return {}

    def parse_cpu_info_linux(output):
        cpu_model = None
        logical_count = 0
        cpuinfo_frequencies = {}

        # /proc/cpuinfo holds one "model name" line per logical processor entry
        for line in output.splitlines():
            if "model name" in line:
                if cpu_model is None:
                    # Split on the first ':' only, so model names containing ':' are kept whole
                    cpu_model = line.partition(":")[2].strip()
                logical_count += 1

        # Physical cores: ask psutil; keep the /proc/cpuinfo entry count when it cannot tell
        physical_count = logical_count
        try:
            reported = psutil.cpu_count(logical=False)
            if reported:
                physical_count = reported
        except Exception as e:
            if verbose:
                print(f"Physical core count unavailable, using /proc/cpuinfo entry count: {e}")

        # Try to read per-core frequencies using psutil
        try:
            freqs = psutil.cpu_freq(percpu=True)
            if freqs:
                freqs_mhz = {}
                for idx, f in enumerate(freqs):
                    if f:  # psutil might return None for a core
                        freqs_mhz[f"cpu{idx}"] = f.max  # Max frequency in MHz
                cpuinfo_frequencies = freqs_mhz
        except Exception:
            cpuinfo_frequencies = {}

        return {"model_name": cpu_model,
                "physical_cores": physical_count,
                "logical_cores": logical_count,
                "frequencies_mhz": cpuinfo_frequencies if cpuinfo_frequencies else "Not Available"}

    hardware_info["cpu"] = get_cpu_info()

    # RAM Info
    def get_ram_info():
        try:
            virtual_mem = psutil.virtual_memory()
            return {"total_memory_gb": round(virtual_mem.total / 1e9, 2),
                    "available_memory_gb": round(virtual_mem.available / 1e9, 2),
                    "used_memory_gb": round(virtual_mem.used / 1e9, 2),
                    "percent_used": virtual_mem.percent}
        except Exception as e:
            if verbose:
                print(f"Failed to retrieve RAM info: {e}")
            return {}

    hardware_info["ram"] = get_ram_info()

    # Disk Info
    def get_disk_info():
        try:
            disks = []
            if platform.system() in ["Linux", "Darwin"]:
                if shutil.which("df"):
                    output = subprocess.check_output(["df", "-h"], encoding="utf-8")
                    lines = output.splitlines()
                    # Header tokens become the dict keys; zip() drops any surplus token
                    headers = lines[0].split()
                    for line in lines[1:]:
                        if line.strip():
                            parts = line.split()
                            disk_info = dict(zip(headers, parts))
                            disks.append(disk_info)
            elif platform.system() == "Windows":
                for partition in psutil.disk_partitions():
                    usage = psutil.disk_usage(partition.mountpoint)
                    disks.append({"device": partition.device,
                                  "mountpoint": partition.mountpoint,
                                  "fstype": partition.fstype,
                                  "total_gb": round(usage.total / 1e9, 2),
                                  "used_gb": round(usage.used / 1e9, 2),
                                  "free_gb": round(usage.free / 1e9, 2),
                                  "percent_used": usage.percent})
            return disks
        except Exception as e:
            if verbose:
                print(f"Failed to retrieve disk info: {e}")
            return []

    hardware_info["disks"] = get_disk_info()

    # Optional: Print Hardware summary
    if verbose:
        print(f"Hardware Summary: {json.dumps(hardware_info, indent=4)}")

    # Optional: Save Hardware summary to JSON
    if save_path is not None:
        try:
            with open(save_path, "w") as f:
                json.dump(hardware_info, f, indent=4)
            if verbose:
                print(f"Saved hardware info JSON to: {save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save hardware info JSON: {e}")

    return hardware_info


# Units Monitoring Functions ----------------------------------------------------------------------------------
# Module-level state shared between the profiling functions and their background sampler threads.
# Queue of CPU utilisation percentages filled by monitor_cpu_usage().
cpu_usage_samples = Queue()
# While this event is set, monitor_cpu_usage() keeps sampling; clearing it ends the loop.
cpu_monitoring = threading.Event()
# Queue of GPU utilisation percentages filled by monitor_gpu_usage().
gpu_usage_samples = Queue()
# While this event is set, monitor_gpu_usage() keeps sampling; clearing it ends the loop.
gpu_monitoring = threading.Event()


def monitor_cpu_usage():
    """
    Sampler loop meant to run in a background thread: pushes CPU utilisation readings to a queue.

    The function arms `cpu_monitoring` itself when it is not already set, then takes one
    `psutil.cpu_percent(interval=0.1)` reading per pass until the event is cleared by another thread.

    NOTE(review): because the function arms the event on entry, a `clear()` issued before this thread
    reaches that check is undone and the loop keeps running — callers should re-clear until the
    thread has actually exited.

    :global cpu_usage_samples: Thread-safe queue receiving the CPU usage percentages.
    :type cpu_usage_samples: Queue
    :global cpu_monitoring: Thread-safe event controlling the sampling loop.
    :type cpu_monitoring: threading.Event
    """
    # Arm the flag when the caller has not done so
    if not cpu_monitoring.is_set():
        cpu_monitoring.set()

    while True:
        if not cpu_monitoring.is_set():
            break
        reading = psutil.cpu_percent(interval=0.1)
        cpu_usage_samples.put(reading)


def monitor_gpu_usage():
    """
    Sampler loop meant to run in a background thread: pushes GPU utilisation readings to a queue.

    The function arms `gpu_monitoring` itself when it is not already set, then queries `nvidia-smi`
    roughly every 0.1 s until the event is cleared by another thread. The query prints one value per
    GPU; the sample stored is the highest utilisation among them, so multi-GPU machines no longer
    fall into the error path. A query whose output cannot be parsed (or that cannot be launched) is
    recorded as 0; a query that exits with a non-zero return code records nothing.

    :global gpu_usage_samples: Thread-safe queue receiving the GPU usage percentages.
    :type gpu_usage_samples: Queue
    :global gpu_monitoring: Thread-safe event controlling the sampling loop.
    :type gpu_monitoring: threading.Event
    """
    if not gpu_monitoring.is_set():
        gpu_monitoring.set()

    query = ["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv,noheader,nounits"]

    while gpu_monitoring.is_set():
        try:
            result = subprocess.run(query,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            if result.returncode == 0:
                # One integer per GPU; max() of an empty list raises and is handled below
                readings = [int(token) for token in result.stdout.split()]
                gpu_usage_samples.put(max(readings))
        except Exception:
            gpu_usage_samples.put(0)  # Assume 0% usage if query fails

        time.sleep(0.1)


# Model profiling functions -----------------------------------------------------------------------------------
def min_binary_search(model, sample_rate, device, save_path, verbose=True):
    """
    Find the minimum input duration the model can process without error using binary search.
    Save (or append) the result to a specified JSON file.

    The search assumes that once a length works, every longer length (up to 10 seconds) works too.
    It keeps two bounds: `low`, the largest length known to fail, and `high`, the smallest length
    known to work. A successful forward pass moves `high` to the tested length (never below it), so
    the reported minimum is always a length that was actually verified — unless no forward pass
    succeeded at all, in which case the untested 10-second upper bound is reported with a warning.

    Any exception raised by the forward pass is treated as "input too short".

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: Device string ('cpu' or 'cuda').
    :param save_path: Filepath to save JSON results (None skips saving).
    :param verbose: Whether to print progress info.
    :return: Dict {"min_input_size": {"samples", "seconds", "sample_rate", "binary_search_iterations"}}.
    """
    def generate_input(duration_samples, device):
        return torch.randn((1, duration_samples), device=device) * 2 - 1.

    max_dur = int(sample_rate * 10)  # 10 seconds max
    low, high = 0, max_dur           # 0 samples: nothing to process, treated as failing
    total_candidates = max(high - low - 1, 0)  # lengths strictly between the two bounds
    any_success = False
    i = 0

    model.eval()
    model.to(device)  # inputs are created on `device`; keep the model on the same one
    with torch.inference_mode():
        with tqdm(total=total_candidates, desc="MIN Input Size Binary Search") as pbar:
            while high - low > 1:
                mid = (high + low) // 2
                try:
                    set_seeds(42)
                    x = generate_input(mid, device)
                    model(x.float())
                    high = mid  # `mid` works: it is the best verified candidate so far
                    any_success = True
                except Exception:
                    low = mid   # `mid` fails: the minimum is larger
                i += 1
                pbar.n = total_candidates - (high - low - 1)
                pbar.refresh()

    if verbose and not any_success:
        print(f"[WARNING] No forward pass succeeded; {high} samples is the untested upper bound, not a verified minimum.")

    # Final results
    min_samples = high
    min_seconds = min_samples / sample_rate

    results_entry = {"min_input_size": {"samples": int(min_samples),
                                        "seconds": float(min_seconds),
                                        "sample_rate": int(sample_rate),
                                        "binary_search_iterations": int(i)}}

    if verbose:
        print(f"Results: {json.dumps(results_entry, indent=4)}")

    # Save results to JSON
    if save_path is not None:
        try:
            existing_data = {}
            if os.path.exists(save_path):
                with open(save_path, "r") as f:
                    loaded = json.load(f)
                # Anything that is not a JSON object is replaced by a fresh dict
                if isinstance(loaded, dict):
                    existing_data = loaded

            # Update
            existing_data.update(results_entry)

            with open(save_path, "w") as f:
                json.dump(existing_data, f, indent=4)

            if verbose:
                print(f"Saved (updated) profiling results to: {save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save profiling results: {e}")

    return results_entry


def _summarize_timings(timings, iterations, input_duration_sec):
    """Build the descriptive-statistics dict shared by overall_time() and process_time()."""
    return {"iterations": int(iterations),
            "input_duration_sec": float(input_duration_sec),
            "max_sec": float(np.max(timings)),
            "min_sec": float(np.min(timings)),
            "mean_sec": float(np.mean(timings)),
            "std_dev_sec": float(np.std(timings, ddof=1)),
            "median_sec": float(np.median(timings)),
            "percentiles": {"25th_perc": float(np.percentile(timings, 25)),
                            "33th_perc": float(np.percentile(timings, 33)),
                            "66th_perc": float(np.percentile(timings, 66)),
                            "75th_perc": float(np.percentile(timings, 75))},
            "iqr_sec": float(iqr(timings)),
            "skewness": float(skew(timings)),
            "kurtosis": float(kurtosis(timings))}


def _merge_timing_results_into_json(save_path, results_entry, verbose):
    """Merge `results_entry` into the JSON object stored at `save_path`; failures are printed, not raised."""
    try:
        existing_data = {}
        if os.path.exists(save_path):
            with open(save_path, "r") as f:
                loaded = json.load(f)
            # Anything that is not a JSON object is replaced by a fresh dict
            if isinstance(loaded, dict):
                existing_data = loaded

        existing_data.update(results_entry)

        with open(save_path, "w") as f:
            json.dump(existing_data, f, indent=4)

        if verbose:
            print(f"Saved (updated) profiling results to: {save_path}")
    except Exception as e:
        if verbose:
            print(f"Failed to save profiling results: {e}")


def _profile_inference_time(clock, result_key, progress_desc, model, sample_rate, device, save_path,
                            iterations, input_duration_sec, verbose, npz_save_path):
    """Time `iterations` forward passes with `clock` and report/save the statistics under `result_key`."""
    if iterations < 1:
        raise ValueError("iterations must be >= 1")

    # str() lets both device strings and torch.device objects through
    on_cuda = str(device).startswith("cuda")
    samples = int(sample_rate * input_duration_sec)

    model.eval()
    model.to(device)

    timings = []

    with torch.inference_mode():
        x = torch.randn((1, samples), device=device) * 2 - 1.

        # Make sure the model transfer and input creation are finished before the first timed pass
        if on_cuda:
            torch.cuda.synchronize()

        for _ in tqdm(range(iterations), desc=progress_desc):
            set_seeds(42)  # outside the timed region
            start_time = clock()
            model(x.float())
            if on_cuda:
                torch.cuda.synchronize()  # wait for the queued GPU work before stopping the clock
            timings.append(clock() - start_time)

    timings = np.array(timings)

    results_entry = {result_key: _summarize_timings(timings, iterations, input_duration_sec)}

    if verbose:
        print(f"Results: {json.dumps(results_entry, indent=4)}")

    # Save results to JSON
    if save_path is not None:
        _merge_timing_results_into_json(save_path, results_entry, verbose)

    # Save results to .NPZ
    if npz_save_path is not None:
        try:
            np.savez_compressed(npz_save_path, values=timings, features=np.array(list(results_entry[result_key].items())))
            if verbose:
                print(f"Saved compressed NPZ data to: {npz_save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save NPZ: {e}")

    return results_entry


def overall_time(model, sample_rate, device, save_path, iterations=100, input_duration_sec=10.0, verbose=True, npz_save_path=None):
    """
    Profile the overall wall clock time for the model inference (sleep included).
    Save (or append) the result to a specified JSON file and optionally save NPZ arrays.

    Each forward pass is timed with `time.perf_counter()`; on CUDA devices the device is synchronized
    before the first pass and before each clock stop. The result key and progress label keep the
    "cpu_overall_time" / "CPU Overall Time Profiling" names whatever the device.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: Device string ('cpu' or 'cuda'); a torch.device is accepted as well.
    :param save_path: Filepath to save JSON results (None skips saving).
    :param iterations: Number of timed iterations (must be >= 1).
    :param input_duration_sec: Duration of the input (seconds). Default 10s.
    :param verbose: Whether to print progress info.
    :param npz_save_path: Optional path to save .npz compressed timings.
    :return: Dict {"cpu_overall_time": {...statistics in seconds...}}.
    :raises ValueError: If `iterations` is smaller than 1.
    """
    return _profile_inference_time(time.perf_counter, "cpu_overall_time", "CPU Overall Time Profiling",
                                   model, sample_rate, device, save_path,
                                   iterations, input_duration_sec, verbose, npz_save_path)


def process_time(model, sample_rate, device, save_path, iterations=100, input_duration_sec=10.0, verbose=True, npz_save_path=None):
    """
    Profile the CPU process time (excluding sleep) for model inference.
    Save (or append) the result to a specified JSON file and optionally save NPZ arrays.

    Each forward pass is timed with `time.process_time()`; on CUDA devices the device is synchronized
    before the first pass and before each clock stop.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: Device string ('cpu' or 'cuda'); a torch.device is accepted as well.
    :param save_path: Filepath to save JSON results (None skips saving).
    :param iterations: Number of timed iterations (must be >= 1).
    :param input_duration_sec: Duration of the input (seconds). Default 10s.
    :param verbose: Whether to print progress info.
    :param npz_save_path: Optional path to save .npz compressed timings.
    :return: Dict {"cpu_process_time": {...statistics in seconds...}}.
    :raises ValueError: If `iterations` is smaller than 1.
    """
    return _profile_inference_time(time.process_time, "cpu_process_time", "CPU Process Time Profiling",
                                   model, sample_rate, device, save_path,
                                   iterations, input_duration_sec, verbose, npz_save_path)


def memory(model, sample_rate, device, save_path=None, iterations=100, input_duration_sec=10.0, verbose=True, npz_save_path=None):
    """
    Profile memory traced by `tracemalloc` during model inference.
    Save (or update) the results into a specified JSON file and optionally into compressed NPZ arrays.
    Colab-friendly: no cache/cpu-cycle profiling.

    NOTE(review): tracemalloc only sees allocations made through Python's memory allocators; tensor
    buffers allocated natively by PyTorch are likely not included — confirm before reading these
    numbers as total process RAM.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: Device string ('cpu' or 'cuda').
    :param save_path: Optional filepath to save JSON results.
    :param iterations: Number of forward passes executed while tracing.
    :param input_duration_sec: Duration of the input (seconds). Default 10s.
    :param verbose: Whether to print progress info.
    :param npz_save_path: Optional path to save an .npz holding [current, peak] under "peak_memory".
    :return: Dict {"memory_usage": {...}} (same convention as the other profiling functions).
    """

    def generate_input(duration_samples, device):
        return torch.randn((1, duration_samples), device=device) * 2 - 1.

    samples = int(sample_rate * input_duration_sec)

    model.eval()
    model.to(device)

    with torch.inference_mode():
        x = generate_input(samples, device)

        # Start tracemalloc for memory profiling
        tracemalloc.start()
        try:
            # Model inference
            for _ in tqdm(range(iterations), desc="Memory Profiling Only"):
                _ = model(x.float())

            current, peak = tracemalloc.get_traced_memory()
        finally:
            # Always stop tracing, even when inference raises, so later cells are not slowed down
            tracemalloc.stop()

    results_entry = {"memory_usage": {"iterations": int(iterations),
                                      "input_duration_sec": float(input_duration_sec),
                                      "current_bytes": int(current),
                                      "peak_bytes": int(peak),
                                      "current_megabytes": round(current / (1024 ** 2), 4),
                                      "peak_megabytes": round(peak / (1024 ** 2), 4)}}

    if verbose:
        print(json.dumps(results_entry, indent=4))

    # Save results to JSON
    if save_path is not None:
        try:
            existing_data = {}
            if os.path.exists(save_path):
                with open(save_path, "r") as f:
                    loaded = json.load(f)
                # Anything that is not a JSON object is replaced by a fresh dict
                if isinstance(loaded, dict):
                    existing_data = loaded

            existing_data.update(results_entry)

            with open(save_path, "w") as f:
                json.dump(existing_data, f, indent=4)

            if verbose:
                print(f"Saved (updated) memory profiling results to: {save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save memory profiling results: {e}")

    # Save results to .npz
    if npz_save_path is not None:
        try:
            np.savez_compressed(npz_save_path,
                                peak_memory=np.array([current, peak]))
            if verbose:
                print(f"Saved compressed NPZ memory data to: {npz_save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save NPZ: {e}")

    return results_entry


def cpu_usage(model, sample_rate, device, save_path, iterations=100, input_duration_sec=10.0, verbose=True, npz_save_path=None):
    """
    Profile CPU usage percentage during model inference.
    Save (or append) the results into a specified JSON file and optionally into compressed NPZ arrays.

    A background thread running monitor_cpu_usage() fills the module-level `cpu_usage_samples` queue
    while the forward passes run; the average and peak of those samples are reported. Samples left in
    the queue by an earlier, interrupted run are discarded first. An exception raised during inference
    is printed (when verbose) and the statistics are computed from the samples gathered so far.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: Device string ('cpu' or 'cuda').
    :param save_path: Path to save the JSON results (None skips saving).
    :param iterations: Number of forward passes executed while sampling.
    :param input_duration_sec: Duration of the input (seconds). Default 10s.
    :param verbose: Whether to print progress info.
    :param npz_save_path: Optional path to save .npz compressed results.
    :return: Dict {"cpu_usage": {"iterations", "input_duration_sec", "avg_cpu_usage_percent", "peak_cpu_usage_percent"}}.
    """
    def generate_input(duration_samples, device):
        return torch.randn((1, duration_samples), device=device) * 2 - 1.

    def drain_samples():
        # Only called while no sampler thread is running, so empty()/get() cannot race
        drained = []
        while not cpu_usage_samples.empty():
            drained.append(cpu_usage_samples.get())
        return drained

    samples = int(sample_rate * input_duration_sec)

    model.eval()
    model.to(device)

    # Discard leftovers from a previous run that never reached its own drain step
    drain_samples()

    with torch.inference_mode():
        x = generate_input(samples, device)

        # Start CPU usage monitoring in a separate thread
        monitor_thread = None
        try:
            cpu_monitoring.set()
            monitor_thread = threading.Thread(target=monitor_cpu_usage)
            monitor_thread.start()

            # Run model inference
            for _ in tqdm(range(iterations), desc="CPU Usage Profiling"):
                _ = model(x.float())

        except Exception as e:
            if verbose:
                print(f"[ERROR] During CPU usage monitoring: {e}")

        finally:
            cpu_monitoring.clear()
            # The sampler re-arms the event if it starts after the clear() above; keep clearing
            # until the thread has really exited instead of blocking forever on a single join()
            while monitor_thread is not None and monitor_thread.is_alive():
                cpu_monitoring.clear()
                monitor_thread.join(timeout=0.5)

    # Process CPU usage samples
    cpu_perc_samples = drain_samples()

    avg_cpu_usage = sum(cpu_perc_samples) / len(cpu_perc_samples) if cpu_perc_samples else 0
    peak_cpu_usage = max(cpu_perc_samples) if cpu_perc_samples else 0

    results_entry = {"cpu_usage": {"iterations": int(iterations),
                                   "input_duration_sec": float(input_duration_sec),
                                   "avg_cpu_usage_percent": round(avg_cpu_usage, 2),
                                   "peak_cpu_usage_percent": round(peak_cpu_usage, 2)}}

    if verbose:
        print(f"Results: {json.dumps(results_entry, indent=4)}")

    # Save results to JSON
    if save_path is not None:
        try:
            existing_data = {}
            if os.path.exists(save_path):
                with open(save_path, "r") as f:
                    loaded = json.load(f)
                # Anything that is not a JSON object is replaced by a fresh dict
                if isinstance(loaded, dict):
                    existing_data = loaded

            existing_data.update(results_entry)

            with open(save_path, "w") as f:
                json.dump(existing_data, f, indent=4)

            if verbose:
                print(f"Saved (updated) CPU usage results to: {save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save CPU usage results: {e}")

    # Save results to NPZ (optional)
    if npz_save_path is not None:
        try:
            np.savez_compressed(npz_save_path,
                                cpu_usage_samples=np.array(cpu_perc_samples),
                                features=np.array(list(results_entry["cpu_usage"].items())))
            if verbose:
                print(f"Saved compressed NPZ CPU usage data to: {npz_save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save CPU usage NPZ: {e}")

    return results_entry


def energy_co2(model, sample_rate, device, save_path=None, iterations=100, input_duration_sec=10.0, verbose=True, npz_save_path=None):
    """
    Profile energy consumption and CO₂ emissions during model inference using CodeCarbon.

    The model is run `iterations` times on one fixed random input while an `EmissionsTracker`
    is active (every forward pass is wrapped in its own tracker task). The tracker writes
    `energy_emissions.csv` into the directory of `save_path` (or `./` when `save_path` is None
    or has no directory component). That CSV is read back and its `emissions_rate`,
    `cpu_energy` and `ram_energy` columns are averaged.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: Device to run the model on ('cpu' or 'cuda').
    :param save_path: Optional JSON file; the "energy_consumption" entry is merged into it.
    :param iterations: Number of forward passes to track.
    :param input_duration_sec: Input duration in seconds.
    :param verbose: Print results and save status.
    :param npz_save_path: Optional path for the raw per-row CSV values (compressed .npz).
    :return: Dict with a single "energy_consumption" entry holding the averaged values.
    :raises FileNotFoundError: If the emissions CSV does not exist after the run.
    """
    def generate_input(duration_samples, device):
        return torch.randn((1, duration_samples), device=device) * 2 - 1.

    def _to_float(cell):
        # CSV cells can be absent (None) or empty strings; count those as 0.0 instead of crashing
        try:
            return float(cell)
        except (TypeError, ValueError):
            return 0.0

    samples = int(sample_rate * input_duration_sec)
    model.eval()
    model.to(device)

    # dirname() is "" for a bare filename, and os.makedirs("") raises: fall back to the CWD
    save_dir = (os.path.dirname(save_path) if save_path else "") or "./"
    os.makedirs(save_dir, exist_ok=True)

    # Setup CodeCarbon tracker
    energy_tracker = EmissionsTracker(project_name="energy_emissions_colab",
                                      tracking_mode="machine",
                                      save_to_file=True,
                                      output_dir=save_dir,
                                      output_file="energy_emissions.csv",
                                      measure_power_secs=0.1)

    with torch.inference_mode():
        x = generate_input(samples, device)

        energy_tracker.start()
        try:
            for i in tqdm(range(iterations), desc="Energy/CO₂ Emissions Profiling"):
                task_name = f"Run-{i+1}"
                energy_tracker.start_task(task_name)
                try:
                    _ = model(x.float())
                finally:
                    energy_tracker.stop_task(task_name)
        finally:
            # Always stop the tracker, even if inference raised, so it does not keep running
            energy_tracker.stop()

    # Read and process results
    emissions_csv_path = os.path.join(save_dir, "energy_emissions.csv")
    if not os.path.exists(emissions_csv_path):
        raise FileNotFoundError(f"No emissions CSV file found at {emissions_csv_path}.")

    emissions_rate_values = []
    cpu_energy_values = []
    ram_energy_values = []

    # NOTE(review): every row of the CSV is averaged; if the file already existed and the
    # tracker appends to it, rows from earlier runs are included too -- confirm this is intended.
    with open(emissions_csv_path, 'r', newline='') as file:
        reader = csv.DictReader(file)
        for row in reader:
            emissions_rate_values.append(_to_float(row.get('emissions_rate', 0)))
            cpu_energy_values.append(_to_float(row.get('cpu_energy', 0)))
            ram_energy_values.append(_to_float(row.get('ram_energy', 0)))

    # float(...) keeps the entry made of plain Python numbers for JSON serialisation
    results_entry = {"energy_consumption": {"iterations": int(iterations),
                                            "input_duration_sec": float(input_duration_sec),
                                            "avg_emission_rate_gCO2eq_per_sec": float(np.mean(emissions_rate_values)) if emissions_rate_values else 0,
                                            "avg_cpu_energy_kWh": float(np.mean(cpu_energy_values)) if cpu_energy_values else 0,
                                            "avg_ram_energy_kWh": float(np.mean(ram_energy_values)) if ram_energy_values else 0}}

    if verbose:
        print(json.dumps(results_entry, indent=4))

    # Save results to JSON (merge into the existing file if there is one)
    if save_path is not None:
        try:
            if os.path.exists(save_path):
                with open(save_path, "r") as f:
                    existing_data = json.load(f)
                if not isinstance(existing_data, dict):
                    existing_data = {}
            else:
                existing_data = {}

            # Merge new results
            existing_data.update(results_entry)

            with open(save_path, "w") as f:
                json.dump(existing_data, f, indent=4)

            if verbose:
                print(f"Saved (updated) energy profiling results to: {save_path}")
        except Exception as e:
            # Best-effort persistence: the results are still returned to the caller
            if verbose:
                print(f"Failed to save JSON: {e}")

    # Save NPZ (optional)
    if npz_save_path is not None:
        try:
            np.savez_compressed(npz_save_path,
                                emissions_rate=np.array(emissions_rate_values),
                                cpu_energy=np.array(cpu_energy_values),
                                ram_energy=np.array(ram_energy_values))
            if verbose:
                print(f"Saved compressed NPZ energy data to: {npz_save_path}")

        except Exception as e:
            if verbose:
                print(f"Failed to save NPZ: {e}")

    return results_entry


def cuda_time(model, sample_rate, device, save_path, iterations=100, input_duration_sec=10.0, verbose=True, npz_save_path=None):
    """
    Measure CUDA event timing (GPU only) for model inference.

    Each forward pass on one fixed random input is bracketed by a pair of CUDA events;
    the elapsed time between them is collected (in seconds) and summarised.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: CUDA device, as a string ('cuda', 'cuda:0', ...) or a torch.device.
    :param save_path: Path to save the JSON results (merged into the file if it exists).
    :param iterations: Number of iterations to average.
    :param input_duration_sec: Input duration in seconds.
    :param verbose: Print verbose output.
    :param npz_save_path: Optional path to save .npz results.
    :return: Dict with a single "cuda_time" entry holding the timing statistics.
    :raises AssertionError: If `device` is not a CUDA device.
    """
    # Explicit raise instead of `assert` so the guard survives `python -O`;
    # str() makes torch.device objects acceptable as well as strings.
    if not str(device).startswith("cuda"):
        raise AssertionError("CUDA Event profiling requires a GPU device.")
    cuda_device = torch.device(device)

    def generate_input(duration_samples, device):
        return torch.randn((1, duration_samples), device=device) * 2 - 1.

    samples = int(sample_rate * input_duration_sec)
    model.eval()
    model.to(device)

    timings = []

    # Events are recorded on the *current* device's stream and synchronize() defaults to the
    # current device, so make the requested device current for the whole measurement.
    with torch.cuda.device(cuda_device), torch.inference_mode():
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        x = generate_input(samples, device)

        for _ in tqdm(range(iterations), desc="CUDA Event Timing"):
            set_seeds(42)
            start.record()
            _ = model(x.float())
            end.record()
            # Wait for both events to complete before reading the elapsed time
            torch.cuda.synchronize()
            timings.append(start.elapsed_time(end) / 1000.)  # Convert ms -> seconds

    timings = np.array(timings)

    results_entry = {"cuda_time": {"iterations": int(iterations),
                                   "input_duration_sec": float(input_duration_sec),
                                   "max_sec": float(np.max(timings)),
                                   "min_sec": float(np.min(timings)),
                                   "mean_sec": float(np.mean(timings)),
                                   "std_dev_sec": float(np.std(timings, ddof=1)),
                                   "median_sec": float(np.median(timings)),
                                   "percentiles": {f"{p}th_perc": float(np.percentile(timings, p)) for p in [25, 33, 66, 75]},
                                   "iqr_sec": float(iqr(timings)),
                                   "skewness": float(skew(timings)),
                                   "kurtosis": float(kurtosis(timings))}}

    if verbose:
        print(f"Results: {json.dumps(results_entry, indent=4)}")

    # Save JSON (merge with whatever the file already holds)
    if save_path:
        try:
            if os.path.exists(save_path):
                with open(save_path, 'r') as f:
                    existing = json.load(f)
                if not isinstance(existing, dict):
                    existing = {}
            else:
                existing = {}
            existing.update(results_entry)
            with open(save_path, 'w') as f:
                json.dump(existing, f, indent=4)
            if verbose:
                print(f"Saved CUDA Timing to: {save_path}")
        except Exception as e:
            # Best-effort persistence: the results are still returned to the caller
            if verbose:
                print(f"Failed to save CUDA Timing JSON: {e}")

    # Save NPZ
    if npz_save_path:
        try:
            np.savez_compressed(npz_save_path, timings=timings)
            if verbose:
                print(f"Saved compressed CUDA Timings to: {npz_save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save CUDA Times NPZ: {e}")

    return results_entry


def e2e_inference_time(model, sample_rate, device, save_path, iterations=100, input_duration_sec=10.0, verbose=True, npz_save_path=None):
    """
    Measure End-to-End (CPU+GPU) inference timing.

    Wall-clock time (`time.perf_counter`) of each forward pass on one fixed random input is
    collected. On CUDA devices the device is synchronized before starting and before
    stopping the clock, so only the forward pass itself is measured.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: Device, as a string ('cpu', 'cuda', 'cuda:0', ...) or a torch.device.
    :param save_path: Path to save the JSON results (merged into the file if it exists).
    :param iterations: Number of iterations.
    :param input_duration_sec: Input duration in seconds.
    :param verbose: Print verbose output.
    :param npz_save_path: Optional path to save .npz results.
    :return: Dict with a single "e2e_inference_time" entry holding the timing statistics.
    """
    def generate_input(duration_samples, device):
        return torch.randn((1, duration_samples), device=device) * 2 - 1.

    # str() makes torch.device objects acceptable as well as plain strings
    on_cuda = str(device).startswith("cuda")
    cuda_device = torch.device(device) if on_cuda else None

    samples = int(sample_rate * input_duration_sec)
    model.eval()
    model.to(device)

    timings = []

    with torch.inference_mode():
        x = generate_input(samples, device)

        for _ in tqdm(range(iterations), desc="E2E Inference Timing"):
            set_seeds(42)
            if on_cuda:
                # Drain work queued earlier (input creation, seeding) so it is not billed to this run
                torch.cuda.synchronize(cuda_device)
            start = time.perf_counter()
            _ = model(x.float())
            if on_cuda:
                # Kernels launch asynchronously: wait for the requested device before stopping the clock
                torch.cuda.synchronize(cuda_device)
            timings.append(time.perf_counter() - start)

    timings = np.array(timings)

    results_entry = {"e2e_inference_time": {"iterations": int(iterations),
                                            "input_duration_sec": float(input_duration_sec),
                                            "max_sec": float(np.max(timings)),
                                            "min_sec": float(np.min(timings)),
                                            "mean_sec": float(np.mean(timings)),
                                            "std_dev_sec": float(np.std(timings, ddof=1)),
                                            "median_sec": float(np.median(timings)),
                                            "percentiles": {f"{p}th_perc": float(np.percentile(timings, p)) for p in [25, 33, 66, 75]},
                                            "iqr_sec": float(iqr(timings)),
                                            "skewness": float(skew(timings)),
                                            "kurtosis": float(kurtosis(timings))}}

    if verbose:
        print(f"Results: {json.dumps(results_entry, indent=4)}")

    # Save JSON (merge with whatever the file already holds)
    if save_path:
        try:
            if os.path.exists(save_path):
                with open(save_path, 'r') as f:
                    existing = json.load(f)
                if not isinstance(existing, dict):
                    existing = {}
            else:
                existing = {}
            existing.update(results_entry)
            with open(save_path, 'w') as f:
                json.dump(existing, f, indent=4)
            if verbose:
                print(f"Saved E2E inference timing to: {save_path}")
        except Exception as e:
            # Best-effort persistence: the results are still returned to the caller
            if verbose:
                print(f"Failed to save E2E timing JSON: {e}")

    if npz_save_path:
        try:
            np.savez_compressed(npz_save_path, timings=timings)
            if verbose:
                print(f"Saved compressed E2E timings to: {npz_save_path}")
        except Exception as e:
            if verbose:
                print(f"Failed to save E2E NPZ: {e}")

    return results_entry


def gpu_memory(model, sample_rate, device, save_path, iterations=100, input_duration_sec=10.0, verbose=True):
    """
    Measure peak GPU memory usage during model inference.

    The peak-allocation counter of the requested device is reset after the model and the
    input are already resident, then read back after `iterations` forward passes.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: CUDA device, as a string ('cuda', 'cuda:0', ...) or a torch.device.
    :param save_path: Path to save the JSON results (merged into the file if it exists).
    :param iterations: Number of iterations.
    :param input_duration_sec: Input duration in seconds.
    :param verbose: Print verbose output.
    :return: Dict with a single "gpu_memory_usage" entry (peak in bytes and in megabytes).
    :raises AssertionError: If `device` is not a CUDA device.
    """
    # Explicit raise instead of `assert` so the guard survives `python -O`;
    # str() makes torch.device objects acceptable as well as strings.
    if not str(device).startswith("cuda"):
        raise AssertionError("GPU memory profiling requires a CUDA device.")
    cuda_device = torch.device(device)

    def generate_input(duration_samples, device):
        return torch.randn((1, duration_samples), device=device) * 2 - 1.

    samples = int(sample_rate * input_duration_sec)
    model.eval()
    model.to(device)

    with torch.inference_mode():
        x = generate_input(samples, device)
        # Address the requested device explicitly: the defaults act on the *current* device,
        # which is not necessarily the one the model runs on.
        torch.cuda.reset_peak_memory_stats(cuda_device)

        for _ in tqdm(range(iterations), desc="GPU Memory Usage Profiling"):
            _ = model(x.float())
            torch.cuda.synchronize(cuda_device)

    peak_memory_bytes = torch.cuda.max_memory_allocated(cuda_device)

    results_entry = {"gpu_memory_usage": {"iterations": int(iterations),
                                          "input_duration_sec": float(input_duration_sec),
                                          "peak_memory_bytes": int(peak_memory_bytes),
                                          "peak_memory_megabytes": round(peak_memory_bytes / (1024**2), 4)}}

    if verbose:
        print(f"Results: {json.dumps(results_entry, indent=4)}")

    # Save JSON (merge with whatever the file already holds)
    if save_path:
        try:
            if os.path.exists(save_path):
                with open(save_path, 'r') as f:
                    existing = json.load(f)
                if not isinstance(existing, dict):
                    existing = {}
            else:
                existing = {}
            existing.update(results_entry)
            with open(save_path, 'w') as f:
                json.dump(existing, f, indent=4)
            if verbose:
                print(f"Saved GPU memory usage to: {save_path}")
        except Exception as e:
            # Best-effort persistence: the results are still returned to the caller
            if verbose:
                print(f"Failed to save GPU memory usage JSON: {e}")

    return results_entry


def gpu_usage(model, sample_rate, device, save_path, iterations=100, input_duration_sec=10.0, verbose=True):
    """
    Measure GPU utilization percentage during model inference.

    A background thread running `monitor_gpu_usage` pushes utilization samples into a queue
    while the model is run `iterations` times; the samples are then averaged and their
    maximum is taken.

    :param model: Your PyTorch model.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param device: CUDA device, as a string ('cuda', 'cuda:0', ...) or a torch.device.
    :param save_path: Path to save the JSON results (merged into the file if it exists).
    :param iterations: Number of iterations.
    :param input_duration_sec: Input duration in seconds.
    :param verbose: Print verbose output.
    :return: Dict with a single "gpu_utilization" entry (average and peak percentage).
    :raises AssertionError: If `device` is not a CUDA device.
    """
    # Explicit raise instead of `assert` so the guard survives `python -O`;
    # str() makes torch.device objects acceptable as well as strings.
    if not str(device).startswith("cuda"):
        raise AssertionError("GPU utilization monitoring requires a CUDA device.")
    cuda_device = torch.device(device)

    def generate_input(duration_samples, device):
        return torch.randn((1, duration_samples), device=device) * 2 - 1.

    samples = int(sample_rate * input_duration_sec)
    model.eval()
    model.to(device)

    gpu_queue = Queue()
    stop_event = threading.Event()

    with torch.inference_mode():
        x = generate_input(samples, device)

        monitor_thread = threading.Thread(target=monitor_gpu_usage, args=(gpu_queue, stop_event))
        monitor_thread.start()

        try:
            for _ in tqdm(range(iterations), desc="GPU Utilization Profiling"):
                _ = model(x.float())
                torch.cuda.synchronize(cuda_device)
        finally:
            # Stop and join the sampler even if inference raised; otherwise the
            # non-daemon thread would be left running.
            stop_event.set()
            monitor_thread.join()

    # The producer thread has finished, so the queue can be drained safely
    gpu_samples = []
    while not gpu_queue.empty():
        gpu_samples.append(gpu_queue.get())

    avg_gpu = sum(gpu_samples) / len(gpu_samples) if gpu_samples else 0
    peak_gpu = max(gpu_samples) if gpu_samples else 0

    results_entry = {"gpu_utilization": {"iterations": int(iterations),
                                         "input_duration_sec": float(input_duration_sec),
                                         "avg_utilization_percent": float(avg_gpu),
                                         "peak_utilization_percent": float(peak_gpu)}}

    if verbose:
        print(f"Results: {json.dumps(results_entry, indent=4)}")

    # Save JSON (merge with whatever the file already holds)
    if save_path:
        try:
            if os.path.exists(save_path):
                with open(save_path, 'r') as f:
                    existing = json.load(f)
                if not isinstance(existing, dict):
                    existing = {}
            else:
                existing = {}
            existing.update(results_entry)
            with open(save_path, 'w') as f:
                json.dump(existing, f, indent=4)
            if verbose:
                print(f"Saved GPU utilization to: {save_path}")
        except Exception as e:
            # Best-effort persistence: the results are still returned to the caller
            if verbose:
                print(f"Failed to save GPU utilization JSON: {e}")

    return results_entry


## Architecture

In [None]:
!pip install torchinfo
!pip install torchprofile
!pip install torchview
!pip install loguru

from torchinfo import summary
from torchprofile import profile_macs
from torchview import draw_graph
from torch.profiler import profile, record_function, ProfilerActivity
from loguru import logger

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0
Collecting torchprofile
  Downloading torchprofile-0.0.4-py3-none-any.whl.metadata (303 bytes)
Downloading torchprofile-0.0.4-py3-none-any.whl (7.7 kB)
Installing collected packages: torchprofile
Successfully installed torchprofile-0.0.4
Collecting torchview
  Downloading torchview-0.2.6-py3-none-any.whl.metadata (12 kB)
Downloading torchview-0.2.6-py3-none-any.whl (25 kB)
Installing collected packages: torchview
Successfully installed torchview-0.2.6
Collecting loguru
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Downloading loguru-0.7.3-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: loguru
Successfully installed loguru-0.7.3

In [None]:
def set_seeds(seed=42):
    """
    Seed every random number generator used in the notebook and make cuDNN deterministic.

    :param seed: Integer seed applied to Python's `random`, NumPy, and PyTorch (CPU and CUDA).
                 It is also exported as the PYTHONHASHSEED environment variable.
    :return: None
    """
    # Python-level randomness. The environment variable is only written here; an
    # interpreter reads PYTHONHASHSEED at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current CUDA device, then all CUDA devices
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # Trade cuDNN auto-tuning for repeatable kernel selection
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Lightning-2-PyTorch Checkpoints Loader ----------------------------------------------------------------------
def load_lightning2pt(checkpoint_path, model, device="cpu", verbose=True, validate_updates=True):
    """
    Loads a PyTorch Lightning checkpoint's state_dict into a plain PyTorch model and optionally verifies parameter updates.

    The wrapper prefix (the first dotted component of the first dotted key, e.g. 'model.') is
    removed from the *start* of each key only. Stripping is skipped when the checkpoint keys
    already match the model's own keys better without it.

    :param checkpoint_path: Absolute Path to the Lightning checkpoint file (.ckpt).
    :param model: The plain PyTorch model instance to load the checkpoint into.
    :param device: Device to load the model onto ('cpu' or 'cuda').
    :param verbose: Whether to print detailed information about the loading process (default: True).
    :param validate_updates: Whether to validate which layers were updated during fine-tuning (default: True).
    :return: Tuple (model, updated_layers): the model with the checkpoint weights loaded, and the
             list of state_dict entries that differ from the model's previous values
             (None when validate_updates is False).
    :raises ValueError: If the checkpoint cannot be read, has no 'state_dict', or does not fit the model.
    """
    # Step 1: Load the Lightning checkpoint
    try:
        checkpoint = torch.load(checkpoint_path, map_location=device)
    except FileNotFoundError as e:
        raise ValueError(f"Checkpoint file not found at: {checkpoint_path}") from e
    except Exception as e:
        raise ValueError(f"Failed to load checkpoint: {e}") from e

    # Step 2: Extract the Lightning state_dict
    if "state_dict" not in checkpoint:
        raise ValueError(f"Checkpoint does not contain a 'state_dict'. Keys found: {list(checkpoint.keys())}")

    lightning_state_dict = checkpoint["state_dict"]

    # Step 3: Generalize prefix removal
    model_keys = set(model.state_dict().keys())
    candidate = next((key.split(".", 1)[0] + "." for key in lightning_state_dict if "." in key), None)

    prefix = None
    stripped_state_dict = lightning_state_dict
    if candidate is not None:
        # Remove the prefix only where it LEADS the key: str.replace() would also delete
        # later occurrences (e.g. 'model.model.weight' -> 'weight').
        without_prefix = {(key[len(candidate):] if key.startswith(candidate) else key): value
                          for key, value in lightning_state_dict.items()}
        raw_hits = len(model_keys.intersection(lightning_state_dict))
        stripped_hits = len(model_keys.intersection(without_prefix))
        # Keep the raw keys when they already fit the model better (un-prefixed checkpoint)
        if stripped_hits >= raw_hits:
            prefix = candidate
            stripped_state_dict = without_prefix

    if verbose:
        if prefix:
            print(f"Detected prefix '{prefix}'. Stripped from state_dict keys.")
        else:
            print("No prefix detected in state_dict keys.")

    # Step 4: Move the model to the specified device
    model.to(device)
    if verbose:
        print(f"Model moved to device: {device}")

    # Step 5: Optionally validate parameter updates
    updated_layers = []
    if validate_updates:
        for name, param in model.state_dict().items():
            if name not in stripped_state_dict:
                continue

            old_param = param.clone()
            new_param = stripped_state_dict[name]

            # Print data type information
            if verbose:
                print(f"Validating layer: {name}")
                print(f"  Old Param: Type: {type(old_param)}, DType: {old_param.dtype}")
                print(f"  New Param: Type: {type(new_param)}, DType: {new_param.dtype}")

            if old_param.shape != new_param.shape:
                # Differences cannot be computed element-wise; load_state_dict below
                # reports the mismatch as a ValueError.
                updated_layers.append(name)
                if verbose:
                    print(f"  Layer: {name} has a different shape: "
                          f"{tuple(old_param.shape)} -> {tuple(new_param.shape)}")
            elif not torch.equal(old_param, new_param):
                updated_layers.append(name)

                if verbose:
                    # Compute and display parameter differences
                    diff = (old_param - new_param).float()
                    abs_diff = diff.abs()
                    # std() is undefined (NaN) for single-element tensors such as counters
                    std_dev = abs_diff.std().item() if abs_diff.numel() > 1 else 0.0
                    print(f"  Layer: {name} has changes!")
                    print(f"    Min Difference: {abs_diff.min().item():.6f}")
                    print(f"    Max Difference: {abs_diff.max().item():.6f}")
                    print(f"    Mean Difference: {abs_diff.mean().item():.6f}")
                    print(f"    Std-Dev of Differences: {std_dev:.6f}")

                    # Optionally, display a small set of differences
                    print(f"    Sample Differences: {diff.flatten()[:5].tolist()}...")

            if verbose:
                print('---------------------------------------------------------------------------------')

    # Load the stripped state_dict into the plain model
    try:
        model.load_state_dict(stripped_state_dict)
        if verbose:
            print("State dict successfully loaded into the model!")
    except Exception as e:
        raise ValueError(f"Failed to load state_dict into the model: {e}") from e

    # Step 6: Print updated layers if validated
    if verbose and validate_updates:
        if updated_layers:
            print("The following layers were updated during fine-tuning:")
            for layer in updated_layers:
                print(f" - {layer}")
        else:
            print("No layers were updated. Fine-tuning may not have modified the model.")

    # Return the model and optionally updated layers
    return model, updated_layers if validate_updates else None


# Model profiling functions ----------------------------------------------------------------------
def inference_trace(model, sample_rate, input_duration_sec=10.0, device="cpu", save_path=None):
    """
    Perform an inference trace profiling of the model using PyTorch's profiler.
    Saves a Chrome Trace JSON file with the CPU activity trace, plus the CUDA activity
    trace when `device` is a CUDA device.

    :param model: The PyTorch model to profile.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param input_duration_sec: Duration of input audio in seconds (default: 10.0).
    :param device: Device to run the model on ('cpu' or 'cuda'; a torch.device also works).
    :param save_path: Path to save the Chrome trace JSON file (required). Missing parent
                      directories are created.
    :return: None
    :raises ValueError: If `save_path` is None.
    """
    if save_path is None:
        raise ValueError("You must provide a save_path to store the Chrome trace JSON file.")

    model.eval()
    model.to(device)

    # Generate random input tensor
    num_samples = int(sample_rate * input_duration_sec)
    x = torch.randn((1, num_samples), device=device) * 2 - 1.

    # Only ask for CUDA activity when the run actually happens on a GPU
    activities = [ProfilerActivity.CPU]
    if str(device).startswith("cuda"):
        activities.append(ProfilerActivity.CUDA)

    with torch.inference_mode():
        with profile(activities=activities,
                     profile_memory=True,
                     record_shapes=True) as prof:
            with record_function("model_inference"):
                _ = model(x.float())

    try:
        # Without the parent directory the export fails and the trace is lost
        trace_dir = os.path.dirname(save_path)
        if trace_dir:
            os.makedirs(trace_dir, exist_ok=True)
        prof.export_chrome_trace(save_path)
        print(f"Inference trace successfully saved to: {save_path}")
    except Exception as e:
        # Best-effort export: report the failure instead of aborting the notebook run
        print(f"Failed to save inference trace: {e}")



def architecture_profile(model, sample_rate, input_duration_sec=10.0, device="cpu", save_path=None):
    """
    Perform full architecture profiling of the model:
    - Structure summary using torchinfo
    - MACs and FLOPs estimation using torchprofile
    Saves all results to a dedicated log file using Loguru.

    The log sink is registered only for the duration of the call and removed afterwards.
    MACs are counted on a CPU copy of the input, so the model is moved to the CPU for that
    step and moved back to `device` before returning.

    :param model: The PyTorch model to profile.
    :param sample_rate: Audio sample rate (e.g., 32000).
    :param input_duration_sec: Duration of input audio in seconds (default: 10.0).
    :param device: Device to run the model on ('cpu' or 'cuda').
    :param save_path: Path to save the full architecture summary log file (required).
    :return: None
    :raises ValueError: If `save_path` is None.
    """
    if save_path is None:
        raise ValueError("You must provide a save_path to store the architecture summary log.")

    # Step 1: Model preparation
    model.eval()
    model.to(device)

    # Generate input shape and input tensor
    num_samples = int(sample_rate * input_duration_sec)
    input_shape = (1, num_samples)
    dummy_input = torch.randn((1, num_samples), device='cpu') * 2 - 1.

    sink_id = None
    try:
        # Create a dedicated logger; keep the sink id so the sink can be removed afterwards
        loguru_logger = logger.bind(name="architecture_profile")
        sink_id = loguru_logger.add(save_path, format="{message}", level="INFO", enqueue=True)

        # Step 2: Architecture Summary
        model_stats = summary(model=model,
                              input_size=input_shape,
                              cache_forward_pass=True,
                              col_names=("input_size",
                                         "output_size",
                                         "num_params",
                                         "params_percent",
                                         "kernel_size",
                                         "mult_adds",
                                         "trainable"),
                              depth=100,
                              device=device,
                              row_settings=("ascii_only",
                                            "depth",
                                            "var_names"),
                              verbose=0)

        summary_str = str(model_stats)
        loguru_logger.info(summary_str)

        # Step 3: MACs and FLOPs Estimation (model and dummy input both on the CPU)
        mul_add_cum = profile_macs(model.cpu(), dummy_input)

        loguru_logger.info(f"MACs    : {mul_add_cum}")
        loguru_logger.info(f"FLOPs   : {mul_add_cum * 2}")

    except Exception as e:
        # Best-effort profiling: report the failure instead of aborting the notebook run
        print(f"Failed during full architecture profiling: {e}")

    finally:
        # Detach the sink: repeated calls would otherwise stack one more handler each time
        # and unrelated log messages would keep landing in this file.
        if sink_id is not None:
            logger.remove(sink_id)
        # profile_macs() left the model on the CPU; restore the requested placement
        model.to(device)

# Performance Profiling

In [None]:
# Delete the results directory left over from a previous run (it is re-created below)
!rm -rf 3_profiling_results_COLAB

In [None]:
import datetime
from epanns_inference import models

# Seed all RNGs before anything else so the profiling runs are repeatable
set_seeds(42)

# Output Parameters -----------------------------------------------------------------------------
# Results of one run share a minute-resolution timestamp in their file names
RESULTS_DIR = "./3_profiling_results_COLAB"
TIMESTAMP = f"{datetime.datetime.now():%Y%m%d_%H%M}"
os.makedirs(RESULTS_DIR, exist_ok=True)

In [None]:
# GPU & Hardware profiling ----------------------------------------------------------------------
gpu_info_path = os.path.join(RESULTS_DIR, f"gpu_info_{TIMESTAMP}.json")
device, gpu_info = your_gpu(verbose=True, save_path=gpu_info_path)
print(f"Torch device: {device}")
print('\n')

hardware_info_path = os.path.join(RESULTS_DIR, f"hardware_info_{TIMESTAMP}.json")
hardware_info = your_hardware(verbose=True, save_path=hardware_info_path)
print('\n')

# CPU inference profiling -----------------------------------------------------------------------
SAMPLE_RATE = 32000
INPUT_DURATION_SEC = 10.0
ITERATIONS = 100
CHECKPOINT_PATH = "./multi-unified.ckpt"

# Load the model
model = models.Cnn14_pruned(pre_trained=False)
model, _ = load_lightning2pt(CHECKPOINT_PATH, model, device="cpu", verbose=True, validate_updates=False)
print('\n')
model.cpu()
# Every profiler below merges its own entry into this single JSON file
model_info_path = os.path.join(RESULTS_DIR, f"model_info_{TIMESTAMP}.json")

# Minimum input size
min_binary_search(model=model,
                  sample_rate=SAMPLE_RATE,
                  device="cpu",
                  save_path=model_info_path,
                  verbose=True)
print('\n')

# Benchmark CPU Overall Time
npz_path = os.path.join(RESULTS_DIR, f"cpu_overall_times_{TIMESTAMP}.npz")
overall_time(model=model,
             device="cpu",
             sample_rate=SAMPLE_RATE,
             input_duration_sec=INPUT_DURATION_SEC,
             iterations=ITERATIONS,
             verbose=True,
             save_path=model_info_path,
             npz_save_path=npz_path)
print('\n')

# CPU Process Time
npz_path = os.path.join(RESULTS_DIR, f"cpu_process_times_{TIMESTAMP}.npz")
process_time(model=model,
             device="cpu",
             sample_rate=SAMPLE_RATE,
             input_duration_sec=INPUT_DURATION_SEC,
             iterations=ITERATIONS,
             verbose=True,
             save_path=model_info_path,
             npz_save_path=npz_path)
print('\n')

# Memory/Cache Usage
npz_path = os.path.join(RESULTS_DIR, f"memory_and_cache_{TIMESTAMP}.npz")
memory(model=model,
       device="cpu",
       sample_rate=SAMPLE_RATE,
       input_duration_sec=INPUT_DURATION_SEC,
       iterations=ITERATIONS,
       verbose=True,
       save_path=model_info_path,
       npz_save_path=npz_path)
print('\n')

# CPU Usage
npz_path = os.path.join(RESULTS_DIR, f"cpu_usage_{TIMESTAMP}.npz")
cpu_usage(model=model,
          device="cpu",
          sample_rate=SAMPLE_RATE,
          input_duration_sec=INPUT_DURATION_SEC,
          iterations=ITERATIONS,
          verbose=True,
          save_path=model_info_path,
          npz_save_path=npz_path)
print('\n')

# GPU Inference Profiling -------------------------------------------------------------------
# Guarded so a CPU-only runtime skips this section instead of crashing on model.cuda()
# (which would also prevent the energy profiling below from running).
if torch.cuda.is_available():
    model.cuda()

    # Cuda Time
    npz_path = os.path.join(RESULTS_DIR, f"cuda_times_{TIMESTAMP}.npz")
    cuda_time(model=model,
              device="cuda:0",
              sample_rate=SAMPLE_RATE,
              input_duration_sec=INPUT_DURATION_SEC,
              iterations=ITERATIONS,
              verbose=True,
              save_path=model_info_path,
              npz_save_path=npz_path)
    print('\n')

    # E2E Inference Time
    npz_path = os.path.join(RESULTS_DIR, f"e2e_inference_times_{TIMESTAMP}.npz")
    e2e_inference_time(model=model,
                       device="cuda:0",
                       sample_rate=SAMPLE_RATE,
                       input_duration_sec=INPUT_DURATION_SEC,
                       iterations=ITERATIONS,
                       verbose=True,
                       save_path=model_info_path,
                       npz_save_path=npz_path)
    print('\n')

    # GPU Memory Usage
    gpu_memory(model=model,
               device="cuda:0",
               sample_rate=SAMPLE_RATE,
               input_duration_sec=INPUT_DURATION_SEC,
               iterations=ITERATIONS,
               verbose=True,
               save_path=model_info_path)
    print('\n')

    # Cuda-processors Usage
    gpu_usage(model=model,
              device="cuda:0",
              sample_rate=SAMPLE_RATE,
              input_duration_sec=INPUT_DURATION_SEC,
              iterations=ITERATIONS,
              verbose=True,
              save_path=model_info_path)
    print('\n')
else:
    print("CUDA is not available: skipping GPU inference profiling.")
    print('\n')

# Energy and CO2 Emissions (runs on the CPU; energy_co2 moves the model to `device` itself)
npz_path = os.path.join(RESULTS_DIR, f"energy_co2_{TIMESTAMP}.npz")
energy_co2(model=model,
           device="cpu",
           sample_rate=SAMPLE_RATE,
           input_duration_sec=INPUT_DURATION_SEC,
           iterations=ITERATIONS,
           verbose=True,
           save_path=model_info_path,
           npz_save_path=npz_path)
print('\n')

print("EOF")

[GPU-0] Name: Tesla T4, Free: 15.72 GB, Total: 15.83 GB
Driver Version: 550.54.15
CUDA Compiler Version: 12.5
PyTorch Version: 2.6.0+cu124
Saved GPU info JSON to: ./3_profiling_results_COLAB/gpu_info_20250409_1028.json
Torch device: cuda:0


CPU Info retrieved from /proc/cpuinfo
Hardware Summary: {
    "cpu": {
        "model_name": "Intel(R) Xeon(R) CPU @ 2.30GHz",
        "physical_cores": 8,
        "frequencies_mhz": {
            "cpu0": 0.0,
            "cpu1": 0.0,
            "cpu2": 0.0,
            "cpu3": 0.0,
            "cpu4": 0.0,
            "cpu5": 0.0,
            "cpu6": 0.0,
            "cpu7": 0.0
        }
    },
    "ram": {
        "total_memory_gb": 54.75,
        "available_memory_gb": 51.95,
        "used_memory_gb": 2.15,
        "percent_used": 5.1
    },
    "disks": [
        {
            "Filesystem": "overlay",
            "Size": "236G",
            "Used": "41G",
            "Avail": "195G",
            "Use%": "18%",
            "Mounted": "/"
     

  clipwise_output = nn.functional.softmax(self.fc_audioset(x))
MIN Input Size Binary Search: 100%|█████████▉| 319998/320000 [00:00<00:00, 423413.29it/s]


Results: {
    "min_input_size": {
        "samples": 9919,
        "seconds": 0.30996875,
        "sample_rate": 32000,
        "binary_search_iterations": 19
    }
}
Saved (updated) profiling results to: ./3_profiling_results_COLAB/model_info_20250409_1028.json




CPU Overall Time Profiling: 100%|██████████| 100/100 [00:22<00:00,  4.54it/s]


Results: {
    "cpu_overall_time": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "max_sec": 0.31162175100007516,
        "min_sec": 0.18841470300003493,
        "mean_sec": 0.21822004696998876,
        "std_dev_sec": 0.036087429297872085,
        "median_sec": 0.20275976699991816,
        "percentiles": {
            "25th_perc": 0.19719780199997672,
            "33th_perc": 0.19843507044994113,
            "66th_perc": 0.20985839805997786,
            "75th_perc": 0.21594237100001124
        },
        "iqr_sec": 0.018744569000034517,
        "skewness": 1.6676633104478196,
        "kurtosis": 1.2622859354801985
    }
}
Saved (updated) profiling results to: ./3_profiling_results_COLAB/model_info_20250409_1028.json
Saved compressed NPZ data to: ./3_profiling_results_COLAB/cpu_overall_times_20250409_1028.npz




CPU Process Time Profiling: 100%|██████████| 100/100 [00:21<00:00,  4.62it/s]


Results: {
    "cpu_process_time": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "max_sec": 1.2924306919999964,
        "min_sec": 0.7234315620000018,
        "mean_sec": 0.8216555288399979,
        "std_dev_sec": 0.13417356655347595,
        "median_sec": 0.7606123329999974,
        "percentiles": {
            "25th_perc": 0.7417675297500068,
            "33th_perc": 0.7449946447599959,
            "66th_perc": 0.7910177704200043,
            "75th_perc": 0.8145932839999972
        },
        "iqr_sec": 0.07282575424999038,
        "skewness": 1.8209846660259033,
        "kurtosis": 2.2026931938970673
    }
}
Saved (updated) profiling results to: ./3_profiling_results_COLAB/model_info_20250409_1028.json
Saved compressed NPZ data to: ./3_profiling_results_COLAB/cpu_process_times_20250409_1028.npz




Memory Profiling Only: 100%|██████████| 100/100 [00:21<00:00,  4.57it/s]


{
    "memory_usage": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "current_bytes": 44194,
        "peak_bytes": 102487,
        "current_megabytes": 0.0421,
        "peak_megabytes": 0.0977
    }
}
Saved (updated) memory profiling results to: ./3_profiling_results_COLAB/model_info_20250409_1028.json
Saved compressed NPZ memory data to: ./3_profiling_results_COLAB/memory_and_cache_20250409_1028.npz




CPU Usage Profiling: 100%|██████████| 100/100 [00:20<00:00,  4.79it/s]


Results: {
    "cpu_usage": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "avg_cpu_usage_percent": 50.63,
        "peak_cpu_usage_percent": 69.6
    }
}
Saved (updated) CPU usage results to: ./3_profiling_results_COLAB/model_info_20250409_1028.json
Saved compressed NPZ CPU usage data to: ./3_profiling_results_COLAB/cpu_usage_20250409_1028.npz




CUDA Event Timing: 100%|██████████| 100/100 [00:01<00:00, 60.46it/s]


Results: {
    "cuda_time": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "max_sec": 0.49310906982421876,
        "min_sec": 0.009578495979309083,
        "mean_sec": 0.015809169750213622,
        "std_dev_sec": 0.04829954901922595,
        "median_sec": 0.009829264163970947,
        "percentiles": {
            "25th_perc": 0.009715607643127441,
            "33th_perc": 0.009742387619018555,
            "66th_perc": 0.009916383953094483,
            "75th_perc": 0.009949087858200072
        },
        "iqr_sec": 0.00023348021507263103,
        "skewness": 9.795335394297489,
        "kurtosis": 94.3024127075387
    }
}
Saved CUDA Timing to: ./3_profiling_results_COLAB/model_info_20250409_1028.json
Saved compressed CUDA Timings to: ./3_profiling_results_COLAB/cuda_times_20250409_1028.npz




E2E Inference Timing: 100%|██████████| 100/100 [00:01<00:00, 94.27it/s]


Results: {
    "e2e_inference_time": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "max_sec": 0.010833442000148352,
        "min_sec": 0.009124144999987038,
        "mean_sec": 0.009944103550010368,
        "std_dev_sec": 0.0002961851210873841,
        "median_sec": 0.009914098500075852,
        "percentiles": {
            "25th_perc": 0.0097725325001079,
            "33th_perc": 0.009835762840061761,
            "66th_perc": 0.009992997080130408,
            "75th_perc": 0.010020846999964306
        },
        "iqr_sec": 0.00024831449985640575,
        "skewness": 0.4154733523589719,
        "kurtosis": 1.9081065156204513
    }
}
Saved E2E inference timing to: ./3_profiling_results_COLAB/model_info_20250409_1028.json
Saved compressed E2E timings to: ./3_profiling_results_COLAB/e2e_inference_times_20250409_1028.npz




GPU Memory Usage Profiling: 100%|██████████| 100/100 [00:01<00:00, 97.88it/s]
Exception in thread Thread-11 (monitor_gpu_usage):
Traceback (most recent call last):
  File "/usr/lib/python3.11/threading.py", line 1045, in _bootstrap_inner


Results: {
    "gpu_memory_usage": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "peak_memory_bytes": 212500480,
        "peak_memory_megabytes": 202.6562
    }
}
Saved GPU memory usage to: ./3_profiling_results_COLAB/model_info_20250409_1028.json




    self.run()
  File "/usr/lib/python3.11/threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
TypeError: monitor_gpu_usage() takes 0 positional arguments but 2 were given
GPU Utilization Profiling: 100%|██████████| 100/100 [00:01<00:00, 98.14it/s]
[codecarbon INFO @ 10:32:22] [setup] RAM Tracking...
[codecarbon INFO @ 10:32:22] [setup] GPU Tracking...
[codecarbon INFO @ 10:32:22] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 10:32:22] [setup] CPU Tracking...


Results: {
    "gpu_utilization": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "avg_utilization_percent": 0.0,
        "peak_utilization_percent": 0.0
    }
}
Saved GPU utilization to: ./3_profiling_results_COLAB/model_info_20250409_1028.json




[codecarbon INFO @ 10:32:23] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.30GHz
[codecarbon INFO @ 10:32:23] >>> Tracker's metadata:
[codecarbon INFO @ 10:32:23]   Platform system: Linux-6.1.85+-x86_64-with-glibc2.35
[codecarbon INFO @ 10:32:23]   Python version: 3.11.11
[codecarbon INFO @ 10:32:23]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 10:32:23]   Available RAM : 50.994 GB
[codecarbon INFO @ 10:32:23]   CPU count: 8
[codecarbon INFO @ 10:32:23]   CPU model: Intel(R) Xeon(R) CPU @ 2.30GHz
[codecarbon INFO @ 10:32:23]   GPU count: 1
[codecarbon INFO @ 10:32:23]   GPU model: 1 x Tesla T4
  clipwise_output = nn.functional.softmax(self.fc_audioset(x))
[codecarbon INFO @ 10:32:24] Energy consumed for RAM : 0.000001 kWh. RAM Power : 19.122615337371826 W
[codecarbon INFO @ 10:32:24] Energy consumed for all GPUs : 0.000002 kWh. Total GPU Power : 37.100613280476075 W
[codecarbon INFO @ 10:32:24] Energy consumed for all CPUs : 0.000002 kWh. Total CPU Power : 42.5 W
[

{
    "energy_consumption": {
        "iterations": 100,
        "input_duration_sec": 10.0,
        "avg_emission_rate_gCO2eq_per_sec": 0.0007429383310169242,
        "avg_cpu_energy_kWh": 0.00026368899262613725,
        "avg_ram_energy_kWh": 0.00011633221994473461
    }
}
Saved (updated) energy profiling results to: ./3_profiling_results_COLAB/model_info_20250409_1028.json
Saved compressed NPZ energy data to: ./3_profiling_results_COLAB/energy_co2_20250409_1028.npz


EOF


  df = pd.concat([df, pd.DataFrame.from_records([dict(data.values)])])
  df = pd.concat(


# Architectural Profiling

In [None]:
import datetime
from epanns_inference import models

# Seed the RNGs through the notebook's set_seeds helper (defined outside this cell)
set_seeds(42)

# Output Parameters -----------------------------------------------------------------------------
# Destination folder for this section's artefacts, created up front if missing
RESULTS_DIR = "./3_profiling_results_COLAB/E2PANNs_architecture/"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Minute-resolution run tag (YYYYMMDD_HHMM) embedded in the output file names
TIMESTAMP = f"{datetime.datetime.now():%Y%m%d_%H%M}"

In [None]:
# Global Parameters
SAMPLE_RATE = 32000
INPUT_DURATION_SEC = 10.0
CHECKPOINT_PATH = "./multi-unified.ckpt"

# Load the model
model = models.Cnn14_pruned(pre_trained=False)
model, _ = load_lightning2pt(CHECKPOINT_PATH, model, device="cpu", verbose=True, validate_updates=False)
print('\n')
model.cpu()

# Inference Activities Trace (CPU-only Trace)
model_trace_path = os.path.join(RESULTS_DIR, f"inference_trace_CPU_{TIMESTAMP}.json")
inference_trace(model=model,
                sample_rate=SAMPLE_RATE,
                input_duration_sec=INPUT_DURATION_SEC,
                device='cpu',
                save_path=model_trace_path)
print('\n')

# Architecture structure
# (own variable only: the log path must not overwrite model_trace_path)
structure_log_path = os.path.join(RESULTS_DIR, "architecture_summary.log")
architecture_profile(model=model,
                     sample_rate=SAMPLE_RATE,
                     input_duration_sec=INPUT_DURATION_SEC,
                     device='cpu',
                     save_path=structure_log_path)
print('\n')

# Computational Graph
# The dummy input length is derived from the global parameters so it stays in sync with them
model_graph = draw_graph(model=model,
                         input_size=(1, int(SAMPLE_RATE * INPUT_DURATION_SEC)),
                         depth=100,
                         graph_dir='TB',
                         roll=True,
                         expand_nested=True,
                         hide_inner_tensors=False,
                         hide_module_functions=False,
                         device='cpu',
                         save_graph=True,
                         filename='E2PANNs_graph',
                         directory=RESULTS_DIR)
# A bare expression is only rendered when it is the LAST statement of a cell;
# display() (injected by IPython into the notebook namespace) renders it right here.
display(model_graph.visual_graph)

# Inference Activities Trace (GPU-accelerated)
if torch.cuda.is_available():
    model.cuda()
    model_trace_path = os.path.join(RESULTS_DIR, f"inference_trace_CUDA_{TIMESTAMP}.json")
    inference_trace(model=model,
                    sample_rate=SAMPLE_RATE,
                    input_duration_sec=INPUT_DURATION_SEC,
                    device='cuda:0',
                    save_path=model_trace_path)
else:
    # Keep the cell runnable on CPU-only runtimes instead of failing on model.cuda()
    print("[WARNING] CUDA is not available: skipping the GPU-accelerated inference trace.")
print('\n')

Detected prefix 'model.'. Stripped from state_dict keys.
Model moved to device: cpu
State dict successfully loaded into the model!




  clipwise_output = nn.functional.softmax(self.fc_audioset(x))


Inference trace successfully saved to: ./3_profiling_results_COLAB/inference_trace_CPU_20250409_1028.json




Layer (type (var_name):depth-idx)                  Input Shape               Output Shape              Param #                   Param %                   Kernel Shape              Mult-Adds                 Trainable
Cnn14_pruned (Cnn14_pruned)                        [1, 320000]               [1, 2048]                 --                             --                   --                        --                        Partial
+ Spectrogram (spectrogram_extractor): 1-1         [1, 320000]               [1, 1, 1001, 513]         --                             --                   --                        --                        False
|    + STFT (stft): 2-1                            --                        --                        --                             --                   --                        --                        False
|    |    + Conv1d (conv_real): 3-1                [1, 1, 321024]            [1, 513, 1001]            (525,312)                   2.16%      





  ret = func(*args, **kwargs)


Inference trace successfully saved to: ./3_profiling_results_COLAB/inference_trace_CUDA_20250409_1028.json




  clipwise_output = nn.functional.softmax(self.fc_audioset(x))


# Coefficients Visualization

In [None]:
# Remove the weights_plots directory left in the current working directory by a previous run
# ("./weights_plots" is the default save_dir of weights_html_plots).
# NOTE(review): the plotting call further down saves under RESULTS_DIR/weights_plots,
# which this command does not clean - confirm which directory is meant to be removed.
!rm -rf weights_plots

In [None]:
# Model Weights Visualization with Plotly - Save to HTML Files ----------------
import plotly.graph_objects as go
import plotly.io as pio
import numpy as np
import os
import torch

def _conv2d_kernels_figure(name, weights, kernel_separation):
    """
    Build the stacked-kernel 3D figure for one Conv2d layer.

    Only the first input channel of every kernel is drawn. Each kernel is a flat
    plane placed at height ``index * kernel_separation`` and colored by its weights.

    :param name: Dotted layer name, used in the figure title.
    :param weights: NumPy array shaped (out_channels, in_channels, H, W).
    :param kernel_separation: Vertical distance between consecutive kernel planes.
    :return: The assembled plotly Figure.
    """
    cube = weights[:, 0, :, :]  # Take first input channel only -> (out_channels, H, W)

    # Plain Python floats: shared by every trace as color bounds and colorbar ticks
    min_val = float(np.min(cube))
    max_val = float(np.max(cube))

    fig = go.Figure()
    for i, kernel in enumerate(cube):
        fig.add_trace(go.Surface(z=np.full_like(kernel, i * kernel_separation),  # Apply vertical offset
                                 surfacecolor=kernel,
                                 colorscale='gray',
                                 # Same color range for all kernels: without it each trace is
                                 # auto-scaled on its own data and the single colorbar (whose
                                 # ticks are the global min/max) would not describe the others.
                                 cmin=min_val,
                                 cmax=max_val,
                                 showscale=(i == 0),  # Only show colorbar once
                                 opacity=0.8,
                                 # title.side is the current spelling of the deprecated
                                 # 'titleside' shortcut.
                                 colorbar=dict(title=dict(text="Weight Value", side="right"),
                                               tickmode="array",
                                               tickvals=[min_val, max_val],
                                               ticktext=[f"{min_val:.2f}", f"{max_val:.2f}"],
                                               lenmode="pixels",
                                               len=200)))

    fig.update_layout(title=f"Conv2D Layer: {name}",
                      scene=dict(xaxis_title='Width',
                                 yaxis_title='Height',
                                 zaxis_title='Kernel Index',
                                 aspectmode='data'),
                      autosize=True,
                      height=1000,
                      width=1000,
                      template="simple_white")
    return fig


def _flat_weights_figure(name, weights):
    """
    Build the marker+line figure of a flattened weight vector (Conv1d / Linear layers).

    :param name: Dotted layer name, used in the figure title.
    :param weights: 1-D NumPy array with the flattened weights.
    :return: The assembled plotly Figure.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=np.arange(len(weights)),
                             y=weights,
                             mode="markers+lines",
                             marker=dict(size=6, color='black'),
                             line=dict(color='gray'),
                             hoverinfo='x+y'))

    fig.update_layout(title=f"Layer: {name}",
                      xaxis_title="Index",
                      yaxis_title="Weight Value",
                      height=1000,
                      width=1000,
                      template="simple_white")
    return fig


def weights_html_plots(model, device="cpu", save_dir="./weights_plots", kernel_separation=5):
    """
    Visualize model weights using Plotly and save each figure as a standalone HTML file.
    - Conv2d layers: kernels (first input channel only) stacked vertically as gray-scale
      planes sharing one color range, with a simplified colorbar (only min and max).
    - Conv1d and Linear layers: interactive marker+line plot of the flattened weights.
    Each layer's visualization is in its own HTML file named after the layer
    (dots in the module path replaced by underscores). Other layer types are ignored.

    Side effects: the model is moved to ``device`` and switched to eval mode, and
    ``save_dir`` is created if missing. A layer whose plot fails is reported with a
    warning and skipped, so one bad layer does not abort the whole export.

    :param model: PyTorch model instance.
    :param device: Device to move the model on ('cpu' or 'cuda').
    :param save_dir: Directory where to save the HTML files.
    :param kernel_separation: Vertical separation between Conv2D kernels (default: 5 units).
    :return: None
    """
    model.to(device)
    model.eval()

    os.makedirs(save_dir, exist_ok=True)

    # named_modules() walks the module tree depth-first (a shared module is visited once)
    for name, layer in model.named_modules():
        if not name:
            continue  # The root module itself: only its descendants are plotted

        try:
            safe_layer_name = name.replace(".", "_")  # Safe filename
            html_path = os.path.join(save_dir, f"{safe_layer_name}.html")

            if isinstance(layer, torch.nn.Conv2d):
                weights = layer.weight.detach().cpu().numpy()  # (out_channels, in_channels, H, W)
                fig = _conv2d_kernels_figure(name, weights, kernel_separation)
                pio.write_html(fig, file=html_path, auto_open=False)
                print(f"Saved Conv2D figure: {html_path}")

            elif isinstance(layer, (torch.nn.Conv1d, torch.nn.Linear)):
                # Hand Plotly a NumPy array rather than a torch tensor
                weights = layer.weight.detach().cpu().flatten().numpy()
                fig = _flat_weights_figure(name, weights)
                pio.write_html(fig, file=html_path, auto_open=False)
                print(f"Saved Linear/Conv1D figure: {html_path}")

        except Exception as e:
            # Deliberate best-effort: report the failing layer and carry on with the rest
            print(f"[WARNING] Skipping layer {name} due to error: {e}")

In [None]:
RESULTS_DIR = "./3_profiling_results_COLAB"
# RESULTS_DIR already starts with "./": join the path components instead of prefixing
# another "./", which produced "././3_profiling_results_COLAB/..." in the saved paths.
weights_html_plots(model, device="cpu", save_dir=os.path.join(RESULTS_DIR, "weights_plots"), kernel_separation=2)

Saved Linear/Conv1D figure: ././3_profiling_results_COLAB/weights_plots/spectrogram_extractor_stft_conv_real.html
Saved Linear/Conv1D figure: ././3_profiling_results_COLAB/weights_plots/spectrogram_extractor_stft_conv_imag.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/conv_block1_conv1.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/conv_block1_conv2.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/conv_block2_conv1.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/conv_block2_conv2.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/conv_block3_conv1.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/conv_block3_conv2.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/conv_block4_conv1.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/conv_block4_conv2.html
Saved Conv2D figure: ././3_profiling_results_COLAB/weights_plots/con

In [None]:
# Download a zipped version of assets
# Recursively pack the whole results folder into one archive in the current working directory
!zip -r 3_profiling_results_COLAB.zip 3_profiling_results_COLAB
# google.colab is imported here, right before use; presumably it is only available inside a Colab runtime
from google.colab import files
# Hand the archive to the Colab download helper
files.download('3_profiling_results_COLAB.zip')

  adding: 3_profiling_results_COLAB/ (stored 0%)
  adding: 3_profiling_results_COLAB/gpu_info_20250409_1028.json (deflated 52%)
  adding: 3_profiling_results_COLAB/hardware_info_20250409_1028.json (deflated 80%)
  adding: 3_profiling_results_COLAB/model_info_20250409_1028.json (deflated 72%)
  adding: 3_profiling_results_COLAB/cpu_overall_times_20250409_1028.npz (deflated 4%)
  adding: 3_profiling_results_COLAB/cpu_process_times_20250409_1028.npz (deflated 4%)
  adding: 3_profiling_results_COLAB/memory_and_cache_20250409_1028.npz (deflated 25%)
  adding: 3_profiling_results_COLAB/cpu_usage_20250409_1028.npz (deflated 11%)
  adding: 3_profiling_results_COLAB/cuda_times_20250409_1028.npz (deflated 2%)
  adding: 3_profiling_results_COLAB/e2e_inference_times_20250409_1028.npz (deflated 2%)
  adding: 3_profiling_results_COLAB/energy_emissions.csv (deflated 38%)
  adding: 3_profiling_results_COLAB/emissions_base_8bf2183a-5368-4d3f-9f3d-06ca7300ce6b.csv (deflated 80%)
  adding: 3_profiling_re

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>