In [15]:
import time
from datetime import timedelta
import psutil
import torch
import numpy as np
import tracemalloc
from dataclasses import dataclass
from contextlib import contextmanager
from typing import Callable, Any
import gc

In [16]:
@dataclass
class BenchmarkResult:
    """Aggregated metrics from benchmarking a single model.

    ``accuracy`` defaults to NaN so a result can still be constructed when no
    accuracy has been computed.
    """
    # Human-readable label identifying the benchmarked model.
    model_name: str
    # Parameter + buffer size of the model, in bytes / 1024**2.
    model_size: float
    # Mean wall-clock time of one pass over all inputs (milliseconds, per the name).
    inference_time_ms: float
    # Mean peak Python-traced memory per run, in bytes / 1024**2.
    peak_memory_mb: float
    # Mean peak CUDA memory allocated per run, in bytes / 1024**2 (0 without CUDA).
    gpu_memory_mb: float
    # Mean whitespace-separated input tokens processed per second.
    throughput: float
    # Quality score of the predictions; NaN means "not measured".
    accuracy: float = float('nan')

In [4]:
class ModelBenchmark:
    """Benchmark a text-to-text model for latency, throughput, memory and accuracy.

    GPU statistics are collected only when ``device == 'cuda'``; for any other
    device string the GPU figure is reported as 0.
    """

    def __init__(self, device: str = 'cuda'):
        self.device = device
        # Peak Python-traced memory (bytes / 1024**2) of the most recent
        # _measure_memory() block. It is only updated when that block exits.
        self.peak_memory = 0.0

    @contextmanager
    def _measure_memory(self):
        """Trace allocations made inside the ``with`` block.

        The peak is written to ``self.peak_memory`` when the block exits, so it
        must be read *after* the ``with`` statement, never inside it.
        tracemalloc reports memory allocated through Python's allocators;
        native allocations made by extension libraries may not be included.
        """
        tracemalloc.start()
        try:
            yield
        finally:
            _, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
            self.peak_memory = peak / 1024 ** 2

    def _get_gpu_memory(self) -> float:
        """Return the peak CUDA memory allocated since the last reset, or 0.0."""
        if self.device == 'cuda':
            return torch.cuda.max_memory_allocated() / 1024 ** 2
        return 0.0

    def _clear_memory(self):
        """Collect garbage and, on CUDA, drop cached blocks and reset peak stats."""
        gc.collect()
        if self.device == 'cuda':
            torch.cuda.empty_cache()
            torch.cuda.reset_peak_memory_stats()

    def _synchronize(self):
        """Wait for queued CUDA work so wall-clock timings include it."""
        if self.device == 'cuda':
            torch.cuda.synchronize()

    def _get_model_size(self, model) -> float:
        """Return parameter + buffer size in bytes / 1024**2 (PyTorch models only).

        The size does not depend on which device the benchmark targets, so it
        is computed for any object exposing ``parameters()`` and ``buffers()``.
        Objects without that interface yield 0.0 instead of raising.
        """
        def _is_module_like(obj) -> bool:
            return (callable(getattr(obj, 'parameters', None))
                    and callable(getattr(obj, 'buffers', None)))

        module = model
        if not _is_module_like(module):
            # NOTE(review): wrapper objects often keep the underlying network in
            # a ``model`` attribute -- confirm this for each wrapped library.
            module = getattr(model, 'model', None)
        if not _is_module_like(module):
            return 0.0

        param_size = sum(p.numel() * p.element_size() for p in module.parameters())
        buffer_size = sum(b.numel() * b.element_size() for b in module.buffers())
        return (param_size + buffer_size) / 1024 ** 2

    @staticmethod
    def _exact_match_accuracy(predictions: list[str], references: list[str]) -> float:
        """Fraction of predictions equal to their reference after stripping.

        This is a sentence-level exact-match score; NaN is returned when there
        are no references to compare against.
        """
        if not references:
            return float('nan')
        hits = sum(pred.strip() == ref.strip()
                   for pred, ref in zip(predictions, references))
        return hits / len(references)

    def benchmark_model(self,
                        model,
                        input_texts: list[str],
                        true_texts: list[str],
                        model_name: str,
                        predict: Callable[[Any, str], str],
                        warm_up_runs: int = 3,
                        num_runs: int = 5
                        ) -> BenchmarkResult:
        """Run ``predict`` over ``input_texts`` and average the measurements.

        Args:
            model: Object handed to ``predict`` as its first argument.
            input_texts: Texts fed to the model one at a time.
            true_texts: Expected outputs, aligned one-to-one with ``input_texts``.
            model_name: Label used in progress messages and in the result.
            predict: Called as ``predict(model, text)``; returns the output text.
            warm_up_runs: Untimed passes over (up to) the first half of the inputs.
            num_runs: Timed passes over all inputs; must be at least 1.

        Returns:
            A BenchmarkResult whose metrics are means over ``num_runs``.
            ``inference_time_ms`` is the time of one full pass, in milliseconds.

        Raises:
            ValueError: If ``num_runs < 1`` or the two text lists differ in length.
        """
        if num_runs < 1:
            raise ValueError(f"num_runs must be at least 1, got {num_runs}")
        if len(true_texts) != len(input_texts):
            raise ValueError(
                f"input_texts ({len(input_texts)}) and true_texts "
                f"({len(true_texts)}) must have the same length")

        print(f"Starting {warm_up_runs} warm-up iterations for {model_name}...")
        start = time.perf_counter()
        # Integer division: a float cannot be used as a slice bound. Keep at
        # least one sample so a single-element input still warms the model up.
        warm_up_texts = input_texts[:max(1, len(input_texts) // 2)]
        for _ in range(warm_up_runs):
            for text in warm_up_texts:
                predict(model, text)
        self._synchronize()
        print(f"Finished warm-up after {time.perf_counter() - start} seconds.")

        # The token count is identical for every run, so compute it once and
        # keep it out of the timed region.
        total_tokens = sum(len(text.split()) for text in input_texts)

        inference_times = []
        throughputs = []
        memory_usages = []
        gpu_memory_usages = []
        accuracies = []

        print("Starting benchmark iterations...")
        for run in range(num_runs):
            self._clear_memory()
            with self._measure_memory():
                start_time = time.perf_counter()
                outputs = [predict(model, text) for text in input_texts]
                self._synchronize()
                inference_time = time.perf_counter() - start_time

            # peak_memory is only valid once the context manager has exited.
            inference_times.append(inference_time)
            throughputs.append(total_tokens / inference_time if inference_time > 0 else 0.0)
            memory_usages.append(self.peak_memory)
            gpu_memory_usages.append(self._get_gpu_memory())
            accuracies.append(self._exact_match_accuracy(outputs, true_texts))

            print(f"Finished {run + 1}/{num_runs} iteration in {inference_time} seconds.")

        return BenchmarkResult(model_name=model_name,
                               model_size=self._get_model_size(model),
                               # Timings are taken in seconds; the field is in ms.
                               inference_time_ms=float(np.mean(inference_times)) * 1000,
                               peak_memory_mb=float(np.mean(memory_usages)),
                               gpu_memory_mb=float(np.mean(gpu_memory_usages)),
                               throughput=float(np.mean(throughputs)),
                               accuracy=float(np.mean(accuracies)))

### NeuSpell


In [5]:
# BERT-based spelling checker from the neuspell package.
from neuspell import BertChecker

# Request the CUDA device, then load the pretrained checkpoint
# (the cell output shows the vocab and weights being read from disk).
checker = BertChecker(device='cuda')
checker.from_pretrained()

loading vocab from path:C:\FIT\bakalarka\.venv\Lib\site-packages\neuspell\../neuspell_data/checkpoints/subwordbert-probwordnoise\vocab.pkl
initializing model
loading pretrained weights from path:C:\FIT\bakalarka\.venv\Lib\site-packages\neuspell\../neuspell_data/checkpoints/subwordbert-probwordnoise
Loading model params from checkpoint dir: C:\FIT\bakalarka\.venv\Lib\site-packages\neuspell\../neuspell_data/checkpoints/subwordbert-probwordnoise


In [12]:
DATA_PATH = "./data/"


def _read_nonempty_lines(path: str) -> list[str]:
    """Return the stripped, non-blank lines of a UTF-8 text file.

    Lines read from a file keep their trailing newline, so comparing the raw
    line with "" never filters anything; the stripped text is tested instead.
    """
    with open(path, 'r', encoding='utf-8', newline='\n') as f:
        return [stripped for line in f if (stripped := line.strip())]


input_clean = _read_nonempty_lines(DATA_PATH + "small_clean.txt")
input_corrupt = _read_nonempty_lines(DATA_PATH + "small_corrupt.txt")

# The two lists are used as aligned input/reference pairs downstream, so
# dropping blank lines must not leave them with different lengths.
assert len(input_clean) == len(input_corrupt), (
    f"line count mismatch: {len(input_clean)} clean vs {len(input_corrupt)} corrupt"
)

In [14]:
# No device is passed, so the constructor default ('cuda') is used.
benchmark = ModelBenchmark()

In [None]:
# The spell checker receives the corrupted sentences and is scored against the
# clean ones. `predict` is invoked as predict(model, text), so the callable
# must accept both arguments.
neuspell_result = benchmark.benchmark_model(
    model=checker,
    input_texts=input_corrupt,
    true_texts=input_clean,
    model_name="neuspell-bert",
    predict=lambda model, text: model.correct_string(text),
)
neuspell_result