In [None]:
import torch
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

def benchmark_batched_eig(matrix_size=256, batch_sizes=(1, 2, 4, 8, 16, 32)):
    """Time batched ``torch.linalg.eigvals`` on CPU and, when available, on CUDA.

    For every batch size ``B`` a random ``(B, matrix_size, matrix_size)`` tensor
    is drawn with ``torch.randn`` and multiplied by its own transpose. Each
    device then gets one untimed warm-up call followed by one timed call.

    Args:
        matrix_size: Side length of each square matrix in the batch.
        batch_sizes: Iterable of batch sizes to benchmark.

    Returns:
        A tuple ``(batch_sizes, cpu_times, gpu_times)`` of equal-length lists.
        ``batch_sizes`` is a fresh list built from the argument, times are in
        seconds, and each ``gpu_times`` entry is ``None`` when CUDA is not
        available.
    """
    # Work on a private list: the default stays immutable, and callers get a
    # sequence they can mutate (or call ``.index`` on) without side effects.
    batch_sizes = list(batch_sizes)
    has_cuda = torch.cuda.is_available()  # loop-invariant, query once
    cpu_times, gpu_times = [], []

    for B in tqdm(batch_sizes):
        A = torch.randn(B, matrix_size, matrix_size)
        A = A @ A.transpose(-1, -2)  # Make symmetric for real eigvals
        # NOTE: eigvals is the general solver; torch.linalg.eigvalsh would
        # exploit the symmetry, but that would be a different benchmark.

        # CPU: warm up first so it is measured the same way as the GPU and the
        # sample does not absorb one-time initialization costs.
        _ = torch.linalg.eigvals(A)  # Warm-up
        start = time.perf_counter()
        _ = torch.linalg.eigvals(A)
        cpu_times.append(time.perf_counter() - start)

        # GPU
        if has_cuda:
            A_gpu = A.to("cuda")
            _ = torch.linalg.eigvals(A_gpu)  # Warm-up
            # CUDA work is queued asynchronously; synchronize so the timer
            # brackets the actual computation.
            torch.cuda.synchronize()
            start = time.perf_counter()
            _ = torch.linalg.eigvals(A_gpu)
            torch.cuda.synchronize()
            gpu_times.append(time.perf_counter() - start)
        else:
            gpu_times.append(None)

    return batch_sizes, cpu_times, gpu_times


def plot_batched_results(batch_sizes, cpu_times, gpu_times, anchor_batch_size=2, matrix_size=256,
                         show_cpu=True):
    """Plot total time against batch size, with linear-scaling baselines.

    Args:
        batch_sizes: Batch sizes, used as x values.
        cpu_times: CPU time in seconds for each batch size.
        gpu_times: GPU time in seconds for each batch size; entries are
            ``None`` when no GPU measurement was taken.
        anchor_batch_size: Batch size whose measured time anchors the dashed
            baselines ``time(anchor) * b / anchor``. Baselines are skipped if
            this value is not in ``batch_sizes``.
        matrix_size: Matrix side length, used only in the figure title.
        show_cpu: Whether to draw the CPU series and its baseline.
    """
    sizes = list(batch_sizes)  # guarantees .index() for any sequence type
    # Missing GPU samples are None; test for that explicitly so a measured
    # 0.0 is not mistaken for "no data".
    has_gpu = any(t is not None for t in gpu_times)

    plt.figure()

    if show_cpu:
        plt.plot(sizes, cpu_times, "o-", label="CPU", color="navy", markersize=2)
    if has_gpu:
        plt.plot(sizes, gpu_times, "o-", label="GPU", color="crimson", markersize=2)

    # Linear baselines from anchor
    if anchor_batch_size in sizes:
        idx = sizes.index(anchor_batch_size)
        scale = [b / anchor_batch_size for b in sizes]
        if show_cpu:
            cpu_anchor = cpu_times[idx]
            plt.plot(sizes, [cpu_anchor * s for s in scale],
                     "--", label=f"CPU Linear @{anchor_batch_size}", color="blue", alpha=0.4)
        if gpu_times[idx] is not None:
            gpu_anchor = gpu_times[idx]
            plt.plot(sizes, [gpu_anchor * s for s in scale],
                     "--", label=f"GPU Linear @{anchor_batch_size}", color="red", alpha=0.4)

    plt.xlabel("Batch Size")
    plt.ylabel("Total Time (s)")
    plt.title(f"Fixed Size = {matrix_size}x{matrix_size}")
    plt.grid(True, linestyle="--", alpha=0.5)
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Eigenvalue benchmark: 92x92 matrices over the even batch sizes 2, 4, ..., 128.
matrix_size = 92
batch_size = list(range(2, 128 + 1, 2))
batch_sizes, cpu_times, gpu_times = benchmark_batched_eig(
    matrix_size=matrix_size,
    batch_sizes=batch_size,
)

In [None]:
# Plot the eigenvalue benchmark timings; baselines are anchored at batch size 12.
plot_batched_results(
    batch_sizes,
    cpu_times,
    gpu_times,
    anchor_batch_size=12,
    matrix_size=matrix_size,
)

# Batched Matrix Multiplication

In [None]:
import torch
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

def benchmark_batched_matmul(matrix_size=256, batch_sizes=(1, 2, 4, 8, 16, 32, 64, 128)):
    """Time batched ``torch.bmm`` on CPU and, when available, on CUDA.

    For every batch size ``B`` two random ``(B, matrix_size, matrix_size)``
    tensors are drawn with ``torch.randn`` and multiplied. Each device gets
    one untimed warm-up call followed by one timed call.

    Args:
        matrix_size: Side length of each square matrix in the batch.
        batch_sizes: Iterable of batch sizes to benchmark.

    Returns:
        A tuple ``(batch_sizes, cpu_times, gpu_times)`` of equal-length lists.
        ``batch_sizes`` is a fresh list built from the argument, times are in
        seconds, and each ``gpu_times`` entry is ``None`` when CUDA is not
        available.
    """
    # Work on a private list: the default stays immutable, and callers get a
    # sequence they can mutate (or call ``.index`` on) without side effects.
    batch_sizes = list(batch_sizes)
    has_cuda = torch.cuda.is_available()  # loop-invariant, query once
    cpu_times, gpu_times = [], []

    for B in tqdm(batch_sizes):
        lhs = torch.randn(B, matrix_size, matrix_size)
        rhs = torch.randn(B, matrix_size, matrix_size)

        # CPU: warm up first so it is measured the same way as the GPU and the
        # sample does not absorb one-time initialization costs.
        _ = torch.bmm(lhs, rhs)  # Warm-up
        start = time.perf_counter()
        _ = torch.bmm(lhs, rhs)
        cpu_times.append(time.perf_counter() - start)

        # GPU
        if has_cuda:
            lhs_gpu = lhs.to("cuda")
            rhs_gpu = rhs.to("cuda")
            # Warm-up
            _ = torch.bmm(lhs_gpu, rhs_gpu)
            # CUDA kernels launch asynchronously; synchronize so the timer
            # brackets the actual computation.
            torch.cuda.synchronize()
            start = time.perf_counter()
            _ = torch.bmm(lhs_gpu, rhs_gpu)
            torch.cuda.synchronize()
            gpu_times.append(time.perf_counter() - start)
        else:
            gpu_times.append(None)

    return batch_sizes, cpu_times, gpu_times


def plot_batched_results(batch_sizes, cpu_times, gpu_times, anchor_batch_size=2, matrix_size=256,
                         show_cpu=False):
    """Plot batched-matmul total time against batch size, with linear baselines.

    By default only the GPU series is drawn; pass ``show_cpu=True`` to add the
    CPU series and its baseline.

    Args:
        batch_sizes: Batch sizes, used as x values.
        cpu_times: CPU time in seconds for each batch size.
        gpu_times: GPU time in seconds for each batch size; entries are
            ``None`` when no GPU measurement was taken.
        anchor_batch_size: Batch size whose measured time anchors the dashed
            baselines ``time(anchor) * b / anchor``. Baselines are skipped if
            this value is not in ``batch_sizes``.
        matrix_size: Matrix side length, used only in the figure title.
        show_cpu: Whether to draw the CPU series and its baseline.
    """
    sizes = list(batch_sizes)  # guarantees .index() for any sequence type
    # Missing GPU samples are None; test for that explicitly so a measured
    # 0.0 is not mistaken for "no data".
    has_gpu = any(t is not None for t in gpu_times)

    plt.figure()

    if show_cpu:
        plt.plot(sizes, cpu_times, "o-", label="CPU", color="navy", markersize=2)
    if has_gpu:
        plt.plot(sizes, gpu_times, "o-", label="GPU", color="crimson", markersize=2)

    # Linear baselines from anchor
    if anchor_batch_size in sizes:
        idx = sizes.index(anchor_batch_size)
        scale = [b / anchor_batch_size for b in sizes]
        if show_cpu:
            cpu_anchor = cpu_times[idx]
            plt.plot(sizes, [cpu_anchor * s for s in scale],
                     "--", label=f"CPU Linear @{anchor_batch_size}", color="blue", alpha=0.4)
        if gpu_times[idx] is not None:
            gpu_anchor = gpu_times[idx]
            plt.plot(sizes, [gpu_anchor * s for s in scale],
                     "--", label=f"GPU Linear @{anchor_batch_size}", color="red", alpha=0.4)

    plt.xlabel("Batch Size")
    plt.ylabel("Total Time (s)")
    plt.title(f"Batched Matrix Multiplication (Fixed Size = {matrix_size}x{matrix_size})")
    plt.grid(True, linestyle="--", alpha=0.5)
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Usage
matrix_size = 512
# Batch sizes 1, 4, 7, ..., 385. Built-in range is used because numpy is not
# imported in this notebook; it also yields plain Python ints.
batch_sizes = list(range(1, 129 * 3, 3))
batch_sizes, cpu_times, gpu_times = benchmark_batched_matmul(matrix_size=matrix_size, batch_sizes=batch_sizes)

In [None]:
# Plot the matmul benchmark timings; baselines are anchored at batch size 16.
plot_batched_results(
    batch_sizes,
    cpu_times,
    gpu_times,
    anchor_batch_size=16,
    matrix_size=matrix_size,
)
