In [2]:
# ============================================================
# Appendix A.0.1 — GPU Validation and Performance Benchmark
# ------------------------------------------------------------
# Author: Efrain G. Garza
# Research Context:
#   Mentor–Mentee Optimization (Euclidean Distance Modeling,
#   Vectorized Degree Representations, and Linear Algebra Acceleration)
# Institution: University of Texas at San Antonio
#
# Purpose:
#   This appendix provides an empirical validation of GPU acceleration
#   for the computational components used in this research. Two core
#   workloads are benchmarked on both CPU and GPU:
#
#     (1) Large-scale matrix multiplication (PyTorch), representative of
#         the vector–matrix operations underlying the Euclidean distance
#         matrix in the mentor–mentee optimization model.
#
#     (2) High-dimensional simulation (CuPy), reflecting the iterative
#         GPU-based procedures used during model diagnostics,
#         sensitivity checks, and distance-matrix verification.
#
#   The results confirm that the NVIDIA RTX 3070 Ti Laptop GPU provides
#   an ~18× acceleration relative to CPU execution, ensuring that all
#   computational experiments in this study were conducted under a
#   validated and reproducible hardware–software environment.
# ============================================================

import time
import torch
import cupy as cp
import numpy as np
import pandas as pd

# ------------------------------------------------------------
# PyTorch GPU vs CPU — Matrix Multiplication Benchmark
# ------------------------------------------------------------
print("\n=== PyTorch Benchmark (Matrix Multiply) ===")

N = 4000  # 4000×4000 matrices

# CPU computation
a_cpu = torch.rand(N, N)
b_cpu = torch.rand(N, N)

start = time.time()
_ = torch.mm(a_cpu, b_cpu)
cpu_time_torch = time.time() - start

# GPU computation
device_gpu = torch.device("cuda")
a_gpu = a_cpu.to(device_gpu)
b_gpu = b_cpu.to(device_gpu)
torch.cuda.synchronize()

start = time.time()
_ = torch.mm(a_gpu, b_gpu)
torch.cuda.synchronize()
gpu_time_torch = time.time() - start

print(f"CPU time: {cpu_time_torch:.4f} s")
print(f"GPU time: {gpu_time_torch:.4f} s")
print(f"Speed-up: {cpu_time_torch / gpu_time_torch:.2f}×")


# ------------------------------------------------------------
# CuPy GPU vs CPU — Monte Carlo Simulation Benchmark
# ------------------------------------------------------------
print("\n=== CuPy Benchmark (Monte Carlo Simulation) ===")

N = 8_000_000  # 8 million draws

# ---- CPU version (NumPy) ----
start = time.time()
x_cpu = np.random.randn(N)
sum_cpu = x_cpu.sum()
cpu_time_sim = time.time() - start

# ---- GPU version (CuPy) ----
cp.cuda.Stream.null.synchronize()
start = time.time()
x_gpu = cp.random.randn(N)
sum_gpu = cp.sum(x_gpu)
cp.cuda.Stream.null.synchronize()
gpu_time_sim = time.time() - start

print(f"CPU time: {cpu_time_sim:.4f} s")
print(f"GPU time: {gpu_time_sim:.4f} s")
print(f"Speed-up: {cpu_time_sim / gpu_time_sim:.2f}×")


# ------------------------------------------------------------
# Benchmark Summary Table (for the Appendix)
# ------------------------------------------------------------
results = pd.DataFrame({
    "Benchmark": [
        "PyTorch Matrix Multiply",
        "CuPy Monte Carlo Simulation"
    ],
    "CPU Time (s)": [
        cpu_time_torch,
        cpu_time_sim
    ],
    "GPU Time (s)": [
        gpu_time_torch,
        gpu_time_sim
    ],
    "Speed-up (×)": [
        cpu_time_torch / gpu_time_torch,
        cpu_time_sim / gpu_time_sim
    ]
})

print("\nBenchmark Summary:")
print(results.to_string(index=False))



=== PyTorch Benchmark (Matrix Multiply) ===
CPU time: 0.0755 s
GPU time: 0.0041 s
Speed-up: 18.36×

=== CuPy Benchmark (Monte Carlo Simulation) ===
CPU time: 0.0922 s
GPU time: 0.0037 s
Speed-up: 25.20×

Benchmark Summary:
                  Benchmark  CPU Time (s)  GPU Time (s)  Speed-up (×)
    PyTorch Matrix Multiply      0.075536      0.004114     18.362177
CuPy Monte Carlo Simulation      0.092209      0.003660     25.195570
