In [1]:
#Imports
import numpy as np
import cupy as cp
import time
from tqdm import tqdm

In [2]:
# Build the benchmark input: 100,000 points in 3-D whose coordinates are
# drawn uniformly between 0 and 100.
n_points = 100_000
np.random.seed(42)  # fixed seed so every run benchmarks the same points
points_cpu = np.random.uniform(low=0, high=100, size=(n_points, 3))

In [3]:
# Detect whether a usable CUDA device exists.
# Being able to import CuPy is not sufficient evidence: the package can be
# installed on a machine that has no GPU or driver, so the CUDA runtime is
# also asked how many devices it can see.
# NOTE(review): the first cell imports cupy unconditionally, so a missing CuPy
# installation would already fail there before this fallback can take effect.
cuda_available = False
cuda_error = "no CUDA device detected"
try:
    import cupy as cp
    # May raise when the CUDA driver/runtime cannot be initialised; that case
    # is reported below exactly like a failed import.
    cuda_available = cp.cuda.runtime.getDeviceCount() > 0
except Exception as e:  # ImportError is already a subclass of Exception
    cuda_error = str(e)

if cuda_available:
    print("CUDA is available. Using GPU for computation.")
else:
    print(f"CUDA is not available: {cuda_error}")
    print("Falling back to CPU-only computation for GPU tasks.")

print("Generated 100,000 3D points in range [0, 100]")

CUDA is available. Using GPU for computation.
Generated 100,000 3D points in range [0, 100]


In [4]:
#CPU-based pairwise Euclidean distance (batched timing)
def time_distances_cpu(points, batch_size=1000):
    """Time a tiled all-pairs Euclidean distance computation with NumPy.

    The full n x n distance matrix is evaluated tile by tile using the
    expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * (a . b). Every tile is
    discarded right after it has been computed; only the elapsed time is
    reported.

    Args:
        points: 2-D NumPy array holding one point per row.
        batch_size: Number of rows per tile; must be at least 1.

    Returns:
        Elapsed time in seconds as a float.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step makes range() empty, which would report a near-zero
        # "timing" without any work having been done.
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    n = points.shape[0]
    print("\nCalculating distance matrix in batches on CPU (timing only)...")
    # perf_counter() is monotonic and high resolution, unlike time.time(),
    # which follows the (adjustable) system wall clock.
    start_time = time.perf_counter()
    for i in tqdm(range(0, n, batch_size), desc="CPU Batches"):
        end_i = min(i + batch_size, n)
        points_batch_i = points[i:end_i]
        X_batch_i_squared_sum = np.sum(points_batch_i**2, axis=1)
        for j in range(0, n, batch_size):
            end_j = min(j + batch_size, n)
            points_batch_j = points[j:end_j]
            # Computed for every tile; this is part of the timed workload.
            X_batch_j_squared_sum = np.sum(points_batch_j**2, axis=1)

            dot_product_batch = points_batch_i @ points_batch_j.T

            # Clamp at zero before the square root: rounding error can make
            # the expanded squared distance slightly negative.
            squared_dist_batch = np.maximum(
                X_batch_i_squared_sum[:, np.newaxis]
                + X_batch_j_squared_sum[np.newaxis, :]
                - 2 * dot_product_batch,
                0.0,
            )
            # The tile is intentionally not stored (timing only).
            dist_matrix_batch = np.sqrt(squared_dist_batch)
    return time.perf_counter() - start_time

In [5]:
#GPU-based pairwise Euclidean distance (batched timing)
def time_distances_gpu(points_cpu, batch_size=1000):
    """Time a tiled all-pairs Euclidean distance computation with CuPy.

    The computation uses the expansion
    ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * (a . b) tile by tile. Each tile's
    rows are copied from host memory to the GPU inside the loops, so that
    transfer cost is included in the measured time. Tiles are discarded
    right after they have been computed.

    Args:
        points_cpu: 2-D NumPy array (host memory) holding one point per row.
        batch_size: Number of rows per tile; must be at least 1.

    Returns:
        Elapsed time in seconds as a float, or ``None`` when CUDA is
        unavailable or an error occurs during the GPU computation.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step makes range() empty, which would report a near-zero
        # "timing" without any work having been done.
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    n = points_cpu.shape[0]
    if not cuda_available:
        print("GPU computation skipped due to CUDA unavailability.")
        return None

    print("\nCalculating distance matrix in batches on GPU (timing only)...")
    # NOTE(review): the first call presumably also pays one-time CuPy start-up
    # and kernel compilation costs; consider an untimed warm-up run if steady
    # state throughput is what should be reported.
    # perf_counter() is monotonic and high resolution, unlike time.time().
    start_time = time.perf_counter()
    try:
        for i in tqdm(range(0, n, batch_size), desc="GPU Batches"):
            end_i = min(i + batch_size, n)
            points_batch_i_cpu = points_cpu[i:end_i]
            points_batch_i_gpu = cp.asarray(points_batch_i_cpu)  # Transfer batch to GPU

            X_batch_i_squared_sum_gpu = cp.sum(points_batch_i_gpu**2, axis=1)

            for j in range(0, n, batch_size):
                end_j = min(j + batch_size, n)
                points_batch_j_cpu = points_cpu[j:end_j]
                points_batch_j_gpu = cp.asarray(points_batch_j_cpu)  # Transfer batch to GPU
                X_batch_j_squared_sum_gpu = cp.sum(points_batch_j_gpu**2, axis=1)

                dot_product_batch_gpu = points_batch_i_gpu @ points_batch_j_gpu.T

                # Clamp at zero before the square root: rounding error can
                # make the expanded squared distance slightly negative.
                # The tile is intentionally not stored (timing only).
                dist_matrix_batch_gpu = cp.sqrt(cp.maximum(
                    X_batch_i_squared_sum_gpu[:, cp.newaxis]
                    + X_batch_j_squared_sum_gpu[cp.newaxis, :]
                    - 2 * dot_product_batch_gpu,
                    0.0
                ))
            # Ensure all GPU operations for this row batch are complete.
            # Device() with no argument refers to the current device, so this
            # also works when the active GPU is not device 0.
            cp.cuda.Device().synchronize()
    except Exception as e:
        print(f"Error during GPU computation: {str(e)}")
        return None

    return time.perf_counter() - start_time

In [6]:
# Run both benchmarks on the same points with the same tile size so the two
# timings can be compared directly.
BATCH_SIZE = 1000

cpu_time = time_distances_cpu(points_cpu, batch_size=BATCH_SIZE)
if cpu_time is not None:
    print(f"Batch-wise CPU Time: {cpu_time:.2f} seconds")

# The GPU routine returns None when it was skipped or failed.
gpu_time = time_distances_gpu(points_cpu, batch_size=BATCH_SIZE)
if gpu_time is not None:
    print(f"\n Batch-wise GPU Time: {gpu_time:.2f} seconds")


Calculating distance matrix in batches on CPU (timing only)...


CPU Batches: 100%|██████████| 100/100 [01:52<00:00,  1.13s/it]


Batch-wise CPU Time: 112.57 seconds

Calculating distance matrix in batches on GPU (timing only)...


GPU Batches: 100%|██████████| 100/100 [00:07<00:00, 13.40it/s]


 Batch-wise GPU Time: 7.47 seconds





In [7]:
# Report both timings and the resulting speedup. Nothing is printed when no
# CPU timing exists; when only the GPU timing is missing, the CPU figure is
# shown together with a note that no comparison is possible.
if cpu_time is not None:
    if gpu_time is not None:
        print("\n--- Performance Comparison ---")
        print(f"Batch-wise CPU Execution Time: {cpu_time:.2f} seconds")
        print(f"Batch-wise GPU Execution Time: {gpu_time:.2f} seconds")
        if not (gpu_time > 0):
            # Guard against dividing by a zero elapsed time.
            print("Speedup cannot be calculated as GPU time is zero.")
        else:
            speedup = cpu_time / gpu_time
            print(f"Speedup: {speedup:.2f}x (GPU over CPU)")
    else:
        print("--- Performance Comparison ---")
        print(f"Batch-wise CPU Execution Time: {cpu_time:.2f} seconds")
        print("GPU computation was skipped or failed, cannot compare performance.")


--- Performance Comparison ---
Batch-wise CPU Execution Time: 112.57 seconds
Batch-wise GPU Execution Time: 7.47 seconds
Speedup: 15.08x (GPU over CPU)
