In [1]:
import numpy as np
import cupy as cp
import time
import os

# --- Configuration ---
ARRAY_SIZE = 800_000_000  # 800 million elements
# The benchmarks below generate int32 values (4 bytes each), so a single
# array of this size occupies roughly 3.2 GB of host or device memory.

# Check for CuPy and GPU availability before doing any heavy work.
try:
    # getDevice() only *queries* the ordinal of the current CUDA device (it
    # does not select one). A CUDARuntimeError here is treated as "no usable
    # GPU".
    device_id = cp.cuda.runtime.getDevice()
except cp.cuda.runtime.CUDARuntimeError as err:
    print("❌ ERROR: CuPy failed to initialize a GPU. ")
    print("Please ensure you have an NVIDIA GPU, CUDA toolkit, and CuPy installed correctly.")
    # Stop here with a non-zero status. The bare exit() helper is meant for
    # interactive sessions and reports success (status 0) when run as a script.
    raise SystemExit(1) from err
else:
    # Report the device that is actually active instead of assuming device 0.
    print(f"✅ CuPy successfully initialized and using GPU device {device_id}.")
    print(f"Array size for sorting: {ARRAY_SIZE:,} elements.")
    print("-" * 40)

# 1. --- CPU (NumPy) Sorting ---
print("1. Starting CPU (NumPy) sort...")

# Create the array on the CPU (host memory); values are drawn from [0, 1000).
cpu_array = np.random.randint(0, 1000, size=ARRAY_SIZE, dtype=np.int32)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if the system
# time is adjusted while the sort is running.
start_time_cpu = time.perf_counter()
# Perform the sort on the CPU. np.sort returns a sorted copy, which is
# discarded here -- only the elapsed time matters for the benchmark.
np.sort(cpu_array)
end_time_cpu = time.perf_counter()

cpu_time = end_time_cpu - start_time_cpu
print(f"   CPU Sorting Time: {cpu_time:.4f} seconds.")
# Optional: drop the reference so the host memory can be reclaimed
del cpu_array

# 2. --- GPU (CuPy) Sorting ---
print("\n2. Starting GPU (CuPy) sort...")

# Create the array directly on the GPU
# CuPy's random functions generate data on the GPU memory
gpu_array = cp.random.randint(0, 1000, size=ARRAY_SIZE, dtype=cp.int32)
# GPU work is queued asynchronously, so the random-number generation above may
# still be in flight. Wait for it to finish *before* starting the clock;
# otherwise data generation is counted as part of the sort time.
cp.cuda.Stream.null.synchronize()

# perf_counter() is a monotonic, high-resolution clock meant for intervals.
start_time_gpu = time.perf_counter()
# Perform the sort on the GPU (returns a sorted copy, discarded here).
cp.sort(gpu_array)
# IMPORTANT: Synchronize the stream. GPU operations are asynchronous.
# This line ensures the sorting is fully completed before measuring the time.
cp.cuda.Stream.null.synchronize()
end_time_gpu = time.perf_counter()

gpu_time = end_time_gpu - start_time_gpu
print(f"   GPU Sorting Time: {gpu_time:.4f} seconds.")
# Optional: Clear the array to save memory. `del` only hands the buffers back
# to CuPy's memory pool, which keeps them cached for reuse; free_all_blocks()
# releases the now-unused blocks back to the device.
del gpu_array
cp.get_default_memory_pool().free_all_blocks()

# --- Results Comparison ---
print("-" * 40)
# Only form the ratio when both timings are strictly positive; this also
# protects against division by zero.
if cpu_time > 0 and gpu_time > 0:
    speedup = cpu_time / gpu_time
    if speedup >= 1:
        print(f"✨ **Speedup: GPU is {speedup:.2f}x faster than CPU**")
    else:
        # A ratio below 1 means the GPU lost; "0.50x faster" would read as a
        # win, so report the slowdown factor explicitly instead.
        print(f"⚠️ **Slowdown: GPU is {1 / speedup:.2f}x slower than CPU**")
else:
    print("Cannot calculate speedup.")

✅ CuPy successfully initialized and using GPU device 0.
Array size for sorting: 800,000,000 elements.
----------------------------------------
1. Starting CPU (NumPy) sort...
   CPU Sorting Time: 21.0971 seconds.

2. Starting GPU (CuPy) sort...
   GPU Sorting Time: 2.2332 seconds.
----------------------------------------
✨ **Speedup: GPU is 9.45x faster than CPU**


In [None]:
# NOTE(review): this loop has no exit condition and looks like leftover
# scratch code -- confirm whether the cell is still needed. It will block
# "Run All" until the kernel is interrupted.
try:
    while True:
        print("\nHi")
        # Throttle the loop so it does not flood the cell output and spin a
        # CPU core at full speed (uses the `time` module imported at the top
        # of the notebook).
        time.sleep(1)
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop this cell; end
    # quietly instead of leaving a traceback in the output.
    pass