### Numpy code

In [1]:
import numpy as np

# Example: Large matrices (adjust size as needed)
n = 7000  # For very large matrices, ensure you have enough RAM
A = np.random.rand(n, n).astype(np.float32)
B = np.random.rand(n, n).astype(np.float32)

C = np.dot(A, B)  # warm-up and Matrix multiplication

%timeit -r 2 -o np.dot(A, B)

print(f"Result shape: {C.shape}")
print(f"Result type: {C.dtype}")


1.06 s ± 608 μs per loop (mean ± std. dev. of 2 runs, 1 loop each)
Result shape: (7000, 7000)
Result type: float32


In [None]:
import torch

# Tamaño de la matriz
n = 7000

# Selección de dispositivo
# En bohr usará GPU (cuda), en local caerá en CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Crear matrices aleatorias en simple precisión
A = torch.rand((n, n), dtype=torch.float32, device=device)
B = torch.rand((n, n), dtype=torch.float32, device=device)

# Warm-up
C = torch.matmul(A, B)
torch.cuda.synchronize()

# Medición de tiempo
%timeit -r 2 -o torch.matmul(A, B); torch.cuda.synchronize()

print(f"Result shape: {C.shape}")
print(f"Result type: {C.dtype}")

Resultados de la multiplicación de matrices con el código original (NumPy) y con PyTorch, ejecutados en la cola Bohr:

662 ms ± 1.36 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)
Result shape: (7000, 7000)
Result type: float32


Using device: cuda
49.9 ms ± 91 μs per loop (mean ± std. dev. of 2 runs, 10 loops each)
Result shape: torch.Size([7000, 7000])
Result type: torch.float32
Tiempo de multiplicación: 49.404 ms
Resultado shape: torch.Size([7000, 7000]), dtype: torch.float32