In [1]:
!pip install pycuda

Collecting pycuda
  Downloading pycuda-2024.1.2.tar.gz (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.7 MB[0m [31m10.4 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.7/1.7 MB[0m [31m25.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.20-py3-none-any.whl.metadata (2.9 kB)
Collecting mako (from pycuda)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading pytools-2024.1.20-py3-none-any.whl (91 kB)
[2K   [

In [2]:
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

# CUDA C source for the matrix-multiply kernel, kept as a Python string so it
# can be passed to SourceModule for compilation.
#
# What the kernel below does:
#   - A, B and C are indexed as flat, row-major N x N float arrays
#     (element (r, c) is read/written at offset r * N + c).
#   - Each thread derives one (row, col) pair from its block and thread indices
#     and stores the dot product of row `row` of A with column `col` of B
#     into C at (row, col).
#   - Threads whose (row, col) lies outside the N x N range skip all work, so
#     the launch grid is allowed to overshoot N.
cuda_code = """
__global__ void matrix_multiply(float *A, float *B, float *C, int N) {
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    if (row < N && col < N) {
        float sum = 0.0f;
        for (int k = 0; k < N; k++) {
            sum += A[row * N + k] * B[k * N + col];
        }
        C[row * N + col] = sum;
    }
}
"""

class CUDAActuator:
    """Multiplies square float32 matrices on the GPU through a PyCUDA kernel.

    The kernel source held in ``cuda_code`` is compiled once, when the object
    is constructed. Each call to :meth:`multiply_matrices` uploads the two
    operands, launches the kernel and downloads the product.
    """

    # Threads per block along both x and y (blocks are BLOCK_SIZE x BLOCK_SIZE).
    BLOCK_SIZE = 16

    def __init__(self):
        # Compile CUDA kernel
        self.mod = SourceModule(cuda_code)
        self.matrix_multiply = self.mod.get_function("matrix_multiply")

    def multiply_matrices(self, A, B):
        """Return the matrix product ``A @ B`` computed on the GPU.

        Args:
            A: Square 2-D array-like of shape (N, N).
            B: 2-D array-like with the same shape as ``A``.

        Returns:
            A new ``numpy.ndarray`` of shape (N, N) and dtype float32.

        Raises:
            ValueError: If ``A`` is not a square 2-D matrix or ``B`` does not
                have the same shape as ``A``.
        """
        # The kernel reads raw bytes as densely packed, row-major floats, so
        # the host buffers must be C-contiguous float32 before the copy;
        # otherwise (e.g. float64 or a strided view) the device sees garbage.
        A = np.ascontiguousarray(A, dtype=np.float32)
        B = np.ascontiguousarray(B, dtype=np.float32)

        if A.ndim != 2 or A.shape[0] != A.shape[1]:
            raise ValueError(
                f"A must be a square 2-D matrix, got shape {A.shape}"
            )
        if B.shape != A.shape:
            raise ValueError(
                f"B must have the same shape as A {A.shape}, got {B.shape}"
            )

        N = A.shape[0]
        C = np.zeros((N, N), dtype=np.float32)
        if N == 0:
            # Nothing to compute; also avoids a zero-byte allocation and a
            # zero-sized launch grid.
            return C

        # Define block and grid dimensions: enough blocks to cover N in each
        # direction (ceil division); the kernel bounds-checks the overshoot.
        threads = self.BLOCK_SIZE
        blocks = (N + threads - 1) // threads

        device_buffers = []
        try:
            # Allocate memory on GPU; tracked in a list so that a failure
            # part-way through still releases what was already allocated.
            for host_array in (A, B, C):
                device_buffers.append(cuda.mem_alloc(host_array.nbytes))
            A_gpu, B_gpu, C_gpu = device_buffers

            # Copy data to GPU
            cuda.memcpy_htod(A_gpu, A)
            cuda.memcpy_htod(B_gpu, B)

            # Execute kernel
            self.matrix_multiply(A_gpu, B_gpu, C_gpu, np.int32(N),
                                 block=(threads, threads, 1),
                                 grid=(blocks, blocks, 1))

            # Copy result back to host
            cuda.memcpy_dtoh(C, C_gpu)
        finally:
            # Release device memory now rather than waiting for garbage
            # collection, so repeated calls do not accumulate allocations.
            for buffer in device_buffers:
                buffer.free()

        return C


In [3]:
# Initialize actuator with CUDA support
actuator = CUDAActuator()

# Create sample matrices (float32, the element type the kernel operates on)
A = np.random.rand(4, 4).astype(np.float32)
B = np.random.rand(4, 4).astype(np.float32)
print(A)
print(B)

# Perform matrix multiplication on CPU
h_result = np.matmul(A, B)
print("h_result:")
print(h_result)

# Perform matrix multiplication on GPU
d_result = actuator.multiply_matrices(A, B)
print("d_result:")
print(d_result)

# Check the device result against the host reference instead of relying on a
# visual comparison of the two printouts. The tolerances leave room for
# float32 rounding differences between the two implementations.
np.testing.assert_allclose(d_result, h_result, rtol=1e-5, atol=1e-6)

[[0.05291    0.97733176 0.5361534  0.61798185]
 [0.24420781 0.5090192  0.7932272  0.16055483]
 [0.8315346  0.49540794 0.2815539  0.88492393]
 [0.18592694 0.5563976  0.5870603  0.04864544]]
[[0.23241186 0.9898964  0.13904348 0.23646586]
 [0.7213201  0.28595626 0.29019594 0.72134197]
 [0.26875418 0.0760897  0.70181626 0.86252254]
 [0.6638231  0.66124505 0.6225403  0.14939651]]
h_result:
[[1.2715901  0.7812828  1.0519743  1.2722706 ]
 [0.7436857  0.55382013 0.8383224  1.1330864 ]
 [1.2137079  1.5713731  1.0078847  0.92903924]
 [0.6346193  0.4199896  0.62960845 0.9589385 ]]
d_result:
[[1.2715901  0.7812828  1.0519743  1.2722706 ]
 [0.7436857  0.55382013 0.8383224  1.1330864 ]
 [1.2137079  1.5713731  1.0078847  0.92903924]
 [0.6346193  0.4199896  0.62960845 0.9589385 ]]


In [4]:
import torch

def check_gpu_usage(device=0):
    """Print PyTorch's CUDA memory counters for one device.

    Prints three lines: a header, the memory currently allocated and the
    memory currently reserved (labelled "Cached"), each divided by 1024**3 and
    shown with 20 decimal places. Prints nothing when
    ``torch.cuda.is_available()`` is false.

    Args:
        device: Device index (or anything ``torch.cuda.memory_allocated``
            accepts) to report on. Defaults to 0.

    Note:
        These are PyTorch allocator statistics. Memory obtained through other
        libraries is presumably not reflected here -- confirm before using
        this to measure non-PyTorch code.
    """
    if not torch.cuda.is_available():
        return

    bytes_per_unit = 1024**3
    print('Memory Usage:')
    print('Allocated:', f"{(torch.cuda.memory_allocated(device)/bytes_per_unit):.20f}", 'GB')
    # memory_reserved is the current name of the deprecated memory_cached.
    print('Cached:', f"{(torch.cuda.memory_reserved(device)/bytes_per_unit):.20f}", 'GB')

In [5]:
import numpy as np
from src.core import Actuator
import psutil

def _two_link_spec():
    """Return a fresh copy of the chain description used by both demos below."""
    # NOTE(review): presumably alternating joint axes and link offsets for
    # src.core.Actuator -- confirm against that class.
    return ['z', [1., 0., 0.], 'z', [1., 0., 0.]]


# --- Joint angles in, end-effector position out ---
actuator = Actuator(_two_link_spec())
actuator.angles = [np.pi / 6, np.pi / 3]
check_gpu_usage()  # memory report before reading the end effector
print(actuator.ee)
check_gpu_usage()  # memory report after, for comparison

# --- End-effector target in, joint angles out (printed in whole degrees) ---
arm = Actuator(_two_link_spec())
target_xy = 2 / np.sqrt(2)
arm.ee = [target_xy, target_xy, 0.]
solved_deg = np.rad2deg(arm.angles)
print(np.round(solved_deg))




ModuleNotFoundError: No module named 'src'