Enable GPU/CUDA support.
Tim-Salzmann committed Jul 10, 2023
1 parent 9293040 commit 421de6e
Showing 4 changed files with 31 additions and 8 deletions.
20 changes: 17 additions & 3 deletions README.md
@@ -22,15 +22,30 @@ Tested on Ubuntu 20.04 and MacOS.
### Prerequisites
Python 3.9 or higher.
gcc, make and cmake.

### CPU Installation
CPU-only installation of `PyTorch==2.0.0`. CPU versions `>2.0.0` might work too.

Install L4CasADi via `pip install .`

### GPU (CUDA) Installation
CUDA installation of `PyTorch==2.0.0`.

Install L4CasADi via `USE_CUDA=TRUE CUDACXX=<PATH_TO_nvcc> pip install .`

### Mac M1 - ARM
On macOS with an M1 chip you will have to compile [tera_renderer](https://github.com/acados/tera_renderer) from source
and place the binary in `l4casadi/template_generation/bin`. For other platforms it is downloaded automatically.

## Example
https://github.com/Tim-Salzmann/l4casadi/blob/5edbe4b31d915c6d897608f183d06c53eaf14f63/examples/readme.py#L28-L40
https://github.com/Tim-Salzmann/l4casadi/blob/d800ca7bea785a22b05c64f4aecd7554ba5093d3/examples/readme.py#L28-L40

Please note that only `casadi.MX` symbolic variables are supported as input.

Multi-input, multi-output functions can be realized by concatenating the symbolic inputs when passing them to the model
and splitting them again inside the PyTorch function.
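
A minimal sketch of this pattern is given below. The `TwoInputModel` class, its input sizes, and the split positions are hypothetical and only illustrate the concatenate-then-split idea; they are not part of the library.

```python
import casadi as cs
import torch

import l4casadi as l4c


class TwoInputModel(torch.nn.Module):
    # Hypothetical model: receives the concatenated vector and splits it internally.
    def forward(self, xu):
        x, u = xu[..., :2], xu[..., 2:]                    # split state and control again
        return (x ** 2).sum(-1, keepdim=True) + u.sum(-1, keepdim=True)


l4c_model = l4c.L4CasADi(TwoInputModel(), has_batch=True, device='cpu')

x_sym = cs.MX.sym('x', 2, 1)
u_sym = cs.MX.sym('u', 1, 1)
y_sym = l4c_model(cs.vertcat(x_sym, u_sym))                # concatenate the symbolic inputs
```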

To use the GPU (CUDA), simply pass `device="cuda"` to the `L4CasADi` constructor.
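
A minimal sketch, assuming a CUDA-capable PyTorch build is available and using a placeholder `torch.nn.Linear` in place of a real network:

```python
import casadi as cs
import torch

import l4casadi as l4c

device = 'cuda' if torch.cuda.is_available() else 'cpu'  # fall back to CPU if no GPU is present

pyTorch_model = torch.nn.Linear(2, 1)                    # placeholder model for illustration
l4c_model = l4c.L4CasADi(pyTorch_model, has_batch=True, device=device)

x_sym = cs.MX.sym('x', 2, 1)
y_sym = l4c_model(x_sym)
```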

An example of solving a simple NLP with a PyTorch system model can be found in
[examples/simple_nlp.py](/examples/simple_nlp.py).
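
As a rough sketch of the idea (not the actual contents of `examples/simple_nlp.py`; the objective, bounds, and initial guess below are made up for illustration), the L4CasADi output can be used directly in a CasADi NLP:

```python
import casadi as cs

# Reusing x_sym and y_sym from above, where y_sym = l4c_model(x_sym):
# minimize the squared network output subject to simple box bounds on x.
nlp = {'x': x_sym, 'f': cs.sumsqr(y_sym)}
solver = cs.nlpsol('solver', 'ipopt', nlp)
sol = solver(x0=[0.5, 0.5], lbx=-1, ubx=1)
print(sol['x'])
```
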
@@ -56,6 +71,5 @@ please get in contact or create a pull request.

Possible upcoming features include:
```
- GPU support.
- Multi input, multi output functions.
- Explicit multi input, multi output functions.
```
2 changes: 1 addition & 1 deletion examples/readme.py
@@ -25,7 +25,7 @@ def forward(self, x):


pyTorch_model = MultiLayerPerceptron()
l4c_model = l4c.L4CasADi(pyTorch_model, has_batch=True)
l4c_model = l4c.L4CasADi(pyTorch_model, has_batch=True, device='cpu') # device='cuda' for GPU

x_sym = cs.MX.sym('x', 2, 1)
y_sym = l4c_model(x_sym)
7 changes: 4 additions & 3 deletions l4casadi/l4casadi.py
@@ -18,7 +18,7 @@ def __init__(self, model: Callable[[torch.Tensor], torch.Tensor],
has_batch: bool = False, device: Union[torch.device, Text] = "cpu", name: Text = "l4casadi_f"):
self.model = model
if isinstance(self.model, torch.nn.Module):
self.model.eval()
self.model.eval().to(device)
for parameters in self.model.parameters():
parameters.requires_grad = False
self.name = name
@@ -66,10 +66,10 @@ def get_ready(self, inp: Union[cs.MX, cs.SX, cs.DM]):

def generate_cpp_function_template(self, rows: int, cols: int, has_jac: bool, has_hess: bool):
if self.has_batch:
rows_out = self.model(torch.zeros(1, rows)).shape[-1]
rows_out = self.model(torch.zeros(1, rows).to(self.device)).shape[-1]
cols_out = 1
else:
out_shape = self.model(torch.zeros(rows, cols)).shape
out_shape = self.model(torch.zeros(rows, cols).to(self.device)).shape
if len(out_shape) == 1:
rows_out = out_shape[0]
cols_out = 1
@@ -121,6 +121,7 @@ def export_torch_traces(self, rows: int, cols: int) -> Tuple[bool, bool]:
d_inp = torch.zeros((1, rows))
else:
d_inp = torch.zeros((rows, cols))
d_inp = d_inp.to(self.device)

out_folder = self.generation_path

10 changes: 9 additions & 1 deletion libl4casadi/CMakeLists.txt
@@ -4,6 +4,7 @@ project(L4CasADi)
set(LIBTORCH_OSX_M1 https://github.com/mlverse/libtorch-mac-m1/releases/download/LibTorchOpenMP/libtorch-v2.0.0.zip)
set(LIBTORCH_OSX https://download.pytorch.org/libtorch/cpu/libtorch-macos-2.0.0.zip)
set(LIBTORCH_linux https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.0.0%2Bcpu.zip)
set(LIBTORCH_linux_cuda https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.0.0%2Bcu118.zip)

function(download_and_extract url name)
if (EXISTS ${CMAKE_SOURCE_DIR}/${name})
@@ -28,7 +29,12 @@ if (APPLE)
endif()
else()
message("Detected Linux")
download_and_extract(${LIBTORCH_linux} libtorch)
if ($ENV{USE_CUDA})
message("Compiling with Cuda.")
download_and_extract(${LIBTORCH_linux_cuda} libtorch)
else()
download_and_extract(${LIBTORCH_linux} libtorch)
endif()
endif()

set(CMAKE_PREFIX_PATH libtorch)
@@ -38,7 +44,9 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

add_library(l4casadi SHARED src/l4casadi.cpp include/l4casadi.hpp)
target_include_directories(l4casadi PRIVATE include)
target_include_directories(l4casadi PUBLIC ${TORCH_INCLUDE_DIRS})
target_link_libraries(l4casadi ${TORCH_LIBRARIES})
set_property(TARGET l4casadi PROPERTY CXX_STANDARD 17)
#set_property(TARGET l4casadi PROPERTY CUDA_STANDARD 17)

install(TARGETS l4casadi LIBRARY DESTINATION l4casadi)
