[python] Fixes device id mismatch issue for multiple GPU case (deepjavalibrary#800)
KexinFeng · Aug 16, 2023 · 9a6f20b · 9a6f20b
1 parent 855ea9f
commit 9a6f20b
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 6 deletions.
diff --git a/engines/python/setup/djl_python/huggingface.py b/engines/python/setup/djl_python/huggingface.py
@@ -119,7 +119,6 @@ def initialize(self, properties: dict):
 
         self.hf_pipeline = self.get_pipeline(task=task,
                                              model_id_or_path=model_id_or_path,
-                                             device=self.device_id,
                                              kwargs=kwargs)
 
         self.initialized = True
@@ -158,8 +157,7 @@ def inference(self, inputs):
 
         return outputs
 
-    def get_pipeline(self, task: str, device: int, model_id_or_path: str,
-                     kwargs):
+    def get_pipeline(self, task: str, model_id_or_path: str, kwargs):
         # define tokenizer or feature extractor as kwargs to load it the pipeline correctly
         if task in {
                 "automatic-speech-recognition",
@@ -186,7 +184,7 @@ def get_pipeline(self, task: str, device: int, model_id_or_path: str,
             else:
                 hf_pipeline = pipeline(task=task,
                                        model=model_id_or_path,
-                                       device=device,
+                                       device=self.device_id,
                                        **kwargs)
         else:
             tokenizer = AutoTokenizer.from_pretrained(model_id_or_path)
@@ -250,7 +248,7 @@ def wrapped_pipeline(inputs, *args, **kwargs):
             tokenizer = hf_pipeline.tokenizer
             input_tokens = tokenizer(inputs, padding=True, return_tensors="pt")
             if self.device_id >= 0:
-                input_tokens.to(torch.cuda.current_device())
+                input_tokens.to(f"cuda:{self.device_id}")
             with torch.no_grad():
                 output_tokens = model.generate(
                     *args,

diff --git a/engines/python/src/main/java/ai/djl/python/engine/Connection.java b/engines/python/src/main/java/ai/djl/python/engine/Connection.java
@@ -151,12 +151,15 @@ static String[] getPythonStartCmd(PyEnv pyEnv, Model model, int workerId, int po
 
         // TP settings
         Device device = model.getNDManager().getDevice();
+        String deviceId = String.valueOf(device.getDeviceId());
         if (tensorParallelDegree > 0 && device.isGpu()) {
+            deviceId = "0";
             String cudaDevices = getVisibleDevices(device.getDeviceId(), tensorParallelDegree);
             pyEnv.addEnv("CUDA_VISIBLE_DEVICES", cudaDevices);
             logger.info("Set CUDA_VISIBLE_DEVICES={}", cudaDevices);
         }
         if ("nc".equals(device.getDeviceType())) {
+            deviceId = "0";
             String visibleCores;
             if (tensorParallelDegree > 0) {
                 visibleCores = getNeuronVisibleCores(device.getDeviceId(), tensorParallelDegree);
@@ -179,7 +182,7 @@ static String[] getPythonStartCmd(PyEnv pyEnv, Model model, int workerId, int po
         args[8] = "--entry-point";
         args[9] = pyEnv.getEntryPoint();
         args[10] = "--device-id";
-        args[11] = String.valueOf(device.getDeviceId());
+        args[11] = deviceId;
         return args;
     }