From 9a6f20bee9dd5ec4916b056e507ba15df01a1c5d Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Fri, 2 Jun 2023 14:36:23 -0700 Subject: [PATCH] [python] Fixes device id mismatch issue for multiple GPU case (#800) --- engines/python/setup/djl_python/huggingface.py | 8 +++----- .../src/main/java/ai/djl/python/engine/Connection.java | 5 ++++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/engines/python/setup/djl_python/huggingface.py b/engines/python/setup/djl_python/huggingface.py index 138f02d37..d8628b46c 100644 --- a/engines/python/setup/djl_python/huggingface.py +++ b/engines/python/setup/djl_python/huggingface.py @@ -119,7 +119,6 @@ def initialize(self, properties: dict): self.hf_pipeline = self.get_pipeline(task=task, model_id_or_path=model_id_or_path, - device=self.device_id, kwargs=kwargs) self.initialized = True @@ -158,8 +157,7 @@ def inference(self, inputs): return outputs - def get_pipeline(self, task: str, device: int, model_id_or_path: str, - kwargs): + def get_pipeline(self, task: str, model_id_or_path: str, kwargs): # define tokenizer or feature extractor as kwargs to load it the pipeline correctly if task in { "automatic-speech-recognition", @@ -186,7 +184,7 @@ def get_pipeline(self, task: str, device: int, model_id_or_path: str, else: hf_pipeline = pipeline(task=task, model=model_id_or_path, - device=device, + device=self.device_id, **kwargs) else: tokenizer = AutoTokenizer.from_pretrained(model_id_or_path) @@ -250,7 +248,7 @@ def wrapped_pipeline(inputs, *args, **kwargs): tokenizer = hf_pipeline.tokenizer input_tokens = tokenizer(inputs, padding=True, return_tensors="pt") if self.device_id >= 0: - input_tokens.to(torch.cuda.current_device()) + input_tokens.to(f"cuda:{self.device_id}") with torch.no_grad(): output_tokens = model.generate( *args, diff --git a/engines/python/src/main/java/ai/djl/python/engine/Connection.java b/engines/python/src/main/java/ai/djl/python/engine/Connection.java index c63fd1497..0f17e3e0e 100644 --- 
a/engines/python/src/main/java/ai/djl/python/engine/Connection.java +++ b/engines/python/src/main/java/ai/djl/python/engine/Connection.java @@ -151,12 +151,15 @@ static String[] getPythonStartCmd(PyEnv pyEnv, Model model, int workerId, int po // TP settings Device device = model.getNDManager().getDevice(); + String deviceId = String.valueOf(device.getDeviceId()); if (tensorParallelDegree > 0 && device.isGpu()) { + deviceId = "0"; String cudaDevices = getVisibleDevices(device.getDeviceId(), tensorParallelDegree); pyEnv.addEnv("CUDA_VISIBLE_DEVICES", cudaDevices); logger.info("Set CUDA_VISIBLE_DEVICES={}", cudaDevices); } if ("nc".equals(device.getDeviceType())) { + deviceId = "0"; String visibleCores; if (tensorParallelDegree > 0) { visibleCores = getNeuronVisibleCores(device.getDeviceId(), tensorParallelDegree); @@ -179,7 +182,7 @@ static String[] getPythonStartCmd(PyEnv pyEnv, Model model, int workerId, int po args[8] = "--entry-point"; args[9] = pyEnv.getEntryPoint(); args[10] = "--device-id"; - args[11] = String.valueOf(device.getDeviceId()); + args[11] = deviceId; return args; }