Skip to content

Commit

Permalink
[python] Fixes device id mismatch issue for multiple GPU case (deepjav…
Browse files Browse the repository at this point in the history
  • Loading branch information
frankfliu authored and KexinFeng committed Aug 16, 2023
1 parent 855ea9f commit 9a6f20b
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
8 changes: 3 additions & 5 deletions engines/python/setup/djl_python/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ def initialize(self, properties: dict):

self.hf_pipeline = self.get_pipeline(task=task,
model_id_or_path=model_id_or_path,
device=self.device_id,
kwargs=kwargs)

self.initialized = True
Expand Down Expand Up @@ -158,8 +157,7 @@ def inference(self, inputs):

return outputs

def get_pipeline(self, task: str, device: int, model_id_or_path: str,
kwargs):
def get_pipeline(self, task: str, model_id_or_path: str, kwargs):
# define tokenizer or feature extractor as kwargs to load it the pipeline correctly
if task in {
"automatic-speech-recognition",
Expand All @@ -186,7 +184,7 @@ def get_pipeline(self, task: str, device: int, model_id_or_path: str,
else:
hf_pipeline = pipeline(task=task,
model=model_id_or_path,
device=device,
device=self.device_id,
**kwargs)
else:
tokenizer = AutoTokenizer.from_pretrained(model_id_or_path)
Expand Down Expand Up @@ -250,7 +248,7 @@ def wrapped_pipeline(inputs, *args, **kwargs):
tokenizer = hf_pipeline.tokenizer
input_tokens = tokenizer(inputs, padding=True, return_tensors="pt")
if self.device_id >= 0:
input_tokens.to(torch.cuda.current_device())
input_tokens.to(f"cuda:{self.device_id}")
with torch.no_grad():
output_tokens = model.generate(
*args,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,15 @@ static String[] getPythonStartCmd(PyEnv pyEnv, Model model, int workerId, int po

// TP settings
Device device = model.getNDManager().getDevice();
String deviceId = String.valueOf(device.getDeviceId());
if (tensorParallelDegree > 0 && device.isGpu()) {
deviceId = "0";
String cudaDevices = getVisibleDevices(device.getDeviceId(), tensorParallelDegree);
pyEnv.addEnv("CUDA_VISIBLE_DEVICES", cudaDevices);
logger.info("Set CUDA_VISIBLE_DEVICES={}", cudaDevices);
}
if ("nc".equals(device.getDeviceType())) {
deviceId = "0";
String visibleCores;
if (tensorParallelDegree > 0) {
visibleCores = getNeuronVisibleCores(device.getDeviceId(), tensorParallelDegree);
Expand All @@ -179,7 +182,7 @@ static String[] getPythonStartCmd(PyEnv pyEnv, Model model, int workerId, int po
args[8] = "--entry-point";
args[9] = pyEnv.getEntryPoint();
args[10] = "--device-id";
args[11] = String.valueOf(device.getDeviceId());
args[11] = deviceId;
return args;
}

Expand Down

0 comments on commit 9a6f20b

Please sign in to comment.