diff --git a/engines/python/setup/djl_python/deepspeed.py b/engines/python/setup/djl_python/deepspeed.py
index 97501b704..29fdba6b4 100644
--- a/engines/python/setup/djl_python/deepspeed.py
+++ b/engines/python/setup/djl_python/deepspeed.py
@@ -324,8 +324,8 @@ def inference(self, inputs: Input):
                     "DeepSpeed")
             device = torch.cuda.current_device()
             outputs.add_stream_content(
-                stream_generator(self.model, self.tokenizer, device,
-                                 input_data, **model_kwargs))
+                stream_generator(self.model, self.tokenizer, input_data,
+                                 device, **model_kwargs))
             return outputs
         if self.task == "text-generation":
             tokenized_inputs = self.tokenizer(
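
Note: the hunk above only swaps the positions of `input_data` and `device` at the call site, implying that `stream_generator` now expects the inputs before the device. A minimal sketch of that assumed post-change signature is below; the actual helper lives in djl_python's streaming utilities and may differ, and the body here is illustrative only (it yields one decoded generation per input rather than token-level chunks).

```python
from typing import Iterator, List

def stream_generator(model, tokenizer, input_data: List[str], device,
                     **model_kwargs) -> Iterator[str]:
    # Assumed parameter order after this change: inputs before device.
    # Illustrative body: yields one decoded generation per input string,
    # not true token-level streaming.
    for text in input_data:
        input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
        output_ids = model.generate(input_ids, **model_kwargs)
        yield tokenizer.decode(output_ids[0], skip_special_tokens=True)
```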