From 59541c3c7b8cf66428a5beeb3f3c0ee72c295afd Mon Sep 17 00:00:00 2001 From: Xin Yang <105740670+xyang16@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:25:48 -0700 Subject: [PATCH] Fix input_data and device order for streaming (#809) --- engines/python/setup/djl_python/deepspeed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engines/python/setup/djl_python/deepspeed.py b/engines/python/setup/djl_python/deepspeed.py index 97501b704..29fdba6b4 100644 --- a/engines/python/setup/djl_python/deepspeed.py +++ b/engines/python/setup/djl_python/deepspeed.py @@ -324,8 +324,8 @@ def inference(self, inputs: Input): "DeepSpeed") device = torch.cuda.current_device() outputs.add_stream_content( - stream_generator(self.model, self.tokenizer, device, - input_data, **model_kwargs)) + stream_generator(self.model, self.tokenizer, input_data, + device, **model_kwargs)) return outputs if self.task == "text-generation": tokenized_inputs = self.tokenizer(