[python] Support non-gpu models for huggingface (deepjavalibrary#772)
frankfliu authored and KexinFeng committed Aug 16, 2023
1 parent b7fab1e commit 97594a4
Showing 2 changed files with 21 additions and 21 deletions.
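In short, the commit makes two kinds of changes: encode_decode.py now matches content types by substring rather than exact equality (so parameterized values such as "application/json; charset=utf-8" are still recognized), and huggingface.py stores the configured device id so that CUDA is only touched when a GPU device is actually selected.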
22 changes: 12 additions & 10 deletions engines/python/setup/djl_python/encode_decode.py
@@ -51,35 +51,37 @@ def encode_csv(content): # type: (str) -> np.array


 def decode(inputs: Input, content_type: str):
-    if not content_type or content_type == "application/json":
+    if not content_type or "application/json" in content_type:
         return inputs.get_as_json()
-    elif content_type == "text/csv":
+    elif "text/csv" in content_type:
         return decode_csv(inputs)
-    elif content_type == "text/plain":
+    elif "text/plain" in content_type:
         return {"inputs": [inputs.get_as_string()]}
     if content_type.startswith("image/"):
         return {"inputs": inputs.get_as_image()}
     elif content_type.startswith("audio/"):
         return {"inputs": inputs.get_as_bytes()}
-    elif content_type == "tensor/npz":
+    elif "tensor/npz" in content_type:
         return {"inputs": inputs.get_as_npz()}
     elif content_type in {"tensor/ndlist", "application/x-npy"}:
         return {"inputs": inputs.get_as_numpy()}
     elif content_type == "application/x-www-form-urlencoded":
         return {"inputs": inputs.get_as_string()}
     else:
         # "application/octet-stream"
         return {"inputs": inputs.get_as_bytes()}


 def encode(outputs: Output, prediction, content_type: str):
-    if content_type == "application/json":
+    if not content_type or "application/json" in content_type:
         outputs.add_as_json(prediction)
-        outputs.add_property("Content-Type", content_type)
-    elif content_type == "text/csv":
+        outputs.add_property("Content-Type", "application/json")
+    elif "text/csv" in content_type:
         outputs.add_as_string(encode_csv(prediction))
-        outputs.add_property("Content-Type", content_type)
-    elif content_type == "tensor/npz":
+        outputs.add_property("Content-Type", "text/csv")
+    elif "tensor/npz" in content_type:
         outputs.add_as_npz(prediction)
-        outputs.add_property("Content-Type", content_type)
+        outputs.add_property("Content-Type", "tensor/npz")
     else:
         outputs.add_as_numpy(prediction)
         outputs.add_property("Content-Type", "tensor/ndlist")
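
For context on the matching change above: real-world Content-Type headers often carry parameters, which exact equality rejects but substring matching accepts. A minimal illustration (the header value is an example, not taken from the commit):

```python
# A typical header sent by HTTP clients: base type plus a charset parameter.
content_type = "application/json; charset=utf-8"

# Old check: exact equality fails as soon as a parameter is attached.
print(content_type == "application/json")   # False
# New check: substring matching still recognizes the base type.
print("application/json" in content_type)   # True
```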
20 changes: 9 additions & 11 deletions engines/python/setup/djl_python/huggingface.py
@@ -11,9 +11,7 @@
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
 
-import json
 import logging
-import os
 
 import torch
 from transformers import pipeline, Conversation, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, AutoConfig
@@ -64,6 +62,7 @@ def __init__(self):
         self.initialized = False
         self.enable_streaming = False
         self.model = None
+        self.device_id = -1
         self.tokenizer = None
 
     def initialize(self, properties: dict):
@@ -72,7 +71,7 @@ def initialize(self, properties: dict):
         # Otherwise we assume model artifacts are in the model_dir
         model_id_or_path = properties.get("model_id") or properties.get(
             "model_dir")
-        device_id = int(properties.get("device_id", "-1"))
+        self.device_id = int(properties.get("device_id", "-1"))
         task = properties.get("task")
         tp_degree = int(properties.get("tensor_parallel_degree", "-1"))
         self.enable_streaming = properties.get("enable_streaming",
@@ -83,7 +82,7 @@
if "device_map" in properties:
kwargs["device_map"] = properties.get("device_map")
logging.info(f"Using device map {kwargs['device_map']}")
elif tp_degree > 0:
elif tp_degree > 0 and torch.cuda.device_count() > 0:
kwargs["device_map"] = "auto"
world_size = torch.cuda.device_count()
assert world_size == tp_degree, f"TP degree ({tp_degree}) doesn't match available GPUs ({world_size})"
@@ -109,7 +108,7 @@

         self.hf_pipeline = self.get_pipeline(task=task,
                                              model_id_or_path=model_id_or_path,
-                                             device=device_id,
+                                             device=self.device_id,
                                              kwargs=kwargs)

         self.initialized = True
@@ -121,7 +120,7 @@ def inference(self, inputs):
         if not accept:
             accept = content_type if content_type.startswith(
                 "tensor/") else "application/json"
-        elif accept == "*/*":
+        elif "*/*" in accept:
             accept = "application/json"
 
         input_map = decode(inputs, content_type)
@@ -229,15 +228,14 @@ def wrapped_pipeline(inputs, *args, **kwargs):

         return wrapped_pipeline
 
-    @staticmethod
-    def wrap_text_generation_pipeline(hf_pipeline):
+    def wrap_text_generation_pipeline(self, hf_pipeline):
 
         def wrapped_pipeline(inputs, *args, **kwargs):
             model = hf_pipeline.model
             tokenizer = hf_pipeline.tokenizer
-            input_tokens = tokenizer(inputs, padding=True,
-                                     return_tensors="pt").to(
-                                         torch.cuda.current_device())
+            input_tokens = tokenizer(inputs, padding=True, return_tensors="pt")
+            if self.device_id >= 0:
+                input_tokens.to(torch.cuda.current_device())
             with torch.no_grad():
                 output_tokens = model.generate(
                     *args,
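The tokenizer change above is the core of the non-GPU support: inputs are tokenized on CPU and only moved to CUDA when a GPU device id was configured. A standalone sketch of that pattern (the model name and device selection are illustrative assumptions, not code from this commit):

```python
import torch
from transformers import AutoTokenizer

# Mirrors the commit's convention: device_id stays -1 on CPU-only hosts,
# so no torch.cuda call is ever made there.
device_id = 0 if torch.cuda.is_available() else -1

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 has no pad token by default

input_tokens = tokenizer(["a prompt", "another prompt"],
                         padding=True, return_tensors="pt")
if device_id >= 0:
    # BatchEncoding.to returns the moved encoding, so keep the result.
    input_tokens = input_tokens.to(torch.cuda.current_device())
```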
