Skip to content

Commit

Permalink
int4 model
Browse files — browse the repository at this point in the history
  • Loading branch information
nomagick committed Jul 12, 2023
1 parent db05cf7 commit 42b2647
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 6 deletions.
17 changes: 17 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# The .dockerignore file excludes files from the container build process.
#
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

# Exclude Git files
.git
.github
.gitignore

# Exclude Python cache files
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache

# Exclude Python virtual environment
/venv
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "chatglm2-6b"]
path = chatglm2-6b
url = https://huggingface.co/THUDM/chatglm2-6b
[submodule "chatglm2-6b-int4"]
path = chatglm2-6b-int4
url = https://huggingface.co/THUDM/chatglm2-6b-int4
branch = main
1 change: 0 additions & 1 deletion chatglm2-6b
Submodule chatglm2-6b deleted from 162b62
1 change: 1 addition & 0 deletions chatglm2-6b-int4
Submodule chatglm2-6b-int4 added at eb3e68
4 changes: 2 additions & 2 deletions predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ class Predictor(BasePredictor):
def setup(self):
"""Load the model into memory to make running multiple predictions efficient"""
self.tokenizer = AutoTokenizer.from_pretrained(
"./chatglm2-6b", trust_remote_code=True, local_files_only=True
"./chatglm2-6b-int4", trust_remote_code=True, local_files_only=True
)
model = AutoModel.from_pretrained(
"./chatglm2-6b", trust_remote_code=True, local_files_only=True
"./chatglm2-6b-int4", trust_remote_code=True, local_files_only=True
).cuda()
patch_chat_glm.patch(model)
self.model = model.eval()
Expand Down

0 comments on commit 42b2647

Please sign in to comment.