Skip to content

Commit

Permalink
int4 model
Browse files — browse the repository at this point in the history
  • Loading branch information
nomagick committed Jul 12, 2023
1 parent db05cf7 commit 42b2647
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 6 deletions.
17 changes: 17 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# The .dockerignore file excludes files from the container build process.
#
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

# Exclude Git files
.git
.github
.gitignore

# Exclude Python cache files
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache

# Exclude Python virtual environment
/venv
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "chatglm2-6b"]
path = chatglm2-6b
url = https://huggingface.co/THUDM/chatglm2-6b
[submodule "chatglm2-6b-int4"]
path = chatglm2-6b-int4
url = https://huggingface.co/THUDM/chatglm2-6b-int4
branch = main
1 change: 0 additions & 1 deletion chatglm2-6b
Submodule chatglm2-6b deleted from 162b62
1 change: 1 addition & 0 deletions chatglm2-6b-int4
Submodule chatglm2-6b-int4 added at eb3e68
4 changes: 2 additions & 2 deletions predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ class Predictor(BasePredictor):
def setup(self):
"""Load the model into memory to make running multiple predictions efficient"""
self.tokenizer = AutoTokenizer.from_pretrained(
"./chatglm2-6b", trust_remote_code=True, local_files_only=True
"./chatglm2-6b-int4", trust_remote_code=True, local_files_only=True
)
model = AutoModel.from_pretrained(
"./chatglm2-6b", trust_remote_code=True, local_files_only=True
"./chatglm2-6b-int4", trust_remote_code=True, local_files_only=True
).cuda()
patch_chat_glm.patch(model)
self.model = model.eval()
Expand Down

0 comments on commit 42b2647

Please sign in to comment.