Compare results between JSQuAD prompt with title and without title #84

Merged (8 commits, Sep 30, 2023)
87 changes: 85 additions & 2 deletions lm_eval/tasks/ja/jsquad.py
@@ -178,6 +178,7 @@ def _squad_agg(self, key, item):
predictions, references = zip(*item)
return self._squad_metric(predictions=predictions, references=references)[key]


class JSQuADWithFintanPrompt(JSQuAD):
"""
prompt template is taken from [ChatGPT vs BERT: どちらが日本語をより理解できるのか?](https://fintan.jp/page/9126/)
@@ -195,7 +196,28 @@ def doc_to_text(self, doc):
+ f"{self.SEP}"
+ "回答:"
)



class JSQuADWithFintanPromptV12(JSQuADWithFintanPrompt):
"""
prompt template is taken from [ChatGPT vs BERT: どちらが日本語をより理解できるのか?](https://fintan.jp/page/9126/)
"""
VERSION = 1.2
DESCRIPTION = "質問に対する回答を題名と文章から一言で抽出してください。回答は名詞で答えてください。\n\n"
def doc_to_text(self, doc):
return (
"題名:"
+ doc["title"]
+ f"{self.SEP}"
+ "文章:"
+ doc["context"].split("[SEP]")[-1].strip()
+ f"{self.SEP}"
+ "質問:"
+ doc["question"]
+ f"{self.SEP}"
+ "回答:"
)


class JSQuADWithJAAlpacaPrompt(JSQuAD):
"""
@@ -231,6 +253,39 @@ def doc_to_text(self, doc):
return f"### 指示:\n{self.INSTRUCTION}\n\n### 入力:\n{input_text}\n\n### 応答:\n"


class JSQuADWithJAAlpacaPromptV12(JSQuADWithJAAlpacaPrompt):
"""
This prompt format was inspired by the below data in fujiki/japanese_alpaca_data.
```
{
'instruction': '与えられた文脈に最も適した文を選択してください。',
'input': '文脈:あなたは親友と現在の仕事の状況について話しています。\nA)私にはあまり選択肢がありません。\nB)他に選択肢がありません。\nC)私には本当に決断する必要がありません。',
'output': 'A) 私には多くの選択肢がありません。'
}
```
Reference:
- data: https://huggingface.co/datasets/fujiki/japanese_alpaca_data
- code: https://github.com/Stability-AI/gpt-neox/blob/c130a4edc1120dccec8f02a34eb60d3e8f484cd3/finetune/finetune_base_ja.py#LL118C23-L127C11
"""
VERSION = 1.2
def doc_to_text(self, doc):
"""
以下は、タスクを説明する指示と、文脈のある入力の組み合わせです。要求を適切に満たす応答を書きなさい。

### 指示:
{instruction}

### 入力:
{input}

### 応答:
{response}
"""
input_text = f"文脈:{doc['title']}\n{doc['context'].split('[SEP]')[-1].strip()}\n質問:{doc['question']}"
return f"### 指示:\n{self.INSTRUCTION}\n\n### 入力:\n{input_text}\n\n### 応答:\n"



class JSQuADWithRinnaInstructionSFT(JSQuAD):
"""
Reference:
@@ -243,10 +298,22 @@ class JSQuADWithRinnaInstructionSFT(JSQuAD):

def doc_to_text(self, doc):
input_text = f"文脈:{doc['context'].split('[SEP]')[-1].strip()}{self.SEP}質問:{doc['question']}"
# input_text = f"質問:{doc['question']}<NL>文脈:{doc['context'].split('[SEP]')[-1].strip()}"
return f"ユーザー: {input_text}{self.SEP}システム: "


class JSQuADWithRinnaInstructionSFTV12(JSQuADWithRinnaInstructionSFT):
"""
Reference:
- HF Hub: https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft
"""
VERSION = 1.2

def doc_to_text(self, doc):
input_text = f"文脈:{doc['title']}{self.SEP}{doc['context'].split('[SEP]')[-1].strip()}{self.SEP}質問:{doc['question']}"
return f"ユーザー: {input_text}{self.SEP}システム: "



class JSQuADWithRinnaBilingualInstructionSFT(JSQuADWithRinnaInstructionSFT):
"""
Reference:
@@ -257,13 +324,29 @@ class JSQuADWithRinnaBilingualInstructionSFT(JSQuADWithRinnaInstructionSFT):
SEP = "\n"
FEWSHOT_SEP = "\n"


class JSQuADWithRinnaBilingualInstructionSFTV12(JSQuADWithRinnaBilingualInstructionSFT):
"""
Reference:
- HF Hub: https://huggingface.co/rinna/bilingual-gpt-neox-4b-instruction-sft
"""
VERSION = 1.2

def doc_to_text(self, doc):
input_text = f"文脈:{doc['title']}{self.SEP}{doc['context'].split('[SEP]')[-1].strip()}{self.SEP}質問:{doc['question']}"
return f"ユーザー: {input_text}{self.SEP}システム: "


VERSIONS = [
JSQuAD,
JSQuADWithFintanPrompt,
JSQuADWithFintanPromptV12,
JSQuADWithJAAlpacaPrompt,
JSQuADWithJAAlpacaPromptV12,
JSQuADWithRinnaInstructionSFT,
JSQuADWithRinnaInstructionSFTV12,
JSQuADWithRinnaBilingualInstructionSFT,
JSQuADWithRinnaBilingualInstructionSFTV12
]


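To make the v1.1 → v1.2 change concrete, here is a minimal sketch (not part of the PR) that renders the Fintan-style prompt for a made-up JSQuAD record, with and without the title line. `SEP = "\n"` is an assumption matching the base JSQuAD task in this repo.

```python
# Minimal illustration (not from this PR): render the Fintan-style JSQuAD
# prompt with and without the title line. SEP = "\n" is an assumption that
# matches the base JSQuAD task; the record below is a made-up example.
SEP = "\n"

doc = {
    "title": "富士山",
    "context": "富士山 [SEP] 富士山は日本で最も高い山である。",
    "question": "日本で最も高い山は何ですか。",
}

# JSQuAD contexts embed the title before a "[SEP]" marker, so both
# variants strip it off and keep only the passage text.
context = doc["context"].split("[SEP]")[-1].strip()

# v1.1 (JSQuADWithFintanPrompt): passage and question only.
prompt_v11 = "文章:" + context + SEP + "質問:" + doc["question"] + SEP + "回答:"

# v1.2 (JSQuADWithFintanPromptV12): title line prepended.
prompt_v12 = (
    "題名:" + doc["title"] + SEP
    + "文章:" + context + SEP
    + "質問:" + doc["question"] + SEP
    + "回答:"
)

print(prompt_v11)
print("---")
print(prompt_v12)
```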
3 changes: 3 additions & 0 deletions models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 15.803692030616839,
"f1": 25.18326978234071
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent-open-calm-1b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-1b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent-open-calm-1b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 39.53174245835209,
"f1": 49.49399460234075
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-1b",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent-open-calm-3b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-3b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent-open-calm-3b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 44.529491220171096,
"f1": 56.02141036867636
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-3b,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent-open-calm-7b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent-open-calm-7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 48.10895992796038,
"f1": 60.90961937230767
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent-open-calm-large/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-large/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent-open-calm-large/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 40.4997748761819,
"f1": 51.32160467436942
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent-open-calm-medium/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-medium/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent-open-calm-medium/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 29.85141828005403,
"f1": 40.49655778214922
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
4 changes: 4 additions & 0 deletions models/llama/llama-7b/harness.jsquad-1.2.sh
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=huggyllama/llama-7b,use_accelerate=True,load_in_8bit=True"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama/llama-7b/result.jsquad-1.2.json" --batch_size 2

22 changes: 22 additions & 0 deletions models/llama/llama-7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 36.24493471409275,
"f1": 50.91625240527312
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=huggyllama/llama-7b,use_accelerate=True,load_in_8bit=True",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
4 changes: 4 additions & 0 deletions models/llama2/llama2-2.7b/harness.jsquad-1.2.sh
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.2.json" --batch_size 2

22 changes: 22 additions & 0 deletions models/llama2/llama2-2.7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 59.92796037820801,
"f1": 70.8236875084182
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/llama2/llama2-7b-chat/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.jsquad-1.2.json" --batch_size 2
22 changes: 22 additions & 0 deletions models/llama2/llama2-7b-chat/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 62.17919855920756,
"f1": 74.84345935966519
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
4 changes: 4 additions & 0 deletions models/llama2/llama2-7b/harness.jsquad-1.2.sh
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b/result.jsquad-1.2.json" --batch_size 2

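Since the point of this PR is to compare the with-title (v1.2) scores against the earlier runs, a throwaway script along these lines (not part of the PR; the glob pattern is an assumption about the repo layout) can tabulate the exact_match / f1 numbers from the result JSONs added above:

```python
# Throwaway comparison script (not part of this PR). Assumes it runs from the
# repo root and that result files live under models/**/result.jsquad-1.2.json,
# with the schema shown in the diffs above.
import glob
import json

for path in sorted(glob.glob("models/**/result.jsquad-1.2.json", recursive=True)):
    with open(path) as f:
        data = json.load(f)
    # Each result file holds a single task entry, e.g. "jsquad-1.2-0.2".
    for task, scores in data["results"].items():
        print(f"{path}\t{task}\t"
              f"exact_match={scores['exact_match']:.2f}\tf1={scores['f1']:.2f}")
```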