From 2f1583c0735eacdfdfa5b7d656074b69577b6774 Mon Sep 17 00:00:00 2001 From: mkshing Date: Tue, 15 Aug 2023 17:36:22 +0900 Subject: [PATCH] revert to current leaderboard --- .../harness.jaqket_v1.sh | 3 -- .../harness.jaqket_v2.sh | 3 -- .../abeja-gpt-neox-japanese-2.7b/harness.sh | 4 +- .../result.jaqket_v1.json | 24 ---------- .../abeja-gpt-neox-japanese-2.7b/result.json | 7 +-- .../harness.jaqket_v1.sh | 3 -- .../cyberagent-open-calm-1b/harness.sh | 4 +- .../result.jaqket_v1.json | 24 ---------- .../cyberagent-open-calm-1b/result.json | 48 +++++++++---------- .../harness.jaqket_v1.sh | 3 -- .../harness.jaqket_v2.sh | 3 -- .../cyberagent-open-calm-3b/harness.sh | 4 +- .../result.jaqket_v1.json | 24 ---------- .../cyberagent-open-calm-3b/result.json | 8 ++-- .../harness.jaqket_v1.sh | 3 -- .../cyberagent-open-calm-large/harness.sh | 4 +- .../result.jaqket_v1.json | 24 ---------- .../cyberagent-open-calm-large/result.json | 40 ++++++++-------- .../harness.jaqket_v1.sh | 3 -- .../cyberagent-open-calm-medium/harness.sh | 4 +- .../result.jaqket_v1.json | 24 ---------- .../cyberagent-open-calm-medium/result.json | 40 ++++++++-------- .../harness.jaqket_v1.sh | 3 -- models/rinna/rinna-japanese-gpt-1b/harness.sh | 4 +- .../result.jaqket_v1.json | 24 ---------- .../rinna/rinna-japanese-gpt-1b/result.json | 32 ++++++------- .../harness.jaqket_v1.sh | 3 -- .../harness.jaqket_v2.sh | 3 -- .../harness.sh | 2 +- .../result.jaqket_v1.json | 24 ---------- .../result.json | 8 ++-- .../harness.jaqket_v1.sh | 3 -- .../harness.jaqket_v2.sh | 3 -- .../harness.sh | 2 +- .../result.jaqket_v1.json | 24 ---------- .../result.json | 6 +-- .../harness.jaqket_v1.sh | 3 -- .../harness.jaqket_v2.sh | 3 -- .../harness.sh | 2 +- .../result.jaqket_v1.json | 24 ---------- .../result.json | 8 ++-- .../harness.jaqket_v1.sh | 3 -- .../harness.jaqket_v2.sh | 3 -- .../rinna-japanese-gpt-neox-3.6b/harness.sh | 2 +- .../rinna-japanese-gpt-neox-3.6b/result.json | 8 ++-- 45 files changed, 116 insertions(+), 385 deletions(-) delete mode 100644 models/abeja-gpt-neox-japanese-2.7b/harness.jaqket_v1.sh delete mode 100644 models/abeja-gpt-neox-japanese-2.7b/harness.jaqket_v2.sh delete mode 100644 models/abeja-gpt-neox-japanese-2.7b/result.jaqket_v1.json delete mode 100644 models/cyberagent/cyberagent-open-calm-1b/harness.jaqket_v1.sh delete mode 100644 models/cyberagent/cyberagent-open-calm-1b/result.jaqket_v1.json delete mode 100644 models/cyberagent/cyberagent-open-calm-3b/harness.jaqket_v1.sh delete mode 100644 models/cyberagent/cyberagent-open-calm-3b/harness.jaqket_v2.sh delete mode 100644 models/cyberagent/cyberagent-open-calm-3b/result.jaqket_v1.json delete mode 100644 models/cyberagent/cyberagent-open-calm-large/harness.jaqket_v1.sh delete mode 100644 models/cyberagent/cyberagent-open-calm-large/result.jaqket_v1.json delete mode 100644 models/cyberagent/cyberagent-open-calm-medium/harness.jaqket_v1.sh delete mode 100644 models/cyberagent/cyberagent-open-calm-medium/result.jaqket_v1.json delete mode 100644 models/rinna/rinna-japanese-gpt-1b/harness.jaqket_v1.sh delete mode 100644 models/rinna/rinna-japanese-gpt-1b/result.jaqket_v1.json delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jaqket_v1.sh delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jaqket_v2.sh delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jaqket_v1.json delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jaqket_v1.sh delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jaqket_v2.sh delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jaqket_v1.json delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jaqket_v1.sh delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jaqket_v2.sh delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jaqket_v1.json delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jaqket_v1.sh delete mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jaqket_v2.sh diff --git a/models/abeja-gpt-neox-japanese-2.7b/harness.jaqket_v1.sh b/models/abeja-gpt-neox-japanese-2.7b/harness.jaqket_v1.sh deleted file mode 100644 index 43a79a0fac..0000000000 --- a/models/abeja-gpt-neox-japanese-2.7b/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,torch_dtype=auto,device_map=auto" -TASK="jaqket_v1-0.1-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jaqket_v1.json" \ No newline at end of file diff --git a/models/abeja-gpt-neox-japanese-2.7b/harness.jaqket_v2.sh b/models/abeja-gpt-neox-japanese-2.7b/harness.jaqket_v2.sh deleted file mode 100644 index a72dfb6a6b..0000000000 --- a/models/abeja-gpt-neox-japanese-2.7b/harness.jaqket_v2.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,torch_dtype=auto,device_map=auto" -TASK="jaqket_v2-0.2-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jaqket_v2.json" \ No newline at end of file diff --git a/models/abeja-gpt-neox-japanese-2.7b/harness.sh b/models/abeja-gpt-neox-japanese-2.7b/harness.sh index 0bc2a4ee41..a9d071805c 100644 --- a/models/abeja-gpt-neox-japanese-2.7b/harness.sh +++ b/models/abeja-gpt-neox-japanese-2.7b/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b" -TASK="jaqket_v2-0.2-0.2,jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja,jaqket_v2-0.2-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1,2,3,3,3,1,1" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.json" \ No newline at end of file +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.json" \ No newline at end of file diff --git a/models/abeja-gpt-neox-japanese-2.7b/result.jaqket_v1.json b/models/abeja-gpt-neox-japanese-2.7b/result.jaqket_v1.json deleted file mode 100644 index 778f31ce22..0000000000 --- a/models/abeja-gpt-neox-japanese-2.7b/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.2": { - "acc": 0.4317343173431734, - "acc_stderr": 0.030144085874801368, - "acc_norm": 0.25092250922509224, - "acc_norm_stderr": 0.02638464786430254 - } - }, - "versions": { - "jaqket_v1-0.1-0.2": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=abeja/gpt-neox-japanese-2.7b,torch_dtype=auto,device_map=auto", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/models/abeja-gpt-neox-japanese-2.7b/result.json b/models/abeja-gpt-neox-japanese-2.7b/result.json index 9f95896929..302db7dba0 100644 --- a/models/abeja-gpt-neox-japanese-2.7b/result.json +++ b/models/abeja-gpt-neox-japanese-2.7b/result.json @@ -28,10 +28,6 @@ "xwinograd_ja": { "acc": 0.6037539103232534, "acc_stderr": 0.01580264261655725 - }, - "jaqket_v2-0.2-0.2": { - "exact_match": 16.237113402061855, - "f1": 22.56473051576145 } }, "versions": { @@ -40,8 +36,7 @@ "jsquad-1.1-0.2": 1.1, "marc_ja-1.1-0.2": 1.1, "xlsum_ja": 1.0, - "xwinograd_ja": 1.0, - "jaqket_v2-0.2-0.2": 0.2, + "xwinograd_ja": 1.0 }, "config": { "model": "hf-causal", diff --git a/models/cyberagent/cyberagent-open-calm-1b/harness.jaqket_v1.sh b/models/cyberagent/cyberagent-open-calm-1b/harness.jaqket_v1.sh deleted file mode 100644 index 13ba5c5df8..0000000000 --- a/models/cyberagent/cyberagent-open-calm-1b/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=cyberagent/open-calm-1b" -TASK="jaqket_v1-0.1-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-1b/result.jaqket_v1.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-1b/harness.sh b/models/cyberagent/cyberagent-open-calm-1b/harness.sh index bb5a7889d9..20eb407ff1 100644 --- a/models/cyberagent/cyberagent-open-calm-1b/harness.sh +++ b/models/cyberagent/cyberagent-open-calm-1b/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=cyberagent/open-calm-1b" -TASK="jaqket_v2-0.2-0.2,jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1,2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.json" \ No newline at end of file +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-1b/result.jaqket_v1.json b/models/cyberagent/cyberagent-open-calm-1b/result.jaqket_v1.json deleted file mode 100644 index e52940eb62..0000000000 --- a/models/cyberagent/cyberagent-open-calm-1b/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.2": { - "acc": 0.3210332103321033, - "acc_stderr": 0.028413017895165447, - "acc_norm": 0.1918819188191882, - "acc_norm_stderr": 0.02396473087912924 - } - }, - "versions": { - "jaqket_v1-0.1-0.2": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=cyberagent/open-calm-1b", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-1b/result.json b/models/cyberagent/cyberagent-open-calm-1b/result.json index c8c886cea4..c13bee407d 100644 --- a/models/cyberagent/cyberagent-open-calm-1b/result.json +++ b/models/cyberagent/cyberagent-open-calm-1b/result.json @@ -1,21 +1,10 @@ { "results": { - "jaqket_v2-0.2-0.2": { - "exact_match": 41.92439862542955, - "f1": 47.126090309079984 - }, - "jsquad-1.1-0.2": { - "exact_match": 37.12291760468258, - "f1": 47.16735348285718 - }, - "xlsum_ja": { - "rouge2": 2.288077088085482 - }, "jcommonsenseqa-1.1-0.2": { - "acc": 0.2725647899910634, - "acc_stderr": 0.01331714516405032, - "acc_norm": 0.2520107238605898, - "acc_norm_stderr": 0.01298483079891409 + "acc": 0.26899016979445933, + "acc_stderr": 0.013261996572328063, + "acc_norm": 0.24754244861483468, + "acc_norm_stderr": 0.01290758346346734 }, "jnli-1.1-0.2": { "acc": 0.33566146261298274, @@ -24,30 +13,41 @@ "acc_norm_stderr": 0.009556042193601356 }, "marc_ja-1.1-0.2": { - "acc": 0.746904846126636, - "acc_stderr": 0.005782758567450378, - "acc_norm": 0.746904846126636, - "acc_norm_stderr": 0.005782758567450378 + "acc": 0.7792117195674921, + "acc_stderr": 0.005478034657719626, + "acc_norm": 0.7792117195674921, + "acc_norm_stderr": 0.005478034657719626 + }, + "jsquad-1.1-0.2": { + "exact_match": 37.12291760468258, + "f1": 47.171446643186265 + }, + "xlsum_ja": { + "rouge2": 2.288077088085482 + }, + "xwinograd_ja": { + "acc": 0.6089676746611054, + "acc_stderr": 0.015765969995357912 } }, "versions": { - "jaqket_v2-0.2-0.2": 0.2, "jcommonsenseqa-1.1-0.2": 1.1, "jnli-1.1-0.2": 1.1, - "marc_ja-1.1-0.2": 1.1, "jsquad-1.1-0.2": 1.1, - "xlsum_ja": 1.0 + "marc_ja-1.1-0.2": 1.1, + "xlsum_ja": 1.0, + "xwinograd_ja": 1.0 }, "config": { "model": "hf-causal", "model_args": "pretrained=cyberagent/open-calm-1b", "num_fewshot": [ - 1, 2, 3, 3, 3, - 1 + 1, + 0 ], "batch_size": null, "device": "cuda", diff --git a/models/cyberagent/cyberagent-open-calm-3b/harness.jaqket_v1.sh b/models/cyberagent/cyberagent-open-calm-3b/harness.jaqket_v1.sh deleted file mode 100644 index 84d7bec823..0000000000 --- a/models/cyberagent/cyberagent-open-calm-3b/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=cyberagent/open-calm-3b,torch_dtype=auto,device_map=auto" -TASK="jaqket_v1-0.1-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jaqket_v1.json" diff --git a/models/cyberagent/cyberagent-open-calm-3b/harness.jaqket_v2.sh b/models/cyberagent/cyberagent-open-calm-3b/harness.jaqket_v2.sh deleted file mode 100644 index 40cd583e03..0000000000 --- a/models/cyberagent/cyberagent-open-calm-3b/harness.jaqket_v2.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=cyberagent/open-calm-3b,torch_dtype=auto,device_map=auto" -TASK="jaqket_v2-0.2-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jaqket_v2.json" diff --git a/models/cyberagent/cyberagent-open-calm-3b/harness.sh b/models/cyberagent/cyberagent-open-calm-3b/harness.sh index 5e2b244b9f..9cec930858 100644 --- a/models/cyberagent/cyberagent-open-calm-3b/harness.sh +++ b/models/cyberagent/cyberagent-open-calm-3b/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=cyberagent/open-calm-3b" -TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jaqket_v2.json" +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.json" diff --git a/models/cyberagent/cyberagent-open-calm-3b/result.jaqket_v1.json b/models/cyberagent/cyberagent-open-calm-3b/result.jaqket_v1.json deleted file mode 100644 index edcf72e9a8..0000000000 --- a/models/cyberagent/cyberagent-open-calm-3b/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.2": { - "acc": 0.39114391143911437, - "acc_stderr": 0.02969913030172107, - "acc_norm": 0.23247232472324722, - "acc_norm_stderr": 0.025706962809570782 - } - }, - "versions": { - "jaqket_v1-0.1-0.2": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=cyberagent/open-calm-3b,torch_dtype=auto,device_map=auto", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} diff --git a/models/cyberagent/cyberagent-open-calm-3b/result.json b/models/cyberagent/cyberagent-open-calm-3b/result.json index 035509b71a..328806af9f 100644 --- a/models/cyberagent/cyberagent-open-calm-3b/result.json +++ b/models/cyberagent/cyberagent-open-calm-3b/result.json @@ -26,9 +26,9 @@ "exact_match": 40.45475011256191, "f1": 52.73709875917724 }, - "jaqket_v2-0.2-0.2": { - "exact_match": 44.41580756013746, - "f1": 49.390784377897766 + "jaqket_v2-0.1-0.2": { + "exact_match": 46.90721649484536, + "f1": 51.615597556319194 }, "xlsum_ja": { "rouge2": 1.948450071736146 @@ -43,7 +43,7 @@ "jnli-1.1-0.2": 1.1, "marc_ja-1.1-0.2": 1.1, "jsquad-1.1-0.2": 1.1, - "jaqket_v2-0.2-0.2": 0.2, + "jaqket_v2-0.1-0.2": 0.1, "xlsum_ja": 1.0, "xwinograd_ja": 1.0, "mgsm": 1.0 diff --git a/models/cyberagent/cyberagent-open-calm-large/harness.jaqket_v1.sh b/models/cyberagent/cyberagent-open-calm-large/harness.jaqket_v1.sh deleted file mode 100644 index 2ef6a7980c..0000000000 --- a/models/cyberagent/cyberagent-open-calm-large/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=cyberagent/open-calm-large,use_fast=True" -TASK="jaqket_v1-0.1-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-large/result.jaqket_v1.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-large/harness.sh b/models/cyberagent/cyberagent-open-calm-large/harness.sh index ea56387bad..3b2fd9d276 100644 --- a/models/cyberagent/cyberagent-open-calm-large/harness.sh +++ b/models/cyberagent/cyberagent-open-calm-large/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=cyberagent/open-calm-large,use_fast=True" -TASK="jaqket_v2-0.2-0.2,jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1,2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-large/result.json" \ No newline at end of file +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-large/result.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-large/result.jaqket_v1.json b/models/cyberagent/cyberagent-open-calm-large/result.jaqket_v1.json deleted file mode 100644 index 1016fa1800..0000000000 --- a/models/cyberagent/cyberagent-open-calm-large/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.2": { - "acc": 0.34686346863468637, - "acc_stderr": 0.028966724380545983, - "acc_norm": 0.17712177121771217, - "acc_norm_stderr": 0.023233887238607802 - } - }, - "versions": { - "jaqket_v1-0.1-0.2": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=cyberagent/open-calm-large,use_fast=True", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-large/result.json b/models/cyberagent/cyberagent-open-calm-large/result.json index 9e6445b161..7d6f95f225 100644 --- a/models/cyberagent/cyberagent-open-calm-large/result.json +++ b/models/cyberagent/cyberagent-open-calm-large/result.json @@ -1,16 +1,5 @@ { "results": { - "jaqket_v2-0.2-0.2": { - "exact_match": 44.58762886597938, - "f1": 49.138445079166715 - }, - "jsquad-1.1-0.2": { - "exact_match": 37.23547951373255, - "f1": 48.50349592141573 - }, - "xlsum_ja": { - "rouge2": 1.9854375467671679 - }, "jcommonsenseqa-1.1-0.2": { "acc": 0.2993744414655943, "acc_stderr": 0.013697125864334919, @@ -24,30 +13,41 @@ "acc_norm_stderr": 0.009815408241248635 }, "marc_ja-1.1-0.2": { - "acc": 0.7507958967102936, - "acc_stderr": 0.005753061901645736, - "acc_norm": 0.7507958967102936, - "acc_norm_stderr": 0.005753061901645736 + "acc": 0.7912452040460412, + "acc_stderr": 0.005367632889806105, + "acc_norm": 0.7912452040460412, + "acc_norm_stderr": 0.005367632889806105 + }, + "jsquad-1.1-0.2": { + "exact_match": 37.23547951373255, + "f1": 48.50349592141573 + }, + "xlsum_ja": { + "rouge2": 1.9854375467671679 + }, + "xwinograd_ja": { + "acc": 0.6152241918665277, + "acc_stderr": 0.015719467393137274 } }, "versions": { - "jaqket_v2-0.2-0.2": 0.2, "jcommonsenseqa-1.1-0.2": 1.1, "jnli-1.1-0.2": 1.1, - "marc_ja-1.1-0.2": 1.1, "jsquad-1.1-0.2": 1.1, - "xlsum_ja": 1.0 + "marc_ja-1.1-0.2": 1.1, + "xlsum_ja": 1.0, + "xwinograd_ja": 1.0 }, "config": { "model": "hf-causal", "model_args": "pretrained=cyberagent/open-calm-large,use_fast=True", "num_fewshot": [ - 1, 2, 3, 3, 3, - 1 + 1, + 0 ], "batch_size": null, "device": "cuda", diff --git a/models/cyberagent/cyberagent-open-calm-medium/harness.jaqket_v1.sh b/models/cyberagent/cyberagent-open-calm-medium/harness.jaqket_v1.sh deleted file mode 100644 index 9bb220bee7..0000000000 --- a/models/cyberagent/cyberagent-open-calm-medium/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=cyberagent/open-calm-medium,use_fast=True" -TASK="jaqket_v1-0.1-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-medium/result.jaqket_v1.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-medium/harness.sh b/models/cyberagent/cyberagent-open-calm-medium/harness.sh index a1e601a8c3..caf7466383 100644 --- a/models/cyberagent/cyberagent-open-calm-medium/harness.sh +++ b/models/cyberagent/cyberagent-open-calm-medium/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=cyberagent/open-calm-medium,use_fast=True" -TASK="jaqket_v2-0.2-0.2,jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1,2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-medium/result.json" \ No newline at end of file +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-medium/result.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-medium/result.jaqket_v1.json b/models/cyberagent/cyberagent-open-calm-medium/result.jaqket_v1.json deleted file mode 100644 index 36817a04c4..0000000000 --- a/models/cyberagent/cyberagent-open-calm-medium/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.2": { - "acc": 0.33948339483394835, - "acc_stderr": 0.02881835959720729, - "acc_norm": 0.18450184501845018, - "acc_norm_stderr": 0.023606411103706027 - } - }, - "versions": { - "jaqket_v1-0.1-0.2": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=cyberagent/open-calm-medium,use_fast=True", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-medium/result.json b/models/cyberagent/cyberagent-open-calm-medium/result.json index 08425a34cb..25799df78b 100644 --- a/models/cyberagent/cyberagent-open-calm-medium/result.json +++ b/models/cyberagent/cyberagent-open-calm-medium/result.json @@ -1,16 +1,5 @@ { "results": { - "jaqket_v2-0.2-0.2": { - "exact_match": 30.326460481099655, - "f1": 34.99700614391335 - }, - "jsquad-1.1-0.2": { - "exact_match": 28.725799189554255, - "f1": 39.80333448254385 - }, - "xlsum_ja": { - "rouge2": 2.5775988917922406 - }, "jcommonsenseqa-1.1-0.2": { "acc": 0.39499553172475427, "acc_stderr": 0.0146202392872941, @@ -24,30 +13,41 @@ "acc_norm_stderr": 0.009920570907906705 }, "marc_ja-1.1-0.2": { - "acc": 0.8187124159886806, - "acc_stderr": 0.005124013038846332, - "acc_norm": 0.8187124159886806, - "acc_norm_stderr": 0.005124013038846332 + "acc": 0.8357167771189397, + "acc_stderr": 0.004893675823612713, + "acc_norm": 0.8357167771189397, + "acc_norm_stderr": 0.004893675823612713 + }, + "jsquad-1.1-0.2": { + "exact_match": 28.725799189554255, + "f1": 39.80333448254385 + }, + "xlsum_ja": { + "rouge2": 2.5775988917922406 + }, + "xwinograd_ja": { + "acc": 0.5964546402502607, + "acc_stderr": 0.015850834635341565 } }, "versions": { - "jaqket_v2-0.2-0.2": 0.2, "jcommonsenseqa-1.1-0.2": 1.1, "jnli-1.1-0.2": 1.1, - "marc_ja-1.1-0.2": 1.1, "jsquad-1.1-0.2": 1.1, - "xlsum_ja": 1.0 + "marc_ja-1.1-0.2": 1.1, + "xlsum_ja": 1.0, + "xwinograd_ja": 1.0 }, "config": { "model": "hf-causal", "model_args": "pretrained=cyberagent/open-calm-medium,use_fast=True", "num_fewshot": [ - 1, 2, 3, 3, 3, - 1 + 1, + 0 ], "batch_size": null, "device": "cuda", diff --git a/models/rinna/rinna-japanese-gpt-1b/harness.jaqket_v1.sh b/models/rinna/rinna-japanese-gpt-1b/harness.jaqket_v1.sh deleted file mode 100644 index 0512f451c0..0000000000 --- a/models/rinna/rinna-japanese-gpt-1b/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-1b,use_fast=False" -TASK="jaqket_v1-0.1-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.jaqket_v1.json" \ No newline at end of file diff --git a/models/rinna/rinna-japanese-gpt-1b/harness.sh b/models/rinna/rinna-japanese-gpt-1b/harness.sh index ddba68cca9..dd69debc48 100644 --- a/models/rinna/rinna-japanese-gpt-1b/harness.sh +++ b/models/rinna/rinna-japanese-gpt-1b/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-1b,use_fast=False" -TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.json" \ No newline at end of file +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.json" diff --git a/models/rinna/rinna-japanese-gpt-1b/result.jaqket_v1.json b/models/rinna/rinna-japanese-gpt-1b/result.jaqket_v1.json deleted file mode 100644 index cdae167db2..0000000000 --- a/models/rinna/rinna-japanese-gpt-1b/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.2": { - "acc": 0.3210332103321033, - "acc_stderr": 0.02841301789516545, - "acc_norm": 0.18081180811808117, - "acc_norm_stderr": 0.02342196643570677 - } - }, - "versions": { - "jaqket_v1-0.1-0.2": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=rinna/japanese-gpt-1b,use_fast=False", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/models/rinna/rinna-japanese-gpt-1b/result.json b/models/rinna/rinna-japanese-gpt-1b/result.json index 312c4052d5..2dabf2db43 100644 --- a/models/rinna/rinna-japanese-gpt-1b/result.json +++ b/models/rinna/rinna-japanese-gpt-1b/result.json @@ -1,10 +1,10 @@ { "results": { "jcommonsenseqa-1.1-0.2": { - "acc": 0.3512064343163539, - "acc_stderr": 0.014276232008753156, - "acc_norm": 0.2707774798927614, - "acc_norm_stderr": 0.013289706976329895 + "acc": 0.34763181411974975, + "acc_stderr": 0.014242467674129443, + "acc_norm": 0.257372654155496, + "acc_norm_stderr": 0.013075122531072186 }, "jnli-1.1-0.2": { "acc": 0.37674609695973704, @@ -13,26 +13,26 @@ "acc_norm_stderr": 0.009300633175085522 }, "marc_ja-1.1-0.2": { - "acc": 0.8546162009197029, - "acc_stderr": 0.004688177807265067, - "acc_norm": 0.8546162009197029, - "acc_norm_stderr": 0.004688177807265067 + "acc": 0.8786187652598535, + "acc_stderr": 0.0043130554527802374, + "acc_norm": 0.8786187652598535, + "acc_norm_stderr": 0.0043130554527802374 + }, + "xwinograd_ja": { + "acc": 0.6454640250260688, + "acc_stderr": 0.015455512877686553 }, "jsquad-1.1-0.2": { "exact_match": 26.181900045024765, "f1": 44.67532835280053 }, - "jaqket_v2-0.2-0.2": { - "exact_match": 32.178714859437754, - "f1": 44.36543794224513 + "jaqket_v2-0.1-0.2": { + "exact_match": 37.02749140893471, + "f1": 57.99059569678122 }, "xlsum_ja": { "rouge2": 5.335027032779865 }, - "xwinograd_ja": { - "acc": 0.6454640250260688, - "acc_stderr": 0.015455512877686553 - }, "mgsm": { "acc": 0.02, "acc_stderr": 0.008872139507342681 @@ -43,7 +43,7 @@ "jnli-1.1-0.2": 1.1, "marc_ja-1.1-0.2": 1.1, "jsquad-1.1-0.2": 1.1, - "jaqket_v2-0.2-0.2": 0.2, + "jaqket_v2-0.1-0.2": 0.1, "xlsum_ja": 1.0, "xwinograd_ja": 1.0, "mgsm": 1.0 diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jaqket_v1.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jaqket_v1.sh deleted file mode 100644 index 03b8ef4295..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jaqket_v1-0.1-0.4" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jaqket_v2.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jaqket_v2.sh deleted file mode 100644 index 010e613db3..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jaqket_v2.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jaqket_v2-0.2-0.4" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jaqket_v2.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.sh index ff30e2a64d..0e6048285b 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.sh +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jcommonsenseqa-1.1-0.4,jnli-1.1-0.4,marc_ja-1.1-0.4,jsquad-1.1-0.4,jaqket_v2-0.2-0.4,xlsum_ja-1.0-0.4,xwinograd_ja,mgsm-1.0-0.4" +TASK="jcommonsenseqa-1.1-0.4,jnli-1.1-0.4,marc_ja-1.1-0.4,jsquad-1.1-0.4,jaqket_v2-0.1-0.4,xlsum_ja-1.0-0.4,xwinograd_ja,mgsm-1.0-0.4" python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jaqket_v1.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jaqket_v1.json deleted file mode 100644 index 2b8b1cbe36..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.4": { - "acc": 0.33948339483394835, - "acc_stderr": 0.02881835959720729, - "acc_norm": 0.19557195571955718, - "acc_norm_stderr": 0.0241387628833626 - } - }, - "versions": { - "jaqket_v1-0.1-0.4": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json index ef1298ae64..b9fe71613a 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json @@ -26,9 +26,9 @@ "exact_match": 51.62089149031968, "f1": 63.676339985467465 }, - "jaqket_v2-0.2-0.4": { - "exact_match": 53.78006872852234, - "f1": 58.313430078893965 + "jaqket_v2-0.1-0.4": { + "exact_match": 50.945017182130584, + "f1": 55.79263424624247 }, "xlsum_ja-1.0-0.4": { "rouge2": 6.633741717885442 @@ -43,7 +43,7 @@ "jnli-1.1-0.4": 1.1, "marc_ja-1.1-0.4": 1.1, "jsquad-1.1-0.4": 1.1, - "jaqket_v2-0.2-0.4": 0.2, + "jaqket_v2-0.1-0.4": 0.1, "xlsum_ja-1.0-0.4": 1.0, "xwinograd_ja": 1.0, "mgsm-1.0-0.4": 1.0 diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jaqket_v1.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jaqket_v1.sh deleted file mode 100644 index b4e1d7b539..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft-v2,use_fast=False,torch_dtype=auto,device_map=auto" -TASK="jaqket_v1-0.1-0.4" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jaqket_v2.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jaqket_v2.sh deleted file mode 100644 index 9970f29d8f..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jaqket_v2.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft-v2,use_fast=False,torch_dtype=auto,device_map=auto" -TASK="jaqket_v2-0.2-0.4" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jaqket_v2.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.sh index 1014260c5e..cbbc67a9cc 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.sh +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft-v2,use_fast=False" -TASK="jcommonsenseqa-1.1-0.4,jnli-1.1-0.4,marc_ja-1.1-0.4,jsquad-1.1-0.4,jaqket_v2-0.2-0.4,xlsum_ja-1.0-0.4,xwinograd_ja,mgsm-1.0-0.4" +TASK="jcommonsenseqa-1.1-0.4,jnli-1.1-0.4,marc_ja-1.1-0.4,jsquad-1.1-0.4,jaqket_v2-0.1-0.4,xlsum_ja-1.0-0.4,xwinograd_ja,mgsm-1.0-0.4" python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jaqket_v1.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jaqket_v1.json deleted file mode 100644 index 5037474743..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.4": { - "acc": 0.3284132841328413, - "acc_stderr": 0.028581138492856806, - "acc_norm": 0.16974169741697417, - "acc_norm_stderr": 0.02284646447848304 - } - }, - "versions": { - "jaqket_v1-0.1-0.4": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft-v2,use_fast=False,torch_dtype=auto,device_map=auto", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.json index 3c2276139b..029530e799 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.json +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.json @@ -26,9 +26,9 @@ "exact_match": 44.91220171094102, "f1": 59.37701704807803 }, - "jaqket_v2-0.2-0.4": { - "exact_match": 54.553264604811, - "f1": 58.6920815915661 + "jaqket_v2-0.1-0.4": { + "exact_match": 52.83505154639175, + "f1": 57.11081730411627 }, "xlsum_ja-1.0-0.4": { "rouge2": 6.143135835551484 diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jaqket_v1.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jaqket_v1.sh deleted file mode 100644 index 016bbe14f9..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft,use_fast=False,torch_dtype=auto,device_map=auto" -TASK="jaqket_v1-0.1-0.4" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jaqket_v2.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jaqket_v2.sh deleted file mode 100644 index 4289c59e11..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jaqket_v2.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft,use_fast=False,torch_dtype=auto,device_map=auto" -TASK="jaqket_v2-0.2-0.4" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jaqket_v2.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.sh index d0aba68b84..40abb5e9fa 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.sh +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft,use_fast=False" -TASK="jcommonsenseqa-1.1-0.4,jnli-1.1-0.4,marc_ja-1.1-0.4,jsquad-1.1-0.4,jaqket_v2-0.2-0.4,xlsum_ja-1.0-0.4,xwinograd_ja,mgsm-1.0-0.4" +TASK="jcommonsenseqa-1.1-0.4,jnli-1.1-0.4,marc_ja-1.1-0.4,jsquad-1.1-0.4,jaqket_v2-0.1-0.4,xlsum_ja-1.0-0.4,xwinograd_ja,mgsm-1.0-0.4" python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jaqket_v1.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jaqket_v1.json deleted file mode 100644 index 59f8a4d6de..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jaqket_v1.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "results": { - "jaqket_v1-0.1-0.4": { - "acc": 0.2952029520295203, - "acc_stderr": 0.027759427437164075, - "acc_norm": 0.16236162361623616, - "acc_norm_stderr": 0.0224433712718903 - } - }, - "versions": { - "jaqket_v1-0.1-0.4": 0.1 - }, - "config": { - "model": "hf-causal", - "model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft,use_fast=False,torch_dtype=auto,device_map=auto", - "num_fewshot": 1, - "batch_size": null, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.json index 2190edad45..dc0ed85bd1 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.json +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.json @@ -26,9 +26,9 @@ "exact_match": 47.41107609185052, "f1": 61.67308851187465 }, - "jaqket_v2-0.2-0.4": { - "exact_match": 54.381443298969074, - "f1": 58.44545727277683 + "jaqket_v2-0.1-0.4": { + "exact_match": 53.69415807560137, + "f1": 57.76489737829943 }, "xlsum_ja-1.0-0.4": { "rouge2": 4.735848492592129 @@ -43,7 +43,7 @@ "jnli-1.1-0.4": 1.1, "marc_ja-1.1-0.4": 1.1, "jsquad-1.1-0.4": 1.1, - "jaqket_v2-0.2-0.4": 0.2, + "jaqket_v2-0.1-0.4": 0.1, "xlsum_ja-1.0-0.4": 1.0, "xwinograd_ja": 1.0, "mgsm-1.0-0.4": 1.0 diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jaqket_v1.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jaqket_v1.sh deleted file mode 100644 index 9da89c4fdf..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jaqket_v1.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False,torch_dtype=auto,device_map=auto" -TASK="jaqket_v1-0.1-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b/result.jaqket_v1.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jaqket_v2.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jaqket_v2.sh deleted file mode 100644 index ac1c4cbce4..0000000000 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jaqket_v2.sh +++ /dev/null @@ -1,3 +0,0 @@ -MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False,torch_dtype=auto,device_map=auto" -TASK="jaqket_v2-0.2-0.2" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "1" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b/result.jaqket_v2.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.sh index ebb302ea2c..58b6a44639 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.sh +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False" -TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm" +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm" python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b/result.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b/result.json b/models/rinna/rinna-japanese-gpt-neox-3.6b/result.json index 7816acedf8..596ec042ae 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b/result.json +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b/result.json @@ -26,9 +26,9 @@ "exact_match": 47.90634849167042, "f1": 58.804568288439675 }, - "jaqket_v2-0.2-0.2": { - "exact_match": 67.43986254295532, - "f1": 71.82856423835801 + "jaqket_v2-0.1-0.2": { + "exact_match": 68.38487972508591, + "f1": 72.4344388906244 }, "xlsum_ja": { "rouge2": 5.157849646982534 @@ -43,7 +43,7 @@ "jnli-1.1-0.2": 1.1, "marc_ja-1.1-0.2": 1.1, "jsquad-1.1-0.2": 1.1, - "jaqket_v2-0.2-0.2": 0.2, + "jaqket_v2-0.1-0.2": 0.1, "xlsum_ja": 1.0, "xwinograd_ja": 1.0, "mgsm": 1.0