Commit b7e0b18

Model Evaluation sample notebooks - Fetch and Use latest model version (#2352)

* Model Evaluation sample notebooks - Fetch and Use latest model version

* Model Evaluation sample notebooks - Fix Fill Mask task notebooks

* Model Evaluation sample notebooks - Fixing black runs

* Model Evaluation sample notebooks - Fixing Fill Mask notebooks

* Model Evaluation sample notebooks - Temporarily removing camembert-base for Fill Mask task

---------

Co-authored-by: Sarthak Singhal <sarsinghal@microsoft.com>
sarthaks95 and Sarthak Singhal committed Jun 8, 2023
1 parent 2eee6de commit b7e0b18
Showing 9 changed files with 307 additions and 166 deletions.
@@ -1,8 +1,4 @@
 {
   "metrics": ["perplexities"],
-  "model_id": "gpt2",
-  "add_start_token": true,
-  "tokenizer_config": {
-    "truncation": true
-  }
+  "add_start_token": true
 }
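For context, the surviving keys mirror the Hugging Face `evaluate` perplexity metric. A standalone sketch of the computation this config drives, assuming the `evaluate` and `transformers` packages are installed (not part of this diff):

    import evaluate

    # load the perplexity metric; in the pipeline the model comes from the job,
    # so model_id is hard-coded here only for illustration
    perplexity = evaluate.load("perplexity", module_type="metric")
    results = perplexity.compute(
        predictions=["The quick brown fox jumps over the lazy dog."],
        model_id="gpt2",
        add_start_token=True,  # prepend BOS so the first real token is scored
    )
    print(results["mean_perplexity"])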
@@ -203,112 +203,87 @@
"outputs": [],
"source": [
"# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
"models = [\n",
" {\"name\": \"bert-base-cased\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"bert-base-uncased\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"bert-large-cased\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"bert-large-uncased\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"camembert-base\", \"version\": \"4\", \"mask\": \"<mask>\"},\n",
" {\"name\": \"distilbert-base-cased\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"distilbert-base-uncased\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"distilroberta-base\", \"version\": \"4\", \"mask\": \"<mask>\"},\n",
" {\"name\": \"microsoft-deberta-base\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"microsoft-deberta-large\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"microsoft-deberta-xlarge\", \"version\": \"4\", \"mask\": \"[MASK]\"},\n",
" {\"name\": \"roberta-base\", \"version\": \"4\", \"mask\": \"<mask>\"},\n",
" {\"name\": \"roberta-large\", \"version\": \"4\", \"mask\": \"<mask>\"},\n",
"]\n",
"for model in models:\n",
" model = registry_ml_client.models.get(model[\"name\"], version=model[\"version\"])\n",
" print(model.id)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Pick the test dataset for evaluation\n",
"The next few cells show basic data preparation:\n",
"* Visualize some data rows\n",
"* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n",
"* To use the entire dataset, uncomment the cells below and run."
"model_details = [\n",
" {\"name\": \"bert-base-cased\"},\n",
" {\"name\": \"bert-base-uncased\"},\n",
" {\"name\": \"bert-large-cased\"},\n",
" {\"name\": \"bert-large-uncased\"},\n",
" # {\"name\": \"camembert-base\"},\n",
" {\"name\": \"distilbert-base-cased\"},\n",
" {\"name\": \"distilbert-base-uncased\"},\n",
" {\"name\": \"distilroberta-base\"},\n",
" {\"name\": \"microsoft-deberta-base\", \"pretrained\": \"microsoft/deberta-base\"},\n",
" {\"name\": \"microsoft-deberta-large\", \"pretrained\": \"microsoft/deberta-large\"},\n",
" {\"name\": \"microsoft-deberta-xlarge\", \"pretrained\": \"microsoft/deberta-xlarge\"},\n",
" {\"name\": \"roberta-base\"},\n",
" {\"name\": \"roberta-large\"},\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datasets import load_dataset\n",
"models = []\n",
"\n",
"hf_test_data = load_dataset(\n",
" \"rcds/wikipedia-for-mask-filling\", \"original_512\", split=\"train\", streaming=True\n",
")\n",
"\n",
"test_data_df = pd.DataFrame(hf_test_data.take(1000))"
]
},
{
"cell_type": "code",
"execution_count": null,
"for model in model_details:\n",
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"test_data_df[\"input_string\"] = test_data_df[\"texts\"]\n",
"test_data_df[\"title\"] = test_data_df[\"masks\"].apply(\n",
" lambda x: x[0] if len(x) > 0 else \"\"\n",
")"
]
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"test_data_mask_2_df = test_data_df\n",
"test_data_mask_1_df = pd.DataFrame(test_data_df)\n",
"test_data_mask_1_df[\"input_string\"] = test_data_mask_1_df[\"input_string\"].apply(\n",
" lambda x: x.replace(\"<mask>\", \"[MASK]\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"models"
],
"metadata": {
"collapsed": false
},
"outputs": [],
}
},
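The new cells above replace pinned versions with a list-and-take-first lookup: `list(registry_ml_client.models.list(name=...))[0]` assumes the registry returns the newest version first. A slightly more defensive sketch, assuming `azure-ai-ml`'s MLClient and numeric version strings:

    from azure.ai.ml import MLClient

    def get_latest_model_version(ml_client: MLClient, model_name: str):
        # enumerate every registered version instead of trusting list order
        versions = [m.version for m in ml_client.models.list(name=model_name)]
        latest = max(versions, key=int)  # assumes versions like "4", "5", ...
        return ml_client.models.get(model_name, version=latest)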
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"test_data_mask_1_df.head()"
"### 3. Pick the test dataset for evaluation\n",
"The next few cells show basic data preparation:\n",
"* Visualize some data rows\n",
"* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n",
"* To use the entire dataset, uncomment the cells below and run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"test_data_mask_2_df.head()"
]
"%pip install transformers\n",
"%pip install torch"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test_data_mask_1 = \"./small-test-[MASK].jsonl\" # [MASK]\n",
"test_data_mask_2 = \"./small-test-mask.jsonl\" # <mask>"
"from datasets import load_dataset\n",
"\n",
"hf_test_data = load_dataset(\n",
" \"rcds/wikipedia-for-mask-filling\", \"original_512\", split=\"train\", streaming=True\n",
")\n",
"\n",
"test_data_df = pd.DataFrame(hf_test_data.take(1000))"
]
},
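Because the dataset is opened with `streaming=True`, nothing is downloaded up front and `take(1000)` materializes only the first thousand records. A quick peek at one record (a sketch, not in the notebook) shows the fields the cells below depend on:

    sample = next(iter(hf_test_data))
    print(sample.keys())  # expected to include "texts" and "masks"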
{
@@ -319,31 +294,35 @@
},
"outputs": [],
"source": [
"test_data_mask_1_df.to_json(test_data_mask_1, lines=True, orient=\"records\")\n",
"test_data_mask_2_df.to_json(test_data_mask_2, lines=True, orient=\"records\")"
"test_data_df[\"title\"] = test_data_df[\"masks\"].apply(\n",
" lambda x: x[0] if len(x) > 0 else \"\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"pd.read_json(test_data_mask_1, lines=True).head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"from transformers import AutoTokenizer\n",
"\n",
"for model in models:\n",
" tokenizer = AutoTokenizer.from_pretrained(model.get(\"pretrained\", model[\"name\"]))\n",
" test_data_df[\"input_string\"] = test_data_df[\"texts\"].apply(\n",
" lambda x: tokenizer.decode(\n",
" tokenizer.encode(\n",
" x.replace(\"<mask>\", tokenizer.mask_token),\n",
" max_length=512,\n",
" truncation=True,\n",
" )[:500]\n",
" )\n",
" )\n",
" test_data_file_name = \"small-test-{}.jsonl\".format(model[\"name\"])\n",
" test_data_df.to_json(test_data_file_name, lines=True, orient=\"records\")"
],
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pd.read_json(test_data_mask_2, lines=True).head()"
]
}
},
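The per-model loop above works because each tokenizer exposes its own mask token; swapping the corpus's literal `<mask>` for `tokenizer.mask_token` lets one dataset serve BERT-style and RoBERTa-style models alike, and the encode/decode round-trip caps each example at the model's context length. A small illustration, assuming `transformers` is installed:

    from transformers import AutoTokenizer

    for name in ("bert-base-uncased", "roberta-base"):
        tok = AutoTokenizer.from_pretrained(name)
        print(name, "->", tok.mask_token)  # prints [MASK] and <mask>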
{
"attachments": {},
@@ -375,8 +354,8 @@
" name=\"model_evaluation_pipeline\", label=\"latest\"\n",
")\n",
"\n",
"with open(\"./eval-config.json\") as f:\n",
" evaluation_config_params = json.dumps(json.load(f))\n",
"# with open(\"./eval-config.json\") as f:\n",
"# evaluation_config_params = json.dumps(json.load(f))\n",
"\n",
"\n",
"# define the pipeline job\n",
@@ -394,8 +373,8 @@
" # Evaluation settings\n",
" task=\"fill-mask\",\n",
" # config file containing the details of evaluation metrics to calculate\n",
" # evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
" evaluation_config_params=evaluation_config_params,\n",
" evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
" # evaluation_config_params=evaluation_config_params,\n",
" # config cluster/device job is running on\n",
" # set device to GPU/CPU on basis if GPU count was found\n",
" device=\"gpu\" if gpu_count_found else \"cpu\",\n",
@@ -429,12 +408,10 @@
" model_object = registry_ml_client.models.get(\n",
" model[\"name\"], version=model[\"version\"]\n",
" )\n",
" if model[\"mask\"] == \"[MASK]\":\n",
" test_data = Input(type=AssetTypes.URI_FILE, path=test_data_mask_1)\n",
" else:\n",
" test_data = Input(type=AssetTypes.URI_FILE, path=test_data_mask_2)\n",
" pipeline_object = evaluation_pipeline(\n",
" test_data=test_data,\n",
" test_data=Input(\n",
" type=AssetTypes.URI_FILE, path=\"small-test-{}.jsonl\".format(model[\"name\"])\n",
" ),\n",
" mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
" )\n",
" # don't reuse cached results from previous jobs\n",
@@ -206,17 +206,41 @@
"outputs": [],
"source": [
"# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
"models = [\n",
" {\"name\": \"deepset-minilm-uncased-squad2\", \"version\": \"4\"},\n",
" {\"name\": \"deepset-roberta-base-squad2\", \"version\": \"4\"},\n",
" {\"name\": \"distilbert-base-cased-distilled-squad\", \"version\": \"4\"},\n",
" {\"name\": \"distilbert-base-uncased-distilled-squad\", \"version\": \"4\"},\n",
"]\n",
"for model in models:\n",
" model = registry_ml_client.models.get(model[\"name\"], version=model[\"version\"])\n",
" print(model.id)"
"model_details = [\n",
" {\"name\": \"deepset-minilm-uncased-squad2\"},\n",
" {\"name\": \"deepset-roberta-base-squad2\"},\n",
" {\"name\": \"distilbert-base-cased-distilled-squad\"},\n",
" {\"name\": \"distilbert-base-uncased-distilled-squad\"},\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"models = []\n",
"\n",
"for model in model_details:\n",
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"models"
],
"metadata": {
"collapsed": false
}
},
{
"attachments": {},
"cell_type": "markdown",
@@ -201,15 +201,39 @@
"outputs": [],
"source": [
"# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
"models = [\n",
" {\"name\": \"facebook-bart-large-cnn\", \"version\": \"4\"},\n",
" {\"name\": \"sshleifer-distilbart-cnn-12-6\", \"version\": \"4\"},\n",
"]\n",
"for model in models:\n",
" model = registry_ml_client.models.get(model[\"name\"], version=model[\"version\"])\n",
" print(model.id)"
"model_details = [\n",
" {\"name\": \"facebook-bart-large-cnn\"},\n",
" {\"name\": \"sshleifer-distilbart-cnn-12-6\"},\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"models = []\n",
"\n",
"for model in model_details:\n",
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"models"
],
"metadata": {
"collapsed": false
}
},
{
"attachments": {},
"cell_type": "markdown",
@@ -319,8 +343,8 @@
" name=\"model_evaluation_pipeline\", label=\"latest\"\n",
")\n",
"\n",
"with open(\"./eval-config.json\") as f:\n",
" evaluation_config_params = json.dumps(json.load(f))\n",
"# with open(\"./eval-config.json\") as f:\n",
"# evaluation_config_params = json.dumps(json.load(f))\n",
"\n",
"\n",
"# define the pipeline job\n",
@@ -338,8 +362,8 @@
" # Evaluation settings\n",
" task=\"text-summarization\",\n",
" # config file containing the details of evaluation metrics to calculate\n",
" # evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
" evaluation_config_params=evaluation_config_params,\n",
" evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
" # evaluation_config_params=evaluation_config_params,\n",
" # config cluster/device job is running on\n",
" # set device to GPU/CPU on basis if GPU count was found\n",
" device=\"gpu\" if gpu_count_found else \"cpu\",\n",