Commit

resolving conflicts between training args and optimization args (#3193)
* resolving conflicts between training args and optimization args

* Changing the log text

* black formatting

* adding the pipeline name to the notebook
jpmann committed May 15, 2024
1 parent df0bfcc · commit 8a5c1c0
Showing 1 changed file with 71 additions and 12 deletions.
@@ -377,24 +377,84 @@
"metadata": {},
"outputs": [],
"source": [
"# Training parameters\n",
"# Default training parameters\n",
"training_parameters = dict(\n",
" num_train_epochs=3,\n",
" per_device_train_batch_size=1,\n",
" per_device_eval_batch_size=1,\n",
" learning_rate=5e-6,\n",
" lr_scheduler_type=\"cosine\",\n",
")\n",
"print(f\"The following training parameters are enabled - {training_parameters}\")\n",
"# Default optimization parameters\n",
"optimization_parameters = dict(\n",
" apply_lora=\"true\",\n",
" apply_deepspeed=\"true\",\n",
" deepspeed_stage=2,\n",
")\n",
"# Let's construct finetuning parameters using training and optimization paramters.\n",
"finetune_parameters = {**training_parameters, **optimization_parameters}\n",
"\n",
"# Optimization parameters - As these parameters are packaged with the model itself, lets retrieve those parameters\n",
"# Each model finetuning works best with certain finetuning parameters which are packed with model as `model_specific_defaults`.\n",
"# Let's override the finetune_parameters in case the model has some custom defaults.\n",
"if \"model_specific_defaults\" in foundation_model.tags:\n",
" optimization_parameters = ast.literal_eval(\n",
" foundation_model.tags[\"model_specific_defaults\"]\n",
" ) # convert string to python dict\n",
"else:\n",
" optimization_parameters = dict(apply_lora=\"true\", apply_deepspeed=\"true\")\n",
"print(f\"The following optimizations are enabled - {optimization_parameters}\")"
" print(\"Warning! Model specific defaults exist. The defaults could be overridden.\")\n",
" finetune_parameters.update(\n",
" ast.literal_eval( # convert string to python dict\n",
" foundation_model.tags[\"model_specific_defaults\"]\n",
" )\n",
" )\n",
"print(\n",
" f\"The following finetune parameters are going to be set for the run: {finetune_parameters}\"\n",
")"
]
},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# Set the pipeline display name for distinguishing different runs from the name\n",
+"def get_pipeline_display_name():\n",
+"    batch_size = (\n",
+"        int(finetune_parameters.get(\"per_device_train_batch_size\", 1))\n",
+"        * int(finetune_parameters.get(\"gradient_accumulation_steps\", 1))\n",
+"        * int(gpus_per_node)\n",
+"        * int(finetune_parameters.get(\"num_nodes_finetune\", 1))\n",
+"    )\n",
+"    scheduler = finetune_parameters.get(\"lr_scheduler_type\", \"linear\")\n",
+"    deepspeed = finetune_parameters.get(\"apply_deepspeed\", \"false\")\n",
+"    ds_stage = finetune_parameters.get(\"deepspeed_stage\", \"2\")\n",
+"    if deepspeed == \"true\":\n",
+"        ds_string = f\"ds{ds_stage}\"\n",
+"    else:\n",
+"        ds_string = \"nods\"\n",
+"    lora = finetune_parameters.get(\"apply_lora\", \"false\")\n",
+"    if lora == \"true\":\n",
+"        lora_string = \"lora\"\n",
+"    else:\n",
+"        lora_string = \"nolora\"\n",
+"    save_limit = finetune_parameters.get(\"save_total_limit\", -1)\n",
+"    seq_len = finetune_parameters.get(\"max_seq_length\", -1)\n",
+"    return (\n",
+"        model_name\n",
+"        + \"-\"\n",
+"        + \"ultrachat\"\n",
+"        + \"-\"\n",
+"        + f\"bs{batch_size}\"\n",
+"        + \"-\"\n",
+"        + f\"{scheduler}\"\n",
+"        + \"-\"\n",
+"        + ds_string\n",
+"        + \"-\"\n",
+"        + lora_string\n",
+"        + f\"-save_limit{save_limit}\"\n",
+"        + f\"-seqlen{seq_len}\"\n",
+"    )\n",
+"\n",
+"\n",
+"pipeline_display_name = get_pipeline_display_name()\n",
+"print(f\"Display name used for the run: {pipeline_display_name}\")"
+]
+},
{
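The first rewritten cell above folds the training and optimization defaults into a single `finetune_parameters` dict and then lets any `model_specific_defaults` packaged with the model override them. Below is a minimal standalone sketch of that precedence logic; the `FakeFoundationModel` class and its tag value are hypothetical stand-ins for the registry model object the notebook actually fetches:

import ast

# Hypothetical stand-in: the real notebook reads `tags` from a model object
# fetched from the AzureML registry; tag values arrive as strings.
class FakeFoundationModel:
    tags = {"model_specific_defaults": "{'apply_deepspeed': 'false', 'learning_rate': 2e-5}"}

foundation_model = FakeFoundationModel()

training_parameters = dict(
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    learning_rate=5e-6,
    lr_scheduler_type="cosine",
)
optimization_parameters = dict(apply_lora="true", apply_deepspeed="true", deepspeed_stage=2)

# Merge the two default dicts; the right-hand dict wins on any duplicate key.
finetune_parameters = {**training_parameters, **optimization_parameters}

# Model-packaged defaults take precedence over everything set above.
if "model_specific_defaults" in foundation_model.tags:
    finetune_parameters.update(
        ast.literal_eval(foundation_model.tags["model_specific_defaults"])
    )

print(finetune_parameters["apply_deepspeed"])  # false -> the packaged default won
print(finetune_parameters["learning_rate"])    # 2e-05 -> replaced the 5e-6 user default

Because everything funnels through one dict, a later consumer never sees two competing values for the same argument.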
@@ -413,7 +473,7 @@
"\n",
"\n",
"# define the pipeline job\n",
"@pipeline()\n",
"@pipeline(name=pipeline_display_name)\n",
"def create_pipeline():\n",
" chat_completion_pipeline = pipeline_component_func(\n",
" mlflow_model_path=foundation_model.id,\n",
@@ -430,8 +490,7 @@
" ),\n",
" # Training settings\n",
" number_of_gpu_to_use_finetuning=gpus_per_node, # set to the number of GPUs available in the compute\n",
" **training_parameters,\n",
" **optimization_parameters\n",
" **finetune_parameters\n",
" )\n",
" return {\n",
" # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n",
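The final hunk is the heart of the fix: the component call now unpacks one merged `finetune_parameters` dict instead of unpacking `training_parameters` and `optimization_parameters` separately. If those two dicts ever share a key (which the old code allowed once model-specific defaults were loaded into `optimization_parameters`), double unpacking fails outright, while merging first resolves the duplicate by overwrite. A small sketch with a hypothetical component function standing in for the real pipeline component:

def component(**kwargs):
    # Hypothetical stand-in for the chat-completion pipeline component.
    return kwargs

training_parameters = {"learning_rate": 5e-6, "num_train_epochs": 3}
# Suppose model-specific defaults pushed a training key into the optimization dict,
# which is what the old code path permitted.
optimization_parameters = {"apply_lora": "true", "learning_rate": 2e-5}

try:
    component(**training_parameters, **optimization_parameters)
except TypeError as err:
    print(err)  # e.g. "got multiple values for keyword argument 'learning_rate'"

# Merging first resolves the clash deterministically: the right-hand value wins.
finetune_parameters = {**training_parameters, **optimization_parameters}
print(component(**finetune_parameters)["learning_rate"])  # 2e-05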
