From 5c6023b1a8e56a2850188f18d618743a52543fef Mon Sep 17 00:00:00 2001
From: Daniel Dale
Date: Tue, 14 Jun 2022 15:01:26 -0700
Subject: [PATCH] change underscores to hyphens to prevent sphinx substitution reference interpretation

---
 .../finetuning-scheduler/finetuning-scheduler.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lightning_examples/finetuning-scheduler/finetuning-scheduler.py b/lightning_examples/finetuning-scheduler/finetuning-scheduler.py
index 6458cacec..27835265a 100644
--- a/lightning_examples/finetuning-scheduler/finetuning-scheduler.py
+++ b/lightning_examples/finetuning-scheduler/finetuning-scheduler.py
@@ -79,7 +79,7 @@
 #
 # 2. Alter the schedule as desired.
 #
-# ![side_by_side_yaml](side_by_side_yaml.png){height="327px" width="800px"}
+# ![side-by-side-yaml](side_by_side_yaml.png){height="327px" width="800px"}
 #
 # 3. Once the finetuning schedule has been altered as desired, pass it to
 # [FinetuningScheduler](https://finetuning-scheduler.readthedocs.io/en/stable/api/finetuning_scheduler.fts.html#finetuning_scheduler.fts.FinetuningScheduler) to commence scheduled training:
@@ -105,7 +105,7 @@
 #
 # **Tip:** Use of regex expressions can be convenient for specifying more complex schedules. Also, a per-phase base maximum lr can be specified:
 #
-# ![emphasized_yaml](emphasized_yaml.png){height="380px" width="800px"}
+# ![emphasized-yaml](emphasized_yaml.png){height="380px" width="800px"}
 #
 #
 #
@@ -645,8 +645,8 @@ def train() -> None:
 # produced in the scenarios [here](https://drive.google.com/file/d/1t7myBgcqcZ9ax_IT9QVk-vFH_l_o5UXB/view?usp=sharing)
 # (caution, ~3.5GB).
 #
-# [![fts_explicit_accuracy](fts_explicit_accuracy.png){height="315px" width="492px"}](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOnRydWUsIm5vZnRzX2Jhc2VsaW5lIjpmYWxzZSwiZnRzX2ltcGxpY2l0IjpmYWxzZX0%3D)
-# [![nofts_baseline](nofts_baseline_accuracy.png){height="316px" width="505px"}](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOmZhbHNlLCJub2Z0c19iYXNlbGluZSI6dHJ1ZSwiZnRzX2ltcGxpY2l0IjpmYWxzZX0%3D)
+# [![fts-explicit-accuracy](fts_explicit_accuracy.png){height="315px" width="492px"}](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOnRydWUsIm5vZnRzX2Jhc2VsaW5lIjpmYWxzZSwiZnRzX2ltcGxpY2l0IjpmYWxzZX0%3D)
+# [![nofts-baseline](nofts_baseline_accuracy.png){height="316px" width="505px"}](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOmZhbHNlLCJub2Z0c19iYXNlbGluZSI6dHJ1ZSwiZnRzX2ltcGxpY2l0IjpmYWxzZX0%3D)
 #
 # Note there could be around ~1% variation in performance from the tensorboard summaries generated by this notebook
 # which uses DP and 1 GPU.
@@ -656,7 +656,7 @@ def train() -> None:
 # greater finetuning flexibility for model exploration in research. For example, glancing at DeBERTa-v3's implicit training
 # run, a critical tuning transition point is immediately apparent:
 #
-# [![implicit_training_transition](implicit_training_transition.png){height="272px" width="494px"}](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOmZhbHNlLCJub2Z0c19iYXNlbGluZSI6ZmFsc2UsImZ0c19pbXBsaWNpdCI6dHJ1ZX0%3D)
+# [![implicit-training-transition](implicit_training_transition.png){height="272px" width="494px"}](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOmZhbHNlLCJub2Z0c19iYXNlbGluZSI6ZmFsc2UsImZ0c19pbXBsaWNpdCI6dHJ1ZX0%3D)
 #
 # Our `val_loss` begins a precipitous decline at step 3119 which corresponds to phase 17 in the schedule. Referring to our
 # schedule, in phase 17 we're beginning tuning the attention parameters of our 10th encoder layer (of 11). Interesting!
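
For reference, step 3 in the patched tutorial amounts to handing the edited schedule YAML to the FinetuningScheduler callback. A minimal sketch, assuming a hypothetical schedule path "my_explicit_schedule.yaml" (the notebook's own filenames and surrounding code may differ):

    from pytorch_lightning import Trainer
    from finetuning_scheduler import FinetuningScheduler

    # Hypothetical path: point ft_schedule at the YAML edited in step 2 of the tutorial.
    fts_callback = FinetuningScheduler(ft_schedule="my_explicit_schedule.yaml")
    trainer = Trainer(callbacks=[fts_callback])
    # trainer.fit(model, datamodule=dm)  # model and datamodule are defined elsewhere in the notebook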