In [52]:
# import torch
import json
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

table_path = Path("tables")
if not table_path.exists():
    table_path.mkdir(exist_ok=True)

## RoBERTa Base Results

In [75]:
# Table :
# Method | # params # SST-2 # MRPC # CoLA # QNLI # RTE # STS-B #

tab = [
    ["FT$^*$", "125M", 94.8, 90.2, 63.6, 92.8, 78.7, 91.2],
    ["BitFit$^*$", "0.1M", 93.7, 92.7, 62.0, 91.8, 81.5, 90.8],
    [
        "VeRA$^*$",
        "0.04M",
        "94.6±.1",
        "89.5±.5",
        "65.6±.8",
        "91.8±.2",
        "78.7±.7",
        "90.7±.2",
    ],
    [
        "LoRA$^*$",
        "0.3M",
        "95.1±.2",
        "89.7±.7",
        "63.4±1.2",
        "93.3±.3",
        "86.6±.7",
        "91.5±.2",
    ],
]

tab = pd.DataFrame(
    tab,
    columns=[
        "Method",
        "\# Trainable Params",
        "SST-2",
        "MRPC",
        "CoLA",
        "QNLI",
        "RTE",
        "STS-B",
    ],
)


tab["Average"] = (
    (tab.iloc[:, 2:].map(lambda x: float(x.split("±")[0]) if isinstance(x, str) else x))
    .mean(axis=1)
    .round(1)
)

display(tab)

tab_latex = (
    tab.map(str)
    .style.hide(level=0, axis=0)
    .highlight_max(axis=0, props="font-weight:bold")
    .to_latex(
        # position_float="centering",
        convert_css=True,
        hrules=True,
        # caption="Results on the GLUE benchmark for the RoBERTa-base model. Results derived from the original paper are indicated with asterisk.",
        # label="tab:glue_results_base",
        column_format="lcccccccc",
    )
)

tab_latex = tab_latex.replace("table", "table*").replace("±", r"\tiny ±")
with open(table_path / "glue_results_base.tex", "w") as f:
    f.write(tab_latex)
print(tab_latex)

Unnamed: 0,Method,\# Trainable Params,SST-2,MRPC,CoLA,QNLI,RTE,STS-B,Average
0,FT$^*$,125M,94.8,90.2,63.6,92.8,78.7,91.2,85.2
1,BitFit$^*$,0.1M,93.7,92.7,62.0,91.8,81.5,90.8,85.4
2,VeRA$^*$,0.04M,94.6±.1,89.5±.5,65.6±.8,91.8±.2,78.7±.7,90.7±.2,85.1
3,LoRA$^*$,0.3M,95.1±.2,89.7±.7,63.4±1.2,93.3±.3,86.6±.7,91.5±.2,86.6


\begin{tabular}{lcccccccc}
\toprule
Method & \# Trainable Params & SST-2 & MRPC & CoLA & QNLI & RTE & STS-B & Average \\
\midrule
FT$^*$ & \bfseries 125M & 94.8 & 90.2 & 63.6 & 92.8 & 78.7 & 91.2 & 85.2 \\
BitFit$^*$ & 0.1M & 93.7 & \bfseries 92.7 & 62.0 & 91.8 & 81.5 & 90.8 & 85.4 \\
\bfseries VeRA$^*$ & 0.04M & 94.6\tiny ±.1 & 89.5\tiny ±.5 & \bfseries 65.6\tiny ±.8 & 91.8\tiny ±.2 & 78.7\tiny ±.7 & 90.7\tiny ±.2 & 85.1 \\
LoRA$^*$ & 0.3M & \bfseries 95.1\tiny ±.2 & 89.7\tiny ±.7 & 63.4\tiny ±1.2 & \bfseries 93.3\tiny ±.3 & \bfseries 86.6\tiny ±.7 & \bfseries 91.5\tiny ±.2 & \bfseries 86.6 \\
\bottomrule
\end{tabular}



In [None]:
#LoRA Periodic;50 scaled_multinomial;16 target_modules=qv; everything else default from the original paper
#sst2 and qnli got interrupted but doesn't matter because the performance plateaued
sst2:{'eval_loss': 0.29289835691452026, 'eval_accuracy': 0.9357798165137615, 'eval_runtime': 1.1564, 'eval_samples_per_second': 754.057, 'eval_steps_per_second': 47.561, 'epoch': 14.0}
mrpc:{'eval_loss': 0.2071428894996643, 'eval_accuracy': 0.9245835621453414, 'eval_runtime': 3.9549, 'eval_samples_per_second': 1381.322, 'eval_steps_per_second': 43.237, 'epoch': 18.0}
cola:{'eval_loss': 0.7748498916625977, 'eval_matthews_correlation': 0.613211494270806, 'eval_runtime': 0.7249, 'eval_samples_per_second': 1438.813, 'eval_steps_per_second': 45.523, 'epoch': 80.0}
qnli:{'eval_loss': 0.6382217407226562, 'eval_accuracy': 0.8799019607843137, 'eval_f1': 0.9126559714795008, 'eval_runtime': 0.5765, 'eval_samples_per_second': 707.693, 'eval_steps_per_second': 45.098, 'epoch': 30.0}
rte:{'eval_loss': 2.398937225341797, 'eval_accuracy': 0.7436823104693141, 'eval_runtime': 0.2486, 'eval_samples_per_second': 1114.257, 'eval_steps_per_second': 36.203, 'epoch': 80.0}
stsb:{'eval_loss': 0.4319670796394348, 'eval_pearson': 0.9015937752451963, 'eval_spearmanr': 0.8973924327737192, 'eval_runtime': 2.0066, 'eval_samples_per_second': 747.547, 'eval_steps_per_second': 46.846, 'epoch': 60.0}

In [None]:
# dynalora : Periodic;50, scaled_multinomial;20, target_modules=qkv, 60 epochs
sst2 : {'eval_loss': 0.239357128739357, 'eval_accuracy': 0.9369266055045872, 'eval_runtime': 0.7175, 'eval_samples_per_second': 1215.352, 'eval_steps_per_second': 39.025, 'epoch': 24.0}
mrpc : {'eval_loss': 0.7762095928192139, 'eval_accuracy': 0.8799019607843137, 'eval_f1': 0.9144851657940664, 'eval_runtime': 0.3631, 'eval_samples_per_second': 1123.625, 'eval_steps_per_second': 35.802, 'epoch': 60.0}
cola : {'eval_loss': 0.7190002202987671, 'eval_matthews_correlation': 0.6008475155631261, 'eval_runtime': 0.8257, 'eval_samples_per_second': 1263.146, 'eval_steps_per_second': 39.965, 'epoch': 60.0}
qnli :{'eval_loss': 0.2183120846748352, 'eval_accuracy': 0.9229361156873512, 'eval_runtime': 4.5767, 'eval_samples_per_second': 1193.642, 'eval_steps_per_second': 37.363, 'epoch': 14.0}
rte: {'eval_loss': 1.549071192741394, 'eval_accuracy': 0.7581227436823105, 'eval_runtime': 0.2701, 'eval_samples_per_second': 1025.541, 'eval_steps_per_second': 33.321, 'epoch': 60.0}
stsb : {'eval_loss': 0.4212930500507355, 'eval_pearson': 0.9052542545685183, 'eval_spearmanr': 0.9010987912062565, 'eval_runtime': 1.2137, 'eval_samples_per_second': 1235.874, 'eval_steps_per_second': 38.724, 'epoch': 60.0}


In [None]:
# dynavera (default parameters paper, 5E-3) : Periodic;50, scaled_multinomial;20, target_modules=qkv
sst2:{'eval_loss': 0.2185388207435608, 'eval_accuracy': 0.9426605504587156, 'eval_runtime': 0.4014, 'eval_samples_per_second': 2172.206, 'eval_steps_per_second': 34.875, 'epoch': 45.0}
mrpc:{'eval_loss': 0.4157954454421997, 'eval_accuracy': 0.8700980392156863, 'eval_f1': 0.9065255731922398, 'eval_runtime': 0.2406, 'eval_samples_per_second': 1696.037, 'eval_steps_per_second': 29.099, 'epoch': 30.0}
cola:{'eval_loss': 0.6738746166229248, 'eval_matthews_correlation': 0.5909585115904812, 'eval_runtime': 0.4746, 'eval_samples_per_second': 2197.695, 'eval_steps_per_second': 35.821, 'epoch': 80.0}
qnli:{'eval_loss': 0.21886950731277466, 'eval_accuracy': 0.917993776313381, 'eval_runtime': 3.7892, 'eval_samples_per_second': 1441.748, 'eval_steps_per_second': 22.696, 'epoch': 17.0}
rte:{'eval_loss': 1.7529410123825073, 'eval_accuracy': 0.7545126353790613, 'eval_runtime': 0.3114, 'eval_samples_per_second': 889.48, 'eval_steps_per_second': 16.056, 'epoch': 160.0}
stsb:{'eval_loss': 0.4286552369594574, 'eval_pearson': 0.903294532696409, 'eval_spearmanr': 0.9001999829846955, 'eval_runtime': 0.7144, 'eval_samples_per_second': 2099.528, 'eval_steps_per_second': 33.592, 'epoch': 80.0}


In [None]:
# dynavera (paper config, 5E-3) : once;3, scaled_multinomial;20, target_modules=qkv,
sst2:{'eval_loss': 0.26094841957092285, 'eval_accuracy': 0.9346330275229358, 'eval_runtime': 0.4125, 'eval_samples_per_second': 2113.804, 'eval_steps_per_second': 33.937, 'epoch': 45.0}
mrpc:{'eval_loss': 0.42555972933769226, 'eval_accuracy': 0.8676470588235294, 'eval_f1': 0.9045936395759717, 'eval_runtime': 0.2419, 'eval_samples_per_second': 1686.814, 'eval_steps_per_second': 28.94, 'epoch': 30.0}
cola:{'eval_loss': 0.6868337988853455, 'eval_matthews_correlation': 0.6380902412628672, 'eval_runtime': 0.4635, 'eval_samples_per_second': 2250.175, 'eval_steps_per_second': 36.676, 'epoch': 80.0}
qnli:{'eval_loss': 0.21345952153205872, 'eval_accuracy': 0.9161632802489474, 'eval_runtime': 3.7829, 'eval_samples_per_second': 1444.129, 'eval_steps_per_second': 22.734, 'epoch': 17.0}
rte:{'eval_loss': 1.6604375839233398, 'eval_accuracy': 0.7364620938628159, 'eval_runtime': 0.3104, 'eval_samples_per_second': 892.399, 'eval_steps_per_second': 16.108, 'epoch': 160.0}
stsb:{'eval_loss': 0.4192526340484619, 'eval_pearson': 0.9062964448204726, 'eval_spearmanr': 0.9031000200058047, 'eval_runtime': 0.7215, 'eval_samples_per_second': 2078.879, 'eval_steps_per_second': 33.262, 'epoch': 80.0}