In [2]:
import pandas as pd

from pandas.api.types import CategoricalDtype

# Short adapter labels, written here in first-to-last presentation order.
adapter_order = [
    'full_tuning',
    'houlsby',
    'pfeiffer',
    'scaled_parallel',
    'ln_tuning',
    'lora',
    'hf_lora',
    'hf_lora_all',
    'hf_krona',
    'compacter',
    'compacter++',
    'ia3',
    'mam',
    'prefix_tuning',
    'prefix_tuning_flat',
    'unipelt',
]
# The list is stored reversed; the table cells sort this column in descending
# order, which restores the order written above.
adapter_order.reverse()

# Order in which the dataset configurations are sorted within each adapter.
dataset_config_order = ["3.0.0", "boolq", "rte", "copa"]

In [3]:
# Per-run throughput table (train and test columns) built from the exported
# run CSV: select columns -> drop incomplete runs -> shorten adapter labels ->
# derive per-GPU throughput -> order rows -> write the result to CSV.
df = pd.read_csv("../wandb_results/wandb_export_2023-12-23T10_15_59.462-05_00.csv")

relevant_columns = ['Name', 'model_name_or_path', 'adapter_config_string',
                    'dataset_name', 'dataset_config_name',
                    'per_device_train_batch_size', 'gradient_accumulation_steps',
                    'throughput_tokens', 'test/throughput_tokens',
                    'total_batch_size',
                    'peak_memory_usage', 'eval/peak_memory_usage', 'test/peak_memory_usage',
                    ]

df = df[relevant_columns]

# dropna() discards a run as soon as ANY selected column is missing. If that
# wipes out the whole table, say so (with the per-column missing counts)
# instead of silently exporting a header-only CSV.
rows_before_dropna = len(df)
missing_per_column = df.isna().sum()
df = df.dropna()
if rows_before_dropna and df.empty:
    print("WARNING: dropna() removed all", rows_before_dropna,
          "rows; missing values per column:")
    print(missing_per_column[missing_per_column > 0].to_string())
# df = df[df.model_name_or_path != "t5-11b"]

# Full adapter config strings -> short labels used in adapter_order.
# The kv_size=64 and kv_size=128 variants share one label.
adapter_aliases = {
    "prefix_tuning[bottleneck_size=800,kv_size=64]|par_bn": "mam",
    "prefix_tuning[bottleneck_size=800,kv_size=128]|par_bn": "mam",
    "prefix_tuning_flat[kv_size=128]": "prefix_tuning_flat",
    "prefix_tuning_flat[kv_size=64]": "prefix_tuning_flat",
    "prefix_tuning[kv_size=128]": "prefix_tuning",
    "prefix_tuning[kv_size=64]": "prefix_tuning",
    "lora[r=8,use_gating=True]|prefix_tuning[prefix_length=10,use_gating=True,kv_size=128]|seq_bn[reduction_factor=16,use_gating=True]": "unipelt",
    "lora[r=8,use_gating=True]|prefix_tuning[prefix_length=10,use_gating=True,kv_size=64]|seq_bn[reduction_factor=16,use_gating=True]": "unipelt",
}
df['adapter_config_string'] = df['adapter_config_string'].replace(adapter_aliases)

# num_gpus = total_batch_size / (per_device_train_batch_size * gradient_accumulation_steps)
# Later keyword arguments of assign() may refer to columns created by earlier ones.
df = df.assign(
    num_gpus=lambda runs: runs['total_batch_size'] / (
        runs['per_device_train_batch_size'] * runs['gradient_accumulation_steps']),
    throughput_tokens_per_gpu=lambda runs: runs['throughput_tokens'] / runs['num_gpus'],
    throughput_tokens_per_gpu_test=lambda runs: runs['test/throughput_tokens'] / runs['num_gpus'],
)

# Values that are not listed as categories turn into NaN in the cast below;
# report them so an unmapped label does not vanish unnoticed.
for column, known_values in (('adapter_config_string', adapter_order),
                             ('dataset_config_name', dataset_config_order)):
    unknown_values = sorted(set(df[column]) - set(known_values), key=str)
    if unknown_values:
        print(f"WARNING: {column} values missing from the configured order become NaN:",
              unknown_values)

# Ordered categoricals make sort_values follow the configured orders
# rather than alphabetical order.
df = df.astype({
    'adapter_config_string': CategoricalDtype(categories=adapter_order, ordered=True),
    'dataset_config_name': CategoricalDtype(categories=dataset_config_order, ordered=True),
})
df = df.sort_values(
    by=['model_name_or_path', 'num_gpus', 'adapter_config_string', 'dataset_config_name'],
    ascending=[False, True, False, True]
)

# total_batch_size was only needed to derive num_gpus
df = df.drop(columns=['total_batch_size'])

print("Unique models:", len(df['model_name_or_path'].unique()))
print("Unique adapter configs:", len(df['adapter_config_string'].unique()))

print(df.shape)
df.to_csv('../wandb_results/throughput_noaverage_evalonly.csv', index=False)
df

Unique models: 0
Unique adapter configs: 0
(0, 15)


Unnamed: 0,Name,model_name_or_path,adapter_config_string,dataset_name,dataset_config_name,per_device_train_batch_size,gradient_accumulation_steps,throughput_tokens,test/throughput_tokens,peak_memory_usage,eval/peak_memory_usage,test/peak_memory_usage,num_gpus,throughput_tokens_per_gpu,throughput_tokens_per_gpu_test


# Test throughput table

In [10]:
# Preview of the eval-throughput columns of the export, restricted to runs
# that have a value in every selected column.
relevant_columns = [
    'Name', 'model_name_or_path', 'adapter_config_string',
    'dataset_name', 'dataset_config_name',
    'per_device_train_batch_size', 'gradient_accumulation_steps',
    'eval/throughput_tokens_mean', 'eval/throughput_tokens_std',
    'total_batch_size',
    'peak_memory_usage', 'eval/peak_memory_usage',
]

df = (
    pd.read_csv("../wandb_results/wandb_export_2023-12-23T10_15_59.462-05_00.csv")
    .loc[:, relevant_columns]
    .dropna()
)

df

Unnamed: 0,Name,model_name_or_path,adapter_config_string,dataset_name,dataset_config_name,per_device_train_batch_size,gradient_accumulation_steps,eval/throughput_tokens_mean,eval/throughput_tokens_std,total_batch_size,peak_memory_usage,eval/peak_memory_usage
3,glad-planet-2946,t5-11b,"lora[r=8,use_gating=True]|prefix_tuning[prefix...",super_glue,copa,1.0,32.0,305.699457,33.227493,32.0,24242.183105,24242.183105
4,splendid-leaf-2945,t5-11b,"prefix_tuning[bottleneck_size=800,kv_size=128]...",cnn_dailymail,3.0.0,1.0,32.0,3260.064222,208.213074,32.0,27090.796387,27090.796387
5,likely-wave-2944,t5-11b,prefix_tuning_flat[kv_size=128],super_glue,copa,1.0,32.0,467.299949,49.448879,32.0,21958.032227,21958.476945
6,true-cosmos-2943,t5-11b,prefix_tuning[kv_size=128],super_glue,copa,1.0,32.0,449.217234,49.058655,32.0,24281.101562,24281.101562
7,divine-bush-2942,t5-11b,ia3,cnn_dailymail,3.0.0,1.0,32.0,4242.824479,282.659058,32.0,23229.260742,23229.260742
...,...,...,...,...,...,...,...,...,...,...,...,...
157,daily-wildflower-2792,t5-large,hf_lora,super_glue,copa,32.0,1.0,23667.412848,4264.500977,32.0,1624.643555,1790.048828
158,olive-forest-2791,t5-large,scaled_parallel,super_glue,copa,32.0,1.0,17511.523876,3203.271729,32.0,1779.057617,1941.204590
159,decent-firebrand-2790,t5-large,ln_tuning,super_glue,copa,32.0,1.0,30371.497942,5562.807617,32.0,1620.143555,1782.220703
160,fresh-disco-2789,t5-large,pfeiffer,super_glue,copa,32.0,1.0,19303.642570,3444.671631,32.0,1718.702148,1881.161621


In [15]:
# Eval-only throughput table for the "compacter" adapter, built from the
# exported run CSV: select columns -> drop incomplete runs -> shorten adapter
# labels -> derive per-GPU throughput -> order rows -> keep compacter -> write CSV.
df = pd.read_csv("../wandb_results/wandb_export_2023-12-23T11_41_11.610-05_00.csv")

relevant_columns = ['Name', 'model_name_or_path', 'adapter_config_string',
                    'dataset_name', 'dataset_config_name',
                    'per_device_train_batch_size', 'gradient_accumulation_steps',
                    'eval/throughput_tokens_mean', 'eval/throughput_tokens_std',
                    'total_batch_size',
                    'peak_memory_usage', 'eval/peak_memory_usage',
                    ]

df = df[relevant_columns]

# dropna() discards a run as soon as ANY selected column is missing. If that
# wipes out the whole table, say so (with the per-column missing counts)
# instead of silently exporting a header-only CSV.
rows_before_dropna = len(df)
missing_per_column = df.isna().sum()
df = df.dropna()
if rows_before_dropna and df.empty:
    print("WARNING: dropna() removed all", rows_before_dropna,
          "rows; missing values per column:")
    print(missing_per_column[missing_per_column > 0].to_string())
# df = df[df.model_name_or_path != "t5-11b"]

# Full adapter config strings -> short labels used in adapter_order.
# The kv_size=64 and kv_size=128 variants share one label.
adapter_aliases = {
    "prefix_tuning[bottleneck_size=800,kv_size=64]|par_bn": "mam",
    "prefix_tuning[bottleneck_size=800,kv_size=128]|par_bn": "mam",
    "prefix_tuning_flat[kv_size=128]": "prefix_tuning_flat",
    "prefix_tuning_flat[kv_size=64]": "prefix_tuning_flat",
    "prefix_tuning[kv_size=128]": "prefix_tuning",
    "prefix_tuning[kv_size=64]": "prefix_tuning",
    "lora[r=8,use_gating=True]|prefix_tuning[prefix_length=10,use_gating=True,kv_size=128]|seq_bn[reduction_factor=16,use_gating=True]": "unipelt",
    "lora[r=8,use_gating=True]|prefix_tuning[prefix_length=10,use_gating=True,kv_size=64]|seq_bn[reduction_factor=16,use_gating=True]": "unipelt",
}
df['adapter_config_string'] = df['adapter_config_string'].replace(adapter_aliases)

# num_gpus = total_batch_size / (per_device_train_batch_size * gradient_accumulation_steps)
# Later keyword arguments of assign() may refer to columns created by earlier ones.
df = df.assign(
    num_gpus=lambda runs: runs['total_batch_size'] / (
        runs['per_device_train_batch_size'] * runs['gradient_accumulation_steps']),
    throughput_tokens_per_gpu_test=lambda runs: runs['eval/throughput_tokens_mean'] / runs['num_gpus'],
)

# Values that are not listed as categories turn into NaN in the cast below;
# report them so an unmapped label does not vanish unnoticed.
for column, known_values in (('adapter_config_string', adapter_order),
                             ('dataset_config_name', dataset_config_order)):
    unknown_values = sorted(set(df[column]) - set(known_values), key=str)
    if unknown_values:
        print(f"WARNING: {column} values missing from the configured order become NaN:",
              unknown_values)

# Ordered categoricals make sort_values follow the configured orders
# rather than alphabetical order.
df = df.astype({
    'adapter_config_string': CategoricalDtype(categories=adapter_order, ordered=True),
    'dataset_config_name': CategoricalDtype(categories=dataset_config_order, ordered=True),
})
df = df.sort_values(
    by=['model_name_or_path', 'num_gpus', 'adapter_config_string', 'dataset_config_name'],
    ascending=[False, True, False, True]
)

# total_batch_size was only needed to derive num_gpus
df = df.drop(columns=['total_batch_size'])

# These counts describe the full table, before the compacter filter below.
print("Unique models:", len(df['model_name_or_path'].unique()))
print("Unique adapter configs:", len(df['adapter_config_string'].unique()))

df = df[df["adapter_config_string"] == "compacter"]

print(df.shape)
df.to_csv('../wandb_results/throughput_noaverage_evalonly_compacter.csv', index=False)
df

Unique models: 3
Unique adapter configs: 15
(10, 13)


Unnamed: 0,Name,model_name_or_path,adapter_config_string,dataset_name,dataset_config_name,per_device_train_batch_size,gradient_accumulation_steps,eval/throughput_tokens_mean,eval/throughput_tokens_std,peak_memory_usage,eval/peak_memory_usage,num_gpus,throughput_tokens_per_gpu_test
4,noble-blaze-2976,t5-large,compacter,cnn_dailymail,3.0.0,4.0,8.0,16543.9716,266.527496,1927.661133,1927.661133,1.0,16543.9716
5,fanciful-silence-2975,t5-large,compacter,super_glue,boolq,16.0,2.0,38839.069641,5884.388672,2268.171387,2795.84419,1.0,38839.069641
7,honest-water-2973,t5-large,compacter,super_glue,rte,32.0,1.0,49533.506632,4303.685059,2305.194824,3159.204054,1.0,49533.506632
8,swift-darkness-2971,t5-large,compacter,super_glue,copa,32.0,1.0,12976.799077,2392.590332,1683.597168,1845.744141,1.0,12976.799077
1,efficient-energy-2979,t5-3b,compacter,cnn_dailymail,3.0.0,1.0,32.0,4239.623007,586.033752,5904.960449,5904.960449,1.0,4239.623007
3,vocal-night-2977,t5-3b,compacter,super_glue,boolq,2.0,16.0,3127.52097,970.247192,5928.335449,5928.335449,1.0,3127.52097
6,graceful-dew-2973,t5-3b,compacter,super_glue,rte,4.0,8.0,4581.579083,1566.018188,6050.745605,6091.258808,1.0,4581.579083
9,honest-morning-2972,t5-3b,compacter,super_glue,copa,4.0,8.0,1567.368689,144.680191,5733.413086,5733.413086,1.0,1567.368689
2,glowing-snowball-2978,t5-11b,compacter,super_glue,rte,1.0,32.0,718.281145,377.803894,22202.440918,22279.650237,1.0,718.281145
10,sleek-galaxy-2970,t5-11b,compacter,super_glue,copa,1.0,32.0,345.94685,36.218567,21815.406738,21815.406738,1.0,345.94685
