# **Parameter Efficient Fine Tuning using LoRA adapter**

Paper being referred: "*LORA: LOW-RANK ADAPTATION OF LARGE LANGUAGE MODELS*" (https://arxiv.org/pdf/2106.09685.pdf)

In [1]:
# Mount into drive

from google.colab import drive

drive.mount("/content/drive")

# Change directory to the package folder
#!! This should be the base directory of the project, this directory should look like the screenshot below.

%cd /content/drive/MyDrive/llm_finetuning-main/notebooks

# Verify the contents of the current folder
!ls

Mounted at /content/drive
/content/drive/MyDrive/llm_finetuning-main/notebooks
context_distillation_mnli.ipynb		  peft_Lora_RTE_350m.csv
few_shot_context_distillation_mnli.ipynb  peft_Lora_RTE_V3.csv
ICL.ipynb				  pre_trained_opt_with_inference.ipynb
logs					  results
pbft_cola_baseline.ipynb		  vanilla_cola_adaptive_v2.ipynb
pbft_mnli_baseline_hansOOD_350m.ipynb	  vanilla_cola_baseline_350M.ipynb
pbft_mnli_baseline_hansOOD.ipynb	  vanilla_cola_baseline.ipynb
pbft_mnli_baseline.ipynb		  vanilla_cola_baseline_v2.ipynb
peft_LoRA_on_cola.ipynb			  vanilla_mnli_baseline_Hans_350M.ipynb
peft_LoRA_on_RTE.ipynb			  vanilla_mnli_baseline_Hans.ipynb
peft_LoRA_on_RTE_v3.ipynb		  wandb
peft_Lora_RTE2.csv


In [2]:
!pip install -q transformers accelerate bitsandbytes datasets torch peft tqdm

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible

In [3]:
# Load required libraries

from datasets import load_dataset, ClassLabel
from sklearn.metrics import accuracy_score
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer, AdamW, AutoConfig
from peft import LoraConfig, get_peft_model
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

# Set seed, load RTE dataset

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

rte_data = load_dataset("glue", "rte") # load rte data
hans_data = load_dataset("hans")

# preprocess RTE dataset
# def preprocess_rte(batch):
#     encoding = tokenizer(
#         [f"{premise} Question: {hypothesis} Yes or No?" for premise, hypothesis in zip(batch["sentence1"], batch["sentence2"])],
#         truncation=True,
#         padding="max_length",
#         max_length=128
#     )
#     return encoding

def preprocess_rte(batch):
    encoding = tokenizer(
        [f'Given the statement "{sentence1}", does it necessarily follow that "{sentence2}" is true?'
         for sentence1, sentence2 in zip(batch["sentence1"], batch["sentence2"])],
        truncation=True,
        padding="max_length",
        max_length=128,
        return_tensors='pt'
    )
    batch["input_ids"] = encoding["input_ids"].squeeze()
    batch["attention_mask"] = encoding["attention_mask"].squeeze()
    return batch

# # preprocess hans
# def preprocess_hans(batch):
#     encoding = tokenizer(
#         [f"{premise} Question: {hypothesis} Yes or No?" for premise, hypothesis in zip(batch["premise"], batch["hypothesis"])],
#         truncation=True,
#         padding="max_length",
#         max_length=128
#     )
#     if "labels" not in batch:
#         encoding["labels"] = batch["label"]
#     return encoding

# # Ensure 'labels' column is correct for HANS
# if "labels" in hans_data["validation"].column_names:
#     print("'labels' column already exists in HANS. Skipping renaming.")
# else:
#     hans_data = hans_data.rename_column("label", "labels")

# # Remove redundant 'label' column
# if "label" in hans_data["validation"].column_names:
#     hans_data = hans_data.remove_columns(["label"])

# # Set format for PyTorch
# hans_data = hans_data.with_format("torch")

# # Preprocess HANS dataset
# def preprocess_hans(batch):
#     encoding = tokenizer(
#         [f"{premise} Question: {hypothesis} Yes or No?" for premise, hypothesis in zip(batch["premise"], batch["hypothesis"])],
#         truncation=True,
#         padding="max_length",
#         max_length=128
#     )
#     # Use 'labels' directly
#     encoding["labels"] = batch["labels"]
#     return encoding

def preprocess_hans(batch):
    fixed_context = "Given the premise, does the hypothesis hold true? "
    encoding = tokenizer(
        [f'{fixed_context} Premise: {premise} Hypothesis: {hypothesis}'
         for premise, hypothesis in zip(batch["premise"], batch["hypothesis"])],
        truncation=True,
        padding="max_length",
        max_length=128,
        return_tensors='pt'
    )
    batch["input_ids"] = encoding["input_ids"].squeeze()
    batch["attention_mask"] = encoding["attention_mask"].squeeze()
    return batch

# Load tokenizer and model config
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
config = AutoConfig.from_pretrained("facebook/opt-350m", num_labels=2, hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1)

# Preprocess datasets
rte_data = rte_data.map(preprocess_rte, batched=True)
hans_data = hans_data.map(preprocess_hans, batched=True)

# Inspect the first few rows of the HANS dataset
# print(hans_data["validation"].to_pandas().head())


# # Set dataset format to PyTorch
# rte_data = rte_data.rename_column("label", "labels").with_format("torch")
# hans_data = hans_data.rename_column("label", "labels").with_format("torch")

# Split datasets
train_dataset = rte_data["train"]
eval_dataset = rte_data["validation"]
hans_eval_dataset = hans_data["validation"]

# Define compute metrics function
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Define parameters for training experiments (per reference paper)

few_shot_sample_size = [2, 32, 128]  # number of examples for each class
num_epochs = 40
batch_size = 32
learning_rate = 1e-5
weight_decay = 0.0
warmup_ratio = 0.1
num_runs = 3

# Vaiables to store training results into CSV
results_df = pd.DataFrame(columns=["n", "rank", "run", "in_domain_accuracy", "out_of_domain_accuracy"])

lora_configs = [
    LoraConfig(r=4, lora_alpha=8, lora_dropout=0.1, task_type="SEQ_CLS")
]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/584k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/69.0k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/621k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2490 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/277 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3000 [00:00<?, ? examples/s]

README.md:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

hans.py:   0%|          | 0.00/5.22k [00:00<?, ?B/s]

The repository for hans contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/hans.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/30000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/30000 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/30000 [00:00<?, ? examples/s]

Map:   0%|          | 0/30000 [00:00<?, ? examples/s]

# **Experimentation loop**

In [None]:
import time
start_time = time.time()

for _, n in enumerate(tqdm(few_shot_sample_size)):
    for _, lora_config in enumerate(tqdm(lora_configs)):
      for run_idx in tqdm(range(num_runs)):  # repeat 10 times for each n
          # re-iniialize model for each run
          model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-350m")
          model = get_peft_model(model, lora_config)
          optimizer = AdamW(model.parameters(), lr=learning_rate)

          # Select n random examples for each class from the original data
          indices_yes = [i for i, label in enumerate(train_dataset["label"]) if label == 0]
          indices_no = [i for i, label in enumerate(train_dataset["label"]) if label == 1]
          indices_yes = np.random.choice(indices_yes, n, replace=False)
          indices_no = np.random.choice(indices_no, n, replace=False)
          indices = np.concatenate([indices_yes, indices_no])

          # Select the examples for the new training set
          few_shot_train_dataset = train_dataset.select(indices)

          # Define training config
          # Total steps = (#samples/batch size) * epochs
          total_steps = (len(few_shot_train_dataset) // batch_size) * num_epochs

          training_args = TrainingArguments(
              output_dir = "./results",
              overwrite_output_dir = True,
              num_train_epochs = num_epochs,
              per_device_train_batch_size = batch_size,
              learning_rate = learning_rate,
              weight_decay = weight_decay,
              save_steps = 10_000,
              save_total_limit = 2,
              warmup_steps = int(warmup_ratio * total_steps),
          )

          # Define the trainer
          trainer = Trainer(
              model = model,
              args=training_args,
              train_dataset = train_dataset,
              compute_metrics = compute_metrics,
              optimizers=(optimizer, None),
          )

          # Train the model
          trainer.train()

          # Evaluate in-domain performance
          print(f"Evaluating in-domain performance for n={n}, rank={lora_config.r}...")
          eval_results_in_domain = trainer.evaluate(eval_dataset=eval_dataset)
          in_domain_accuracy = eval_results_in_domain["eval_accuracy"]

          # Print the in-domain evaluation results
          for key, value in eval_results_in_domain.items():
              print(f"In-domain {key}: {value}")

          # Evaluate out-of-domain performance
          print(f"Evaluating out-of-domain performance for n={n}, rank={lora_config.r}...")
          eval_results_out_domain = trainer.evaluate(eval_dataset=hans_eval_dataset)
          out_of_domain_accuracy = eval_results_out_domain["eval_accuracy"]

          # Print the out-of-domain evaluation results
          for key, value in eval_results_out_domain.items():
              print(f"Out-of-domain {key}: {value}")

          # Add the results to the DataFrame
          new_row = pd.DataFrame({
              "n": [n],
              "rank": [lora_config.r],
              "run": [run_idx],
              "in_domain_accuracy": [in_domain_accuracy],
              "out_of_domain_accuracy": [out_of_domain_accuracy]
          })
          results_df = pd.concat([results_df, new_row], ignore_index=True)

print(f'Finished training!!')
end_time = time.time()

# Calculate elapsed time
elapsed_time = end_time - start_time

print(f"Time used: {elapsed_time:.2f} seconds")

  0%|          | 0/3 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s][A

  0%|          | 0/3 [00:00<?, ?it/s][A[A

pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
500,0.8015
1000,0.7472
1500,0.7129
2000,0.6849
2500,0.6516
3000,0.6381


Evaluating in-domain performance for n=2, rank=4...


In-domain eval_loss: 0.6399139761924744
In-domain eval_accuracy: 0.6642599277978339
In-domain eval_runtime: 6.7378
In-domain eval_samples_per_second: 41.111
In-domain eval_steps_per_second: 5.195
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2, rank=4...


  results_df = pd.concat([results_df, new_row], ignore_index=True)


 33%|███▎      | 1/3 [1:31:50<3:03:41, 5510.62s/it][A[A

Out-of-domain eval_loss: 0.6881028413772583
Out-of-domain eval_accuracy: 0.5471333333333334
Out-of-domain eval_runtime: 715.6914
Out-of-domain eval_samples_per_second: 41.918
Out-of-domain eval_steps_per_second: 5.24
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.8919
1000,0.7794
1500,0.7507
2000,0.734
2500,0.7234
3000,0.7172


Evaluating in-domain performance for n=2, rank=4...


In-domain eval_loss: 0.7446869611740112
In-domain eval_accuracy: 0.628158844765343
In-domain eval_runtime: 6.7938
In-domain eval_samples_per_second: 40.773
In-domain eval_steps_per_second: 5.152
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2, rank=4...




 67%|██████▋   | 2/3 [3:00:46<1:30:07, 5407.90s/it][A[A

Out-of-domain eval_loss: 0.8851304054260254
Out-of-domain eval_accuracy: 0.5000666666666667
Out-of-domain eval_runtime: 714.6346
Out-of-domain eval_samples_per_second: 41.979
Out-of-domain eval_steps_per_second: 5.247
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.8919
1000,0.7794
1500,0.7507
2000,0.734
2500,0.7234
3000,0.7172


Evaluating in-domain performance for n=2, rank=4...


In-domain eval_loss: 0.7446869611740112
In-domain eval_accuracy: 0.628158844765343
In-domain eval_runtime: 6.8052
In-domain eval_samples_per_second: 40.704
In-domain eval_steps_per_second: 5.143
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2, rank=4...




100%|██████████| 3/3 [4:29:43<00:00, 5394.48s/it]

100%|██████████| 1/1 [4:29:43<00:00, 16183.46s/it]
 33%|███▎      | 1/3 [4:29:43<8:59:26, 16183.47s/it]

Out-of-domain eval_loss: 0.8851304054260254
Out-of-domain eval_accuracy: 0.5000666666666667
Out-of-domain eval_runtime: 715.0678
Out-of-domain eval_samples_per_second: 41.954
Out-of-domain eval_steps_per_second: 5.244
Out-of-domain epoch: 40.0



  0%|          | 0/1 [00:00<?, ?it/s][A

  0%|          | 0/3 [00:00<?, ?it/s][A[ASome weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/662M [00:00<?, ?B/s]

Step,Training Loss
500,0.8972
1000,0.7795
1500,0.7508
2000,0.7338
2500,0.7231
3000,0.717


Evaluating in-domain performance for n=32, rank=4...


In-domain eval_loss: 0.7450775504112244
In-domain eval_accuracy: 0.628158844765343
In-domain eval_runtime: 6.7811
In-domain eval_samples_per_second: 40.849
In-domain eval_steps_per_second: 5.161
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32, rank=4...




 33%|███▎      | 1/3 [1:28:56<2:57:52, 5336.27s/it][A[A

Out-of-domain eval_loss: 0.8873845338821411
Out-of-domain eval_accuracy: 0.5000666666666667
Out-of-domain eval_runtime: 716.8109
Out-of-domain eval_samples_per_second: 41.852
Out-of-domain eval_steps_per_second: 5.232
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.8972
1000,0.7795
1500,0.7508
2000,0.7338
2500,0.7231
3000,0.717


Evaluating in-domain performance for n=32, rank=4...


In-domain eval_loss: 0.7450775504112244
In-domain eval_accuracy: 0.628158844765343
In-domain eval_runtime: 6.8043
In-domain eval_samples_per_second: 40.709
In-domain eval_steps_per_second: 5.144
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32, rank=4...




 67%|██████▋   | 2/3 [2:57:49<1:28:54, 5334.26s/it][A[A

Out-of-domain eval_loss: 0.8873845338821411
Out-of-domain eval_accuracy: 0.5000666666666667
Out-of-domain eval_runtime: 713.8792
Out-of-domain eval_samples_per_second: 42.024
Out-of-domain eval_steps_per_second: 5.253
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.8972
1000,0.7795
1500,0.7508
2000,0.7338
2500,0.7231
3000,0.717


Evaluating in-domain performance for n=32, rank=4...


In-domain eval_loss: 0.7450775504112244
In-domain eval_accuracy: 0.628158844765343
In-domain eval_runtime: 6.8262
In-domain eval_samples_per_second: 40.579
In-domain eval_steps_per_second: 5.127
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32, rank=4...




100%|██████████| 3/3 [4:26:48<00:00, 5336.10s/it]

100%|██████████| 1/1 [4:26:48<00:00, 16008.30s/it]
 67%|██████▋   | 2/3 [8:56:31<4:28:00, 16080.44s/it]

Out-of-domain eval_loss: 0.8873845338821411
Out-of-domain eval_accuracy: 0.5000666666666667
Out-of-domain eval_runtime: 716.8981
Out-of-domain eval_samples_per_second: 41.847
Out-of-domain eval_steps_per_second: 5.231
Out-of-domain epoch: 40.0



  0%|          | 0/1 [00:00<?, ?it/s][A

  0%|          | 0/3 [00:00<?, ?it/s][A[ASome weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.9114
1000,0.7795
1500,0.7506
2000,0.7334
2500,0.7226
3000,0.7167


Evaluating in-domain performance for n=128, rank=4...


In-domain eval_loss: 0.7440305948257446
In-domain eval_accuracy: 0.628158844765343
In-domain eval_runtime: 6.8455
In-domain eval_samples_per_second: 40.465
In-domain eval_steps_per_second: 5.113
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128, rank=4...




 33%|███▎      | 1/3 [1:28:57<2:57:55, 5337.95s/it][A[A

Out-of-domain eval_loss: 0.8876795768737793
Out-of-domain eval_accuracy: 0.5000666666666667
Out-of-domain eval_runtime: 716.6388
Out-of-domain eval_samples_per_second: 41.862
Out-of-domain eval_steps_per_second: 5.233
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.9114
1000,0.7795
1500,0.7506
2000,0.7334
2500,0.7226
3000,0.7167


Evaluating in-domain performance for n=128, rank=4...


In-domain eval_loss: 0.7440305948257446
In-domain eval_accuracy: 0.628158844765343
In-domain eval_runtime: 6.8317
In-domain eval_samples_per_second: 40.546
In-domain eval_steps_per_second: 5.123
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128, rank=4...




 67%|██████▋   | 2/3 [2:57:56<1:28:58, 5338.52s/it][A[A

Out-of-domain eval_loss: 0.8876795768737793
Out-of-domain eval_accuracy: 0.5000666666666667
Out-of-domain eval_runtime: 717.3502
Out-of-domain eval_samples_per_second: 41.821
Out-of-domain eval_steps_per_second: 5.228
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.9114
1000,0.7795


In [None]:
# Group the results by number of examples in few-shot learning runs and compute the average and maximum performance for each group
grouped_results = results_df.groupby(['n', 'rank'])

average_in_domain_accuracy = grouped_results['in_domain_accuracy'].mean()
maximum_in_domain_accuracy = grouped_results['in_domain_accuracy'].max()

average_out_of_domain_accuracy = grouped_results['out_of_domain_accuracy'].mean()
maximum_out_of_domain_accuracy = grouped_results['out_of_domain_accuracy'].max()

# Print the results for each 'n' and each rank
for n in few_shot_sample_size:
    for lora_config in lora_configs:
      print(f"For n={n}:")
      print(f"For rank={lora_config.r}:")
      print(f"Average in-domain accuracy: {average_in_domain_accuracy[n]}")
      print(f"Maximum in-domain accuracy: {maximum_in_domain_accuracy[n]}")
      print(f"Average out-of-domain accuracy: {average_out_of_domain_accuracy[n]}")
      print(f"Maximum out-of-domain accuracy: {maximum_out_of_domain_accuracy[n]}")
      print("\n")

# Compute and print the overall average and maximum performance
print("Overall:")
print(f"Average in-domain accuracy: {results_df['in_domain_accuracy'].mean()}")
print(f"Maximum in-domain accuracy: {results_df['in_domain_accuracy'].max()}")
print(f"Average out-of-domain accuracy: {results_df['out_of_domain_accuracy'].mean()}")
print(f"Maximum out-of-domain accuracy: {results_df['out_of_domain_accuracy'].max()}")


In [None]:
# Define the filename
curr_filename = "peft_Lora_RTE_350m.csv"

# Save the DataFrame to the current directory
results_df.to_csv(curr_filename, sep=',', index=False)
print(f"Results saved to {curr_filename} in the current directory.")



In [None]:
# Save the DataFrame to a CSV file
results_df.to_csv(f'../Results/{curr_filename.split(".")[0]}.csv', sep = ',', index=False)

In [None]:
# # disconnect runtime
from google.colab import runtime
runtime.unassign()