# Few shot Fine Tuning on Cola Data Set - Baseline

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
%cd '/content/drive/MyDrive/cs7643-group-project/notebooks'
!ls

/content/drive/MyDrive/cs7643-group-project/notebooks
vanilla_cola_baseline.ipynb


In [6]:
curr_filename = "vanilla_cola_baseline"

In [7]:
!pip install -q transformers accelerate bitsandbytes datasets wandb

In [9]:
from datasets import load_dataset, ClassLabel
from sklearn.metrics import accuracy_score
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer, AdamW, AutoConfig
import numpy as np
import pandas as pd
import torch
import wandb

wandb.login()  # This will prompt for your API key if not already logged in

# Set seed, load COLA dataset

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

in_domain_data = load_dataset("glue", "cola")


# Define model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

config = AutoConfig.from_pretrained("facebook/opt-125m", num_labels=2, hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1)
model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-125m", config=config)

# Function to load and parse out-of-domain COLA dataset
'''
\cite: https://github.com/uds-lsv/llmft/blob/main/task_utils.py
'''
def load_cola_ood_dataset(path, label=None, cache_dir=None):
    data_files = {"validation": path}
    dataset = load_dataset("csv", data_files=data_files, sep="\t", column_names=[
                           'code', 'label', 'annotation', 'sentence'], cache_dir=cache_dir)
    dataset = dataset["validation"]

    # cola-ood comes without indices, so we add them
    indices = list(range(len(dataset)))
    dataset = dataset.add_column(name="idx", column=indices)

    subset = "cola-ood"

    if label is not None:  # filter dataset based on label
        dataset = dataset.filter(
            lambda example: example["label"] == label)
        subset = f"{subset}-{'acceptable' if label == 1 else 'unacceptable'}"

    return dataset, subset


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}


def manipulate_inputs_for_cola_with_prompt(inputs):
    # Add pattern/prompts
    #inputs = tokenizer(["Yes or No?" + sentence for sentence in inputs["sentence"]], truncation=True, padding="max_length", max_length=128)
    inputs = tokenizer(inputs["sentence"], truncation=True, padding="max_length", max_length=128)

    return inputs

# augment in-domain-data
in_domain_data = in_domain_data.map(manipulate_inputs_for_cola_with_prompt, batched=True)

# augment out of domain data
eval_ood_data, _ = load_cola_ood_dataset(path='../datafiles/dev.tsv')
eval_ood_data = eval_ood_data.map(manipulate_inputs_for_cola_with_prompt, batched=True)


# Define parameters for training experiments (per reference paper)

few_shot_sample_size = [2, 16, 32, 64, 128]  # number of examples for each class
num_epochs = 40
batch_size = 32
learning_rate = 1e-5
weight_decay = 0.
warmup_ratio = 0.1
num_runs = 10
optimizer = AdamW(model.parameters(), lr=learning_rate) # AdamW optimizer



results_df = pd.DataFrame(columns=["n", "run", "in_domain_accuracy", "out_of_domain_accuracy"])

for n in few_shot_sample_size:
    for run_idx in range(num_runs):  # repeat 10 times for each n
        # re-iniialize model for each run
        model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-125m", config=config)
        optimizer = AdamW(model.parameters(), lr=learning_rate)

        wandb.init(
          project="bo-cola-fewshot",
          name=f"bo-cola_fewshot_n{n}_run{run_idx}",
          config={
              "learning_rate": learning_rate,
              "epochs": num_epochs,
              "batch_size": batch_size,
              "model": "facebook/opt-125m",
              "few_shot_size": n
                },
          reinit=True  # Allow multiple runs in the same script
            )

        # Select n random examples for each class from the original data
        indices_yes = np.where(np.array(in_domain_data["train"]["label"]) == 0)[0]
        indices_no = np.where(np.array(in_domain_data["train"]["label"]) == 1)[0]
        indices_yes = np.random.choice(indices_yes, n, replace=False)
        indices_no = np.random.choice(indices_no, n, replace=False)
        indices = np.concatenate([indices_yes, indices_no])

        # Select the examples for the new training set
        train_dataset = in_domain_data["train"].select(indices)

        # Define training config
        # Total steps = (#samples/batch size) * epochs
        total_steps = (len(train_dataset) // batch_size) * num_epochs

        training_args = TrainingArguments(
            output_dir = "./results",
            run_name=f"bo_cola_fewshot_n{n}_run{run_idx}",
            overwrite_output_dir = True,
            num_train_epochs = num_epochs,
            per_device_train_batch_size = batch_size,
            learning_rate = learning_rate,
            weight_decay = weight_decay,
            save_steps = 10_000,
            save_total_limit = 2,
            warmup_steps = int(warmup_ratio * total_steps),
            report_to=["wandb"],  # Explicitly enable wandb reporting
            logging_strategy="steps",
            logging_steps=10  # Log every 10 steps
            )

        # Define the trainer
        trainer = Trainer(
            model = model,
            args=training_args,
            train_dataset = train_dataset,
            compute_metrics = compute_metrics,
            optimizers=(optimizer, None),
        )

        # Train the model
        trainer.train()

        # Evaluate in-domain performance
        print(f"Evaluating in-domain performance for n={n}...")
        eval_results = trainer.evaluate(eval_dataset=in_domain_data["validation"])

        # Store the in-domain accuracy
        in_domain_accuracy = eval_results["eval_accuracy"]

        # Print the in-domain evaluation results
        for key, value in eval_results.items():
            print(f"In-domain {key}: {value}")

        # Evaluate out-of-domain performance
        print(f"Evaluating out-of-domain performance for n={n}...")
        eval_results = trainer.evaluate(eval_dataset=eval_ood_data)

        # Store the out-of-domain accuracy
        out_of_domain_accuracy = eval_results["eval_accuracy"]

        # Print the out-of-domain evaluation results
        for key, value in eval_results.items():
            print(f"Out-of-domain {key}: {value}")

        # Add the results to the DataFrame
        new_row = pd.DataFrame({
            "n": [n],
            "run": [run_idx],
            "in_domain_accuracy": [in_domain_accuracy],
            "out_of_domain_accuracy": [out_of_domain_accuracy]
        })
        results_df = pd.concat([results_df, new_row], ignore_index=True)



[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/251k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/37.6k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/37.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8551 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1043 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1063 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/251M [00:00<?, ?B/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/516 [00:00<?, ? examples/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Currently logged in as: [33mbofeng19971997[0m ([33mcs7643-group-project[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,0.1867
20,0.0095
30,0.0014
40,0.0008


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.783519446849823
In-domain eval_accuracy: 0.5838926174496645
In-domain eval_runtime: 8.5482
In-domain eval_samples_per_second: 122.014
In-domain eval_steps_per_second: 15.325
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.7696278095245361
Out-of-domain eval_accuracy: 0.6162790697674418
Out-of-domain eval_runtime: 3.1092
Out-of-domain eval_samples_per_second: 165.96
Out-of-domain eval_steps_per_second: 20.906
Out-of-domain epoch: 40.0


  results_df = pd.concat([results_df, new_row], ignore_index=True)
Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.61628
eval/loss,0.76963
eval/runtime,3.1092
eval/samples_per_second,165.96
eval/steps_per_second,20.906
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.0553
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.1877
In-domain eval_samples_per_second: 168.559
In-domain eval_steps_per_second: 21.171
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.2326
Out-of-domain eval_samples_per_second: 159.623
Out-of-domain eval_steps_per_second: 20.108
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,3.2326
eval/samples_per_second,159.623
eval/steps_per_second,20.108
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.2332
In-domain eval_samples_per_second: 167.33
In-domain eval_steps_per_second: 21.017
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.1813
Out-of-domain eval_samples_per_second: 162.2
Out-of-domain eval_steps_per_second: 20.432
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,3.1813
eval/samples_per_second,162.2
eval/steps_per_second,20.432
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.4029
In-domain eval_samples_per_second: 162.894
In-domain eval_steps_per_second: 20.459
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.5432
Out-of-domain eval_samples_per_second: 145.631
Out-of-domain eval_steps_per_second: 18.345
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,3.5432
eval/samples_per_second,145.631
eval/steps_per_second,18.345
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.386
In-domain eval_samples_per_second: 163.326
In-domain eval_steps_per_second: 20.514
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 4.6411
Out-of-domain eval_samples_per_second: 111.18
Out-of-domain eval_steps_per_second: 14.005
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,4.6411
eval/samples_per_second,111.18
eval/steps_per_second,14.005
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.3396
In-domain eval_samples_per_second: 164.52
In-domain eval_steps_per_second: 20.664
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.2487
Out-of-domain eval_samples_per_second: 158.833
Out-of-domain eval_steps_per_second: 20.008
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,3.2487
eval/samples_per_second,158.833
eval/steps_per_second,20.008
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.3667
In-domain eval_samples_per_second: 163.822
In-domain eval_steps_per_second: 20.576
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.2223
Out-of-domain eval_samples_per_second: 160.133
Out-of-domain eval_steps_per_second: 20.172
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,3.2223
eval/samples_per_second,160.133
eval/steps_per_second,20.172
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.403
In-domain eval_samples_per_second: 162.894
In-domain eval_steps_per_second: 20.459
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.2734
Out-of-domain eval_samples_per_second: 157.636
Out-of-domain eval_steps_per_second: 19.857
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,3.2734
eval/samples_per_second,157.636
eval/steps_per_second,19.857
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.4198
In-domain eval_samples_per_second: 162.467
In-domain eval_steps_per_second: 20.406
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.2412
Out-of-domain eval_samples_per_second: 159.199
Out-of-domain eval_steps_per_second: 20.054
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,3.2412
eval/samples_per_second,159.199
eval/steps_per_second,20.054
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.2703
20,0.0157
30,0.0014
40,0.0007


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 8.0814
In-domain eval_samples_per_second: 129.062
In-domain eval_steps_per_second: 16.21
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 0.8057411909103394
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.4216
Out-of-domain eval_samples_per_second: 150.808
Out-of-domain eval_steps_per_second: 18.997
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▁▁▁
train/learning_rate,█▆▃▁
train/loss,█▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.80574
eval/runtime,3.4216
eval/samples_per_second,150.808
eval/steps_per_second,18.997
total_flos,10451870023680.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.05206
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.4038
In-domain eval_samples_per_second: 162.873
In-domain eval_steps_per_second: 20.457
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.2227
Out-of-domain eval_samples_per_second: 160.114
Out-of-domain eval_steps_per_second: 20.169
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.2227
eval/samples_per_second,160.114
eval/steps_per_second,20.169
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.5508
In-domain eval_samples_per_second: 159.217
In-domain eval_steps_per_second: 19.998
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.2498
Out-of-domain eval_samples_per_second: 158.779
Out-of-domain eval_steps_per_second: 20.001
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,▁█
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.2498
eval/samples_per_second,158.779
eval/steps_per_second,20.001
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.4197
In-domain eval_samples_per_second: 162.468
In-domain eval_steps_per_second: 20.406
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.281
Out-of-domain eval_samples_per_second: 157.268
Out-of-domain eval_steps_per_second: 19.811
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.281
eval/samples_per_second,157.268
eval/steps_per_second,19.811
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.5882
In-domain eval_samples_per_second: 158.312
In-domain eval_steps_per_second: 19.884
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.279
Out-of-domain eval_samples_per_second: 157.366
Out-of-domain eval_steps_per_second: 19.823
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.279
eval/samples_per_second,157.366
eval/steps_per_second,19.823
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.714
In-domain eval_samples_per_second: 155.347
In-domain eval_steps_per_second: 19.511
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.2953
Out-of-domain eval_samples_per_second: 156.588
Out-of-domain eval_steps_per_second: 19.725
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.2953
eval/samples_per_second,156.588
eval/steps_per_second,19.725
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.6082
In-domain eval_samples_per_second: 157.835
In-domain eval_steps_per_second: 19.824
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.3696
Out-of-domain eval_samples_per_second: 153.135
Out-of-domain eval_steps_per_second: 19.29
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.3696
eval/samples_per_second,153.135
eval/steps_per_second,19.29
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.4398
In-domain eval_samples_per_second: 161.961
In-domain eval_steps_per_second: 20.342
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.3125
Out-of-domain eval_samples_per_second: 155.773
Out-of-domain eval_steps_per_second: 19.623
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.3125
eval/samples_per_second,155.773
eval/steps_per_second,19.623
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.4938
In-domain eval_samples_per_second: 160.615
In-domain eval_steps_per_second: 20.173
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.284
Out-of-domain eval_samples_per_second: 157.127
Out-of-domain eval_steps_per_second: 19.793
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.284
eval/samples_per_second,157.127
eval/steps_per_second,19.793
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.504
In-domain eval_samples_per_second: 160.364
In-domain eval_steps_per_second: 20.142
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.3077
Out-of-domain eval_samples_per_second: 156.001
Out-of-domain eval_steps_per_second: 19.651
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,3.3077
eval/samples_per_second,156.001
eval/steps_per_second,19.651
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.5385
20,0.1595
30,0.0334
40,0.0112


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.865591824054718
In-domain eval_accuracy: 0.5263662511984659
In-domain eval_runtime: 6.701
In-domain eval_samples_per_second: 155.649
In-domain eval_steps_per_second: 19.549
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 0.8560364246368408
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 5.0126
Out-of-domain eval_samples_per_second: 102.94
Out-of-domain eval_steps_per_second: 12.967
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▆████
train/global_step,▁▃▆████
train/grad_norm,█▃▁▁
train/learning_rate,█▆▃▁
train/loss,█▃▁▁

0,1
eval/accuracy,0.53101
eval/loss,0.85604
eval/runtime,5.0126
eval/samples_per_second,102.94
eval/steps_per_second,12.967
total_flos,83614960189440.0
train/epoch,40.0
train/global_step,40.0
train/grad_norm,0.4063
train/learning_rate,0.0


Step,Training Loss
10,0.6449
20,0.3607
30,0.1351
40,0.0366
50,0.0093
60,0.0038
70,0.0025
80,0.0021


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.0919173955917358
In-domain eval_accuracy: 0.5723873441994247
In-domain eval_runtime: 6.6533
In-domain eval_samples_per_second: 156.765
In-domain eval_steps_per_second: 19.69
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.0533303022384644
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.3297
Out-of-domain eval_samples_per_second: 154.97
Out-of-domain eval_steps_per_second: 19.521
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▅▄▁▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▂▁▁▁▁▁

0,1
eval/accuracy,0.58333
eval/loss,1.05333
eval/runtime,3.3297
eval/samples_per_second,154.97
eval/steps_per_second,19.521
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.11019
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.6564
In-domain eval_samples_per_second: 156.691
In-domain eval_steps_per_second: 19.68
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.324
Out-of-domain eval_samples_per_second: 155.234
Out-of-domain eval_steps_per_second: 19.555
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,3.324
eval/samples_per_second,155.234
eval/steps_per_second,19.555
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.6307
In-domain eval_samples_per_second: 157.299
In-domain eval_steps_per_second: 19.757
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.4002
Out-of-domain eval_samples_per_second: 151.754
Out-of-domain eval_steps_per_second: 19.116
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,3.4002
eval/samples_per_second,151.754
eval/steps_per_second,19.116
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.6
In-domain eval_samples_per_second: 158.031
In-domain eval_steps_per_second: 19.849
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.3575
Out-of-domain eval_samples_per_second: 153.685
Out-of-domain eval_steps_per_second: 19.36
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,3.3575
eval/samples_per_second,153.685
eval/steps_per_second,19.36
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.604
In-domain eval_samples_per_second: 157.935
In-domain eval_steps_per_second: 19.837
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.37
Out-of-domain eval_samples_per_second: 153.116
Out-of-domain eval_steps_per_second: 19.288
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,3.37
eval/samples_per_second,153.116
eval/steps_per_second,19.288
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.5754
In-domain eval_samples_per_second: 158.622
In-domain eval_steps_per_second: 19.923
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.3059
Out-of-domain eval_samples_per_second: 156.084
Out-of-domain eval_steps_per_second: 19.662
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,3.3059
eval/samples_per_second,156.084
eval/steps_per_second,19.662
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.8302
In-domain eval_samples_per_second: 152.704
In-domain eval_steps_per_second: 19.18
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 4.175
Out-of-domain eval_samples_per_second: 123.593
Out-of-domain eval_steps_per_second: 15.569
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,4.175
eval/samples_per_second,123.593
eval/steps_per_second,15.569
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.637
In-domain eval_samples_per_second: 157.15
In-domain eval_steps_per_second: 19.738
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.3295
Out-of-domain eval_samples_per_second: 154.976
Out-of-domain eval_steps_per_second: 19.522
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,3.3295
eval/samples_per_second,154.976
eval/steps_per_second,19.522
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.69
In-domain eval_samples_per_second: 155.905
In-domain eval_steps_per_second: 19.582
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.3409
Out-of-domain eval_samples_per_second: 154.449
Out-of-domain eval_steps_per_second: 19.456
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,3.3409
eval/samples_per_second,154.449
eval/steps_per_second,19.456
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.6721
20,0.4107
30,0.1894
40,0.0676
50,0.02
60,0.0073
70,0.0038
80,0.003


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9720333814620972
In-domain eval_accuracy: 0.5627996164908916
In-domain eval_runtime: 6.6714
In-domain eval_samples_per_second: 156.338
In-domain eval_steps_per_second: 19.636
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 0.9188656806945801
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.3407
Out-of-domain eval_samples_per_second: 154.461
Out-of-domain eval_steps_per_second: 19.457
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▂▃▄▅▆▇████
train/global_step,▁▂▃▄▅▆▇████
train/grad_norm,█▃▄▂▁▁▁▁
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▅▃▂▁▁▁▁

0,1
eval/accuracy,0.54651
eval/loss,0.91887
eval/runtime,3.3407
eval/samples_per_second,154.461
eval/steps_per_second,19.457
total_flos,167229920378880.0
train/epoch,40.0
train/global_step,80.0
train/grad_norm,0.2078
train/learning_rate,0.0


Step,Training Loss
10,0.7143
20,0.608
30,0.4237
40,0.2532
50,0.1182
60,0.0387
70,0.0109
80,0.0031
90,0.0014
100,0.0009


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.4806867837905884
In-domain eval_accuracy: 0.5953978906999041
In-domain eval_runtime: 6.8132
In-domain eval_samples_per_second: 153.086
In-domain eval_steps_per_second: 19.227
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.3931804895401
Out-of-domain eval_accuracy: 0.5930232558139535
Out-of-domain eval_runtime: 3.4195
Out-of-domain eval_samples_per_second: 150.901
Out-of-domain eval_steps_per_second: 19.009
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▆▆▅▃▂▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.59302
eval/loss,1.39318
eval/runtime,3.4195
eval/samples_per_second,150.901
eval/steps_per_second,19.009
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.04648
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 6.8318
In-domain eval_samples_per_second: 152.668
In-domain eval_steps_per_second: 19.175
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 3.4224
Out-of-domain eval_samples_per_second: 150.771
Out-of-domain eval_steps_per_second: 18.992
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,3.4224
eval/samples_per_second,150.771
eval/steps_per_second,18.992
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 6.7199
In-domain eval_samples_per_second: 155.21
In-domain eval_steps_per_second: 19.494
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 3.3952
Out-of-domain eval_samples_per_second: 151.978
Out-of-domain eval_steps_per_second: 19.145
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,3.3952
eval/samples_per_second,151.978
eval/steps_per_second,19.145
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 6.8595
In-domain eval_samples_per_second: 152.051
In-domain eval_steps_per_second: 19.098
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 3.3863
Out-of-domain eval_samples_per_second: 152.377
Out-of-domain eval_steps_per_second: 19.195
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,3.3863
eval/samples_per_second,152.377
eval/steps_per_second,19.195
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 7.4015
In-domain eval_samples_per_second: 140.917
In-domain eval_steps_per_second: 17.699
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 3.6912
Out-of-domain eval_samples_per_second: 139.791
Out-of-domain eval_steps_per_second: 17.609
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,3.6912
eval/samples_per_second,139.791
eval/steps_per_second,17.609
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 6.605
In-domain eval_samples_per_second: 157.91
In-domain eval_steps_per_second: 19.833
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 3.3651
Out-of-domain eval_samples_per_second: 153.337
Out-of-domain eval_steps_per_second: 19.316
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,3.3651
eval/samples_per_second,153.337
eval/steps_per_second,19.316
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 6.7902
In-domain eval_samples_per_second: 153.603
In-domain eval_steps_per_second: 19.292
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 3.4173
Out-of-domain eval_samples_per_second: 150.994
Out-of-domain eval_steps_per_second: 19.021
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,3.4173
eval/samples_per_second,150.994
eval/steps_per_second,19.021
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 7.2111
In-domain eval_samples_per_second: 144.638
In-domain eval_steps_per_second: 18.166
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 4.9712
Out-of-domain eval_samples_per_second: 103.798
Out-of-domain eval_steps_per_second: 13.075
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,4.9712
eval/samples_per_second,103.798
eval/steps_per_second,13.075
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 7.4691
In-domain eval_samples_per_second: 139.642
In-domain eval_steps_per_second: 17.539
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 5.15
Out-of-domain eval_samples_per_second: 100.194
Out-of-domain eval_steps_per_second: 12.621
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,5.15
eval/samples_per_second,100.194
eval/steps_per_second,12.621
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.7894
20,0.6498
30,0.454
40,0.2658
50,0.1205
60,0.0339
70,0.0072
80,0.002
90,0.001
100,0.0006


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.935601830482483
In-domain eval_accuracy: 0.5915627996164909
In-domain eval_runtime: 7.1219
In-domain eval_samples_per_second: 146.449
In-domain eval_steps_per_second: 18.394
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.8550150394439697
Out-of-domain eval_accuracy: 0.5755813953488372
Out-of-domain eval_runtime: 3.8635
Out-of-domain eval_samples_per_second: 133.557
Out-of-domain eval_steps_per_second: 16.824
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████
train/grad_norm,█▇▄▅▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▆██▇▇▆▆▅▅▄▄▃▃▂▂▁
train/loss,█▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.57558
eval/loss,1.85502
eval/runtime,3.8635
eval/samples_per_second,133.557
eval/steps_per_second,16.824
total_flos,334459840757760.0
train/epoch,40.0
train/global_step,160.0
train/grad_norm,0.02809
train/learning_rate,0.0


Step,Training Loss
10,0.823
20,0.6964
30,0.6354
40,0.519
50,0.3778
60,0.231
70,0.125
80,0.048
90,0.0141
100,0.0038


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4327425956726074
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 8.1714
In-domain eval_samples_per_second: 127.64
In-domain eval_steps_per_second: 16.032
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.3299949169158936
Out-of-domain eval_accuracy: 0.624031007751938
Out-of-domain eval_runtime: 3.8289
Out-of-domain eval_samples_per_second: 134.764
Out-of-domain eval_steps_per_second: 16.976
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,█▅▃▄▅▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▆▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.62403
eval/loss,2.32999
eval/runtime,3.8289
eval/samples_per_second,134.764
eval/steps_per_second,16.976
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.0081
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 7.3684
In-domain eval_samples_per_second: 141.551
In-domain eval_steps_per_second: 17.779
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 4.1318
Out-of-domain eval_samples_per_second: 124.885
Out-of-domain eval_steps_per_second: 15.732
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▅▅▅▅▆▄█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▇▆▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.61047
eval/loss,2.33262
eval/runtime,4.1318
eval/samples_per_second,124.885
eval/steps_per_second,15.732
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.00817
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 7.2198
In-domain eval_samples_per_second: 144.464
In-domain eval_steps_per_second: 18.145
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 3.6091
Out-of-domain eval_samples_per_second: 142.97
Out-of-domain eval_steps_per_second: 18.01
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▅▅▅▅▆▄█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▇▆▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.61047
eval/loss,2.33262
eval/runtime,3.6091
eval/samples_per_second,142.97
eval/steps_per_second,18.01
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.00817
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 7.0569
In-domain eval_samples_per_second: 147.799
In-domain eval_steps_per_second: 18.563
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 4.4571
Out-of-domain eval_samples_per_second: 115.771
Out-of-domain eval_steps_per_second: 14.583
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▅▅▅▅▆▄█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▇▆▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.61047
eval/loss,2.33262
eval/runtime,4.4571
eval/samples_per_second,115.771
eval/steps_per_second,14.583
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.00817
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 7.318
In-domain eval_samples_per_second: 142.525
In-domain eval_steps_per_second: 17.901
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 4.52
Out-of-domain eval_samples_per_second: 114.159
Out-of-domain eval_steps_per_second: 14.38
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▅▅▅▅▆▄█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▇▆▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.61047
eval/loss,2.33262
eval/runtime,4.52
eval/samples_per_second,114.159
eval/steps_per_second,14.38
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.00817
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 7.1092
In-domain eval_samples_per_second: 146.712
In-domain eval_steps_per_second: 18.427
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 5.1543
Out-of-domain eval_samples_per_second: 100.11
Out-of-domain eval_steps_per_second: 12.611
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▅▅▅▅▆▄█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▇▆▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.61047
eval/loss,2.33262
eval/runtime,5.1543
eval/samples_per_second,100.11
eval/steps_per_second,12.611
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.00817
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 8.1342
In-domain eval_samples_per_second: 128.225
In-domain eval_steps_per_second: 16.105
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 3.8078
Out-of-domain eval_samples_per_second: 135.512
Out-of-domain eval_steps_per_second: 17.07
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▅▅▅▅▆▄█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▇▆▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.61047
eval/loss,2.33262
eval/runtime,3.8078
eval/samples_per_second,135.512
eval/steps_per_second,17.07
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.00817
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 7.6456
In-domain eval_samples_per_second: 136.419
In-domain eval_steps_per_second: 17.134
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 3.9118
Out-of-domain eval_samples_per_second: 131.907
Out-of-domain eval_steps_per_second: 16.616
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▅▅▅▅▆▄█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▇▆▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.61047
eval/loss,2.33262
eval/runtime,3.9118
eval/samples_per_second,131.907
eval/steps_per_second,16.616
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.00817
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 7.9807
In-domain eval_samples_per_second: 130.691
In-domain eval_steps_per_second: 16.415
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 3.7745
Out-of-domain eval_samples_per_second: 136.707
Out-of-domain eval_steps_per_second: 17.221
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,█▁
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▅▅▅▅▆▄█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆████▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▇▇▆▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.61047
eval/loss,2.33262
eval/runtime,3.7745
eval/samples_per_second,136.707
eval/steps_per_second,17.221
total_flos,668919681515520.0
train/epoch,40.0
train/global_step,320.0
train/grad_norm,0.00817
train/learning_rate,0.0


Step,Training Loss
10,0.7859
20,0.7034
30,0.6316
40,0.5246
50,0.4017
60,0.2656
70,0.1544
80,0.0672
90,0.0213
100,0.0096


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.363882064819336
In-domain eval_accuracy: 0.6174496644295302
In-domain eval_runtime: 7.5242
In-domain eval_samples_per_second: 138.619
In-domain eval_steps_per_second: 17.41
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.332620859146118
Out-of-domain eval_accuracy: 0.6104651162790697
Out-of-domain eval_runtime: 5.6478
Out-of-domain eval_samples_per_second: 91.363
Out-of-domain eval_steps_per_second: 11.509
Out-of-domain epoch: 40.0


In [10]:
# Save the DataFrame to a CSV file
results_df.to_csv(f'../Results/{curr_filename}.csv', sep = ',', index=False)

In [None]:
# disconnect runtime
from google.colab import runtime
runtime.unassign()