In [2]:
# We have the necessary imports below
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from datasets import Dataset
import torch
import evaluate
import pandas as pd
from transformers import pipeline

In [6]:
# We are setting up the model, albert-base-v2, below specifically using SequenceClassification and id2label and label2id to go back and forth between labels and their encoding
model_name = "albert-base-v2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, id2label={0: "Non-Limerick", 1: "Limerick"}, label2id={"Non-Limerick": 0, "Limerick": 1})

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# We prepare the dataset, tokenize the information, etc. below
def prepare_dataset(poems, labels):
    label_encoding = [1 if label == "Limerick" else 0 for label in labels]
    return Dataset.from_dict({"text": [f"Poem:\n{p}" for p, l in zip(poems, labels)], "label": label_encoding})

def tokenize_function(examples, tokenizer, max_length=64):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)
    
questions_df = pd.read_csv("Fine_Tuning_Assignment - Limerick Classification-2.csv")

dataset = prepare_dataset(questions_df["Input (Poem)"], questions_df["Label (Limerick or Non-Limerick)"])

In [8]:
dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 300
})

In [9]:
tokenized_dataset = dataset.map(lambda examples: tokenize_function(examples, tokenizer), batched=True)

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [6]:
train_test = tokenized_dataset.train_test_split(test_size=0.1)

In [7]:
train_test

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 270
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 30
    })
})

In [8]:
dataset[0], tokenized_dataset[0]

({'text': "Poem:\nIf the Limerick's cocktail you 'd quaff,\nStir nonsense with wit, each a half,\nAdd a dash of good fun,\nDrop in a pun-\nAnd then make a noise like a laugh.",
  'label': 1},
 {'text': "Poem:\nIf the Limerick's cocktail you 'd quaff,\nStir nonsense with wit, each a half,\nAdd a dash of good fun,\nDrop in a pun-\nAnd then make a noise like a laugh.",
  'label': 1,
  'input_ids': [2,
   4629,
   45,
   100,
   14,
   18185,
   22,
   18,
   18816,
   42,
   13,
   22,
   43,
   7131,
   2460,
   15,
   13216,
   13,
   16684,
   29,
   9642,
   15,
   206,
   21,
   519,
   15,
   3547,
   21,
   8405,
   16,
   254,
   2414,
   15,
   2804,
   19,
   21,
   11582,
   8,
   17,
   94,
   233,
   21,
   3406,
   101,
   21,
   3051,
   9,
   3,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0],
  'token_type_ids': [0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
  

In [9]:
# Define training arguments
output_dir = "./fine_tuned_albert"

# Set up training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=8,
    num_train_epochs=5,
    save_steps=1000,
    save_total_limit=2,
    logging_dir=f"{output_dir}/logs",
    logging_steps=10,
    learning_rate=2e-5,
    warmup_steps=5,
    weight_decay=0.01,
    evaluation_strategy="steps"
)



In [10]:
# We compute classification metrics (accuracy, precision, recall, and f1 using the evaluate library
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

def compute_metrics(predictions):
    all_logits, all_labels = predictions
    final_predictions = all_logits.argmax(axis=-1)
    accuracy_score = accuracy.compute(predictions=final_predictions, references=all_labels)
    precision_score = precision.compute(predictions=final_predictions, references=all_labels, average="binary")
    recall_score = recall.compute(predictions=final_predictions, references=all_labels, average="binary")
    f1_score = f1.compute(predictions=final_predictions, references=all_labels, average="binary")
    return {
        "Accuracy": accuracy_score["accuracy"],
        "Precision": precision_score["precision"],
        "Recall": recall_score["recall"],
        "F1": f1_score["f1"]
    }

In [11]:
# We initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_test["train"],
    eval_dataset=train_test["test"],
    compute_metrics=compute_metrics
)
trainer.can_return_loss = True

# We start training
trainer.train()

# We save the trained model and evaluate the results
trainer.save_model("./fine_tuned_albert")
tokenizer.save_pretrained("./fine_tuned_albert")

test_results = trainer.evaluate()
print("Test Results:", test_results)

Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
10,0.7171,0.643103,0.733333,0.636364,1.0,0.777778
20,0.6504,0.603371,0.866667,0.8125,0.928571,0.866667
30,0.5926,0.567159,0.733333,0.636364,1.0,0.777778
40,0.4624,0.475693,0.833333,0.8,0.857143,0.827586
50,0.5388,0.524115,0.733333,0.636364,1.0,0.777778
60,0.5012,0.520818,0.8,0.7,1.0,0.823529
70,0.5028,0.47092,0.733333,0.75,0.642857,0.692308
80,0.5232,0.44808,0.8,0.7,1.0,0.823529
90,0.4187,0.729242,0.666667,0.583333,1.0,0.736842
100,0.4284,0.414524,0.833333,0.764706,0.928571,0.83871


Test Results: {'eval_loss': 0.47164714336395264, 'eval_Accuracy': 0.8, 'eval_Precision': 0.7222222222222222, 'eval_Recall': 0.9285714285714286, 'eval_F1': 0.8125000000000001, 'eval_runtime': 0.1614, 'eval_samples_per_second': 185.822, 'eval_steps_per_second': 24.776, 'epoch': 5.0}


In [95]:
# We are setting up the base version of the same model without the fine tuning for comparison purposes
model_name = "albert-base-v2"
finetuned_model_path = "./fine_tuned_albert"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# We test the model on new examples that were not in our dataset
new_test_examples = [
    # Limerick
    '''Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.''',
    # Limerick:
    '''Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."''',
    # Limerick:
    '''Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.''',
    # Limerick:
    '''Poem:
    There was a poor fellow from Lynn,
    By accident sat on a pynn,
    He let out a shriek,
    A howl and a squiek.
    And his language was really a synn.''',
    #Limerick
    '''Poem:
    Professor, you should be commended
    On your theory so geniusly splendid.
    But some say it's luck,
    And you really just suck,
    'Cause your theory's not what you intended!''',
    # Limerick
    '''Poem:
    There once was a classical theory
    Of which quantum disciples were leery.
    They said, ‚ÄúWhy spend so long
    On a theory that‚Äôs wrong?‚Äù
    Well, it works for your everyday query!''',
    # Limerick
    '''Poem:
    Consider, when seeking gestalts,
    The theories that science exalts.
    It's not that they're known
    To be written in stone.
    It's just that we can't say they're false.''',
    # Limerick
    '''Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.''',
    # Non-Limerick
    '''Poem:
    We need to take care of the one world we live in!''',
    # Non-Limerick
    '''Poem:
    In familiar bed,
    hands reaching into the light.
    Soul blossoms tonight.''',
    # Non-Limerick
    '''Poem:
    Prayers are good wishes
    rising up to the realm of
    possibilities.''',
    # Non-Limerick
    '''Poem:
    Once more the storm is howling, and half hid
    Under this cradle-hood and coverlid
    My child sleeps on. There is no obstacle
    But Gregory's wood and one bare hill
    Whereby the haystack- and roof-levelling wind,
    Bred on the Atlantic, can be stayed;
    And for an hour I have walked and prayed
    Because of the great gloom that is in my mind.
    I have walked and prayed for this young child an hour
    And heard the sea-wind scream upon the tower,
    And under the arches of the bridge, and scream
    In the elms above the flooded stream;
    Imagining in excited reverie
    That the future years had come,
    Dancing to a frenzied drum,
    Out of the murderous innocence of the sea.''',
    # Non-Limerick
    '''Poem:
    May she be granted beauty and yet not
    Beauty to make a stranger's eye distraught,
    Or hers before a looking-glass, for such,
    Being made beautiful overmuch,
    Consider beauty a sufficient end,
    Lose natural kindness and maybe
    The heart-revealing intimacy
    That chooses right, and never find a friend.
    Helen being chosen found life flat and dull
    And later had much trouble from a fool,
    While that great Queen, that rose out of the spray,
    Being fatherless could have her way
    Yet chose a bandy-legg√®d smith for man.
    It's certain that fine women eat
    A crazy salad with their meat
    Whereby the Horn of Plenty is undone.''',
    # Non-Limerick
    '''Poem:
    A cannibal monarch
    Kept his wives on a diet,
    But he didn't much care
    What the women should look like
    Nor did they; it was quite immaterial.''',
    # Non-Limerick
    '''Poem:
    There was a poor fellow,
    By accident sat on a pynn,
    He yelled out loud,
    A howl and a squiek.
    And his language was really a curse.''',
    # Non-Limerick
    '''Poem:
    There once was a
    Of which quantum.
    They said,
    On a theory
    Well, it works'''
]
# Below, 1 is a Limerick and 0 is a Non-Limerick
new_test_labels = [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]

new_test_dataset = Dataset.from_dict({"text": new_test_examples, "label": new_test_labels})

new_token_test_dataset = new_test_dataset.map(lambda examples: tokenize_function(examples, tokenizer), batched=True)

id2label = {0: "Non-Limerick", 1: "Limerick"}
label2id = {"Non-Limerick": 0, "Limerick": 1}

# Our baseline model
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, id2label=id2label, label2id=label2id)
base_trainer = Trainer(model=base_model, eval_dataset=new_token_test_dataset, compute_metrics=compute_metrics)

# Our finetuned model
finetuned_model = AutoModelForSequenceClassification.from_pretrained(finetuned_model_path)
finetuned_trainer = Trainer(model=finetuned_model, eval_dataset=new_token_test_dataset, compute_metrics=compute_metrics)

# We print the results
def print_results(trainer, model_name):
    all_predictions = trainer.predict(new_token_test_dataset)
    predicted_labels = torch.argmax(torch.tensor(all_predictions.predictions), axis=1).tolist()
    
    model_metrics = all_predictions.metrics
    print(f"{model_name} Model Metrics:", model_metrics)

    print(f"\nClassification Predictions for {model_name} Model:\n")
    for example, true_label, pred_label in zip(new_test_examples, new_test_labels, predicted_labels):
        print(f"{example}\n\nTrue Label: {id2label[true_label]}\n\nPredicted Label: {id2label[pred_label]}\n")

print_results(base_trainer, "Baseline")
print_results(finetuned_trainer, "Fine-Tuned")

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Baseline Model Metrics: {'test_loss': 0.7043936848640442, 'test_model_preparation_time': 0.0002, 'test_Accuracy': 0.4375, 'test_Precision': 0.4, 'test_Recall': 0.25, 'test_F1': 0.3076923076923077, 'test_runtime': 0.1447, 'test_samples_per_second': 110.598, 'test_steps_per_second': 13.825}

Classification Predictions for Baseline Model:

Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.

True Label: Limerick

Predicted Label: Non-Limerick

Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."

True Label: Limerick

Predicted Label: Non-Limerick

Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.

True Lab

Fine-Tuned Model Metrics: {'test_loss': 0.371559202671051, 'test_model_preparation_time': 0.0002, 'test_Accuracy': 0.875, 'test_Precision': 0.8, 'test_Recall': 1.0, 'test_F1': 0.888888888888889, 'test_runtime': 0.0909, 'test_samples_per_second': 176.001, 'test_steps_per_second': 22.0}

Classification Predictions for Fine-Tuned Model:

Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.

True Label: Limerick

Predicted Label: Limerick

Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."

True Label: Limerick

Predicted Label: Limerick

Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.

True Label: Limeri

In [72]:
# We are setting up the model, google/electra-base-discriminator, below specifically using SequenceClassification and id2label and label2id to go back and forth between labels and their encoding
model_name = "google/electra-base-discriminator"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, id2label={0: "Non-Limerick", 1: "Limerick"}, label2id={"Non-Limerick": 0, "Limerick": 1})

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [73]:
# We prepare the dataset, tokenize the information, etc. below
def prepare_dataset(poems, labels):
    label_encoding = [1 if label == "Limerick" else 0 for label in labels]
    return Dataset.from_dict({"text": [f"Poem:\n{p}" for p, l in zip(poems, labels)], "label": label_encoding})

def tokenize_function(examples, tokenizer, max_length=64):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)
    
questions_df = pd.read_csv("Fine_Tuning_Assignment - Limerick Classification-2.csv")

dataset = prepare_dataset(questions_df["Input (Poem)"], questions_df["Label (Limerick or Non-Limerick)"])

In [74]:
dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 300
})

In [75]:
tokenized_dataset = dataset.map(lambda examples: tokenize_function(examples, tokenizer), batched=True)

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [76]:
train_test = tokenized_dataset.train_test_split(test_size=0.1)

In [77]:
train_test

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 270
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 30
    })
})

In [78]:
dataset[0], tokenized_dataset[0]

({'text': "Poem:\nIf the Limerick's cocktail you 'd quaff,\nStir nonsense with wit, each a half,\nAdd a dash of good fun,\nDrop in a pun-\nAnd then make a noise like a laugh.",
  'label': 1},
 {'text': "Poem:\nIf the Limerick's cocktail you 'd quaff,\nStir nonsense with wit, each a half,\nAdd a dash of good fun,\nDrop in a pun-\nAnd then make a noise like a laugh.",
  'label': 1,
  'input_ids': [101,
   5961,
   1024,
   2065,
   1996,
   15679,
   1005,
   1055,
   18901,
   2017,
   1005,
   1040,
   24209,
   10354,
   2546,
   1010,
   16130,
   14652,
   2007,
   15966,
   1010,
   2169,
   1037,
   2431,
   1010,
   5587,
   1037,
   11454,
   1997,
   2204,
   4569,
   1010,
   4530,
   1999,
   1037,
   26136,
   1011,
   1998,
   2059,
   2191,
   1037,
   5005,
   2066,
   1037,
   4756,
   1012,
   102,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0],
  'token_type_ids': [0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,

In [79]:
# Define training arguments
output_dir = "./fine_tuned_electra_5"

# Set up training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=8,
    num_train_epochs=5,
    save_steps=1000,
    save_total_limit=2,
    logging_dir=f"{output_dir}/logs",
    logging_steps=10,
    learning_rate=1e-5,
    warmup_steps=5,
    weight_decay=0.01,
    evaluation_strategy="steps"
)



In [80]:
# We compute classification metrics (accuracy, precision, recall, and f1 using the evaluate library
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

def compute_metrics(predictions):
    all_logits, all_labels = predictions
    final_predictions = all_logits.argmax(axis=-1)
    accuracy_score = accuracy.compute(predictions=final_predictions, references=all_labels)
    precision_score = precision.compute(predictions=final_predictions, references=all_labels, average="binary")
    recall_score = recall.compute(predictions=final_predictions, references=all_labels, average="binary")
    f1_score = f1.compute(predictions=final_predictions, references=all_labels, average="binary")
    return {
        "Accuracy": accuracy_score["accuracy"],
        "Precision": precision_score["precision"],
        "Recall": recall_score["recall"],
        "F1": f1_score["f1"]
    }

In [81]:
# We initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_test["train"],
    eval_dataset=train_test["test"],
    compute_metrics=compute_metrics
)
trainer.can_return_loss = True

# We start training
trainer.train()

# We save the trained model and evaluate the results
trainer.save_model("./fine_tuned_electra_5")
tokenizer.save_pretrained("./fine_tuned_electra_5")

test_results = trainer.evaluate()
print("Test Results:", test_results)

Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
10,0.6918,0.687751,0.5,0.5,0.533333,0.516129
20,0.6865,0.68437,0.466667,0.333333,0.066667,0.111111
30,0.6613,0.680296,0.433333,0.25,0.066667,0.105263
40,0.6479,0.660673,0.733333,0.666667,0.933333,0.777778
50,0.609,0.64687,0.766667,0.681818,1.0,0.810811
60,0.5957,0.638431,0.733333,0.666667,0.933333,0.777778
70,0.6008,0.652733,0.633333,0.625,0.666667,0.645161
80,0.5237,0.61458,0.766667,0.681818,1.0,0.810811
90,0.5223,0.60635,0.766667,0.681818,1.0,0.810811
100,0.5466,0.629916,0.7,0.65,0.866667,0.742857


Test Results: {'eval_loss': 0.6364413499832153, 'eval_Accuracy': 0.6666666666666666, 'eval_Precision': 0.631578947368421, 'eval_Recall': 0.8, 'eval_F1': 0.7058823529411765, 'eval_runtime': 0.1988, 'eval_samples_per_second': 150.912, 'eval_steps_per_second': 20.122, 'epoch': 5.0}


In [97]:
# We are setting up the base version of the same model without the fine tuning for comparison purposes
model_name = "google/electra-base-discriminator"
finetuned_model_path = "./fine_tuned_electra_5"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# We test the model on new examples that were not in our dataset
new_test_examples = [
    # Limerick
    '''Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.''',
    # Limerick:
    '''Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."''',
    # Limerick:
    '''Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.''',
    # Limerick:
    '''Poem:
    There was a poor fellow from Lynn,
    By accident sat on a pynn,
    He let out a shriek,
    A howl and a squiek.
    And his language was really a synn.''',
    #Limerick
    '''Poem:
    Professor, you should be commended
    On your theory so geniusly splendid.
    But some say it's luck,
    And you really just suck,
    'Cause your theory's not what you intended!''',
    # Limerick
    '''Poem:
    There once was a classical theory
    Of which quantum disciples were leery.
    They said, ‚ÄúWhy spend so long
    On a theory that‚Äôs wrong?‚Äù
    Well, it works for your everyday query!''',
    # Limerick
    '''Poem:
    Consider, when seeking gestalts,
    The theories that science exalts.
    It's not that they're known
    To be written in stone.
    It's just that we can't say they're false.''',
    # Limerick
    '''Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.''',
    # Non-Limerick
    '''Poem:
    We need to take care of the one world we live in!''',
    # Non-Limerick
    '''Poem:
    In familiar bed,
    hands reaching into the light.
    Soul blossoms tonight.''',
    # Non-Limerick
    '''Poem:
    Prayers are good wishes
    rising up to the realm of
    possibilities.''',
    # Non-Limerick
    '''Poem:
    Once more the storm is howling, and half hid
    Under this cradle-hood and coverlid
    My child sleeps on. There is no obstacle
    But Gregory's wood and one bare hill
    Whereby the haystack- and roof-levelling wind,
    Bred on the Atlantic, can be stayed;
    And for an hour I have walked and prayed
    Because of the great gloom that is in my mind.
    I have walked and prayed for this young child an hour
    And heard the sea-wind scream upon the tower,
    And under the arches of the bridge, and scream
    In the elms above the flooded stream;
    Imagining in excited reverie
    That the future years had come,
    Dancing to a frenzied drum,
    Out of the murderous innocence of the sea.''',
    # Non-Limerick
    '''Poem:
    May she be granted beauty and yet not
    Beauty to make a stranger's eye distraught,
    Or hers before a looking-glass, for such,
    Being made beautiful overmuch,
    Consider beauty a sufficient end,
    Lose natural kindness and maybe
    The heart-revealing intimacy
    That chooses right, and never find a friend.
    Helen being chosen found life flat and dull
    And later had much trouble from a fool,
    While that great Queen, that rose out of the spray,
    Being fatherless could have her way
    Yet chose a bandy-legg√®d smith for man.
    It's certain that fine women eat
    A crazy salad with their meat
    Whereby the Horn of Plenty is undone.''',
    # Non-Limerick
    '''Poem:
    A cannibal monarch
    Kept his wives on a diet,
    But he didn't much care
    What the women should look like
    Nor did they; it was quite immaterial.''',
    # Non-Limerick
    '''Poem:
    There was a poor fellow,
    By accident sat on a pynn,
    He yelled out loud,
    A howl and a squiek.
    And his language was really a curse.''',
    # Non-Limerick
    '''Poem:
    There once was a
    Of which quantum.
    They said,
    On a theory
    Well, it works'''
]
# Below, 1 is a Limerick and 0 is a Non-Limerick
new_test_labels = [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]

new_test_dataset = Dataset.from_dict({"text": new_test_examples, "label": new_test_labels})

new_token_test_dataset = new_test_dataset.map(lambda examples: tokenize_function(examples, tokenizer), batched=True)

id2label = {0: "Non-Limerick", 1: "Limerick"}
label2id = {"Non-Limerick": 0, "Limerick": 1}

# Our baseline model
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, id2label=id2label, label2id=label2id)
base_trainer = Trainer(model=base_model, eval_dataset=new_token_test_dataset, compute_metrics=compute_metrics)

# Our finetuned model
finetuned_model = AutoModelForSequenceClassification.from_pretrained(finetuned_model_path)
finetuned_trainer = Trainer(model=finetuned_model, eval_dataset=new_token_test_dataset, compute_metrics=compute_metrics)

# We print the results
def print_results(trainer, model_name):
    all_predictions = trainer.predict(new_token_test_dataset)
    predicted_labels = torch.argmax(torch.tensor(all_predictions.predictions), axis=1).tolist()
    
    model_metrics = all_predictions.metrics
    print(f"{model_name} Model Metrics:", model_metrics)

    print(f"\nClassification Predictions for {model_name} Model:\n")
    for example, true_label, pred_label in zip(new_test_examples, new_test_labels, predicted_labels):
        print(f"{example}\n\nTrue Label: {id2label[true_label]}\n\nPredicted Label: {id2label[pred_label]}\n")

print_results(base_trainer, "Baseline")
print_results(finetuned_trainer, "Fine-Tuned")

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Baseline Model Metrics: {'test_loss': 0.6812812685966492, 'test_model_preparation_time': 0.0011, 'test_Accuracy': 0.75, 'test_Precision': 1.0, 'test_Recall': 0.5, 'test_F1': 0.6666666666666666, 'test_runtime': 0.115, 'test_samples_per_second': 139.144, 'test_steps_per_second': 17.393}

Classification Predictions for Baseline Model:

Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.

True Label: Limerick

Predicted Label: Non-Limerick

Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."

True Label: Limerick

Predicted Label: Non-Limerick

Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.

True Label: 

Fine-Tuned Model Metrics: {'test_loss': 0.47391584515571594, 'test_model_preparation_time': 0.0012, 'test_Accuracy': 0.75, 'test_Precision': 0.75, 'test_Recall': 0.75, 'test_F1': 0.75, 'test_runtime': 0.0754, 'test_samples_per_second': 212.272, 'test_steps_per_second': 26.534}

Classification Predictions for Fine-Tuned Model:

Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.

True Label: Limerick

Predicted Label: Limerick

Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."

True Label: Limerick

Predicted Label: Limerick

Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.

True Label: Limerick

Pred