In [183]:
# We have the necessary imports below
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorForLanguageModeling, DataCollatorWithPadding
from datasets import Dataset
import torch
import evaluate
import pandas as pd
import numpy as np
from transformers import pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [184]:
# Load the ALBERT checkpoint for sequence classification. The id2label / label2id
# maps translate between label names and their slot in the 6-element label encoding.
model_name = "albert-base-v2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    id2label={
        0: "Non-Limerick",
        1: "Limerick",
        2: "5 Lines",
        3: "Not 5 Lines",
        4: "AABBA Rhyme Scheme",
        5: "Not AABBA Rhyme Scheme",
    },
    label2id={
        "Non-Limerick": 0,
        "Limerick": 1,
        "5 Lines": 2,
        "Not 5 Lines": 3,
        "AABBA Rhyme Scheme": 4,
        "Not AABBA Rhyme Scheme": 5,
    },
)


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [185]:
# We prepare the dataset: each poem's classification label and reasoning labels are encoded as a 6-element 0/1 vector, and a helper is defined to tokenize the poem text.
id2label={0: "Non-Limerick", 1: "Limerick", 2: "5 Lines", 3: "Not 5 Lines", 4: "AABBA Rhyme Scheme", 5: "Not AABBA Rhyme Scheme"}
encoding_length = len(id2label)
encoding_elements = id2label.items()

def prepare_dataset(poems, classification_labels, reasoning_labels):
    """Build a Dataset of prompt texts and multi-hot label vectors.

    Args:
        poems: Iterable of poem strings.
        classification_labels: Iterable of label names, one per poem, compared
            against the names in ``id2label``.
        reasoning_labels: Iterable of comma-separated label names, one per poem.
            Whitespace around each name is ignored; a non-string cell (for
            example a missing value read by pandas) contributes no reasoning labels.

    Returns:
        A ``Dataset`` with a "text" column (each poem prefixed with "Poem:" and a
        newline) and a "label" column holding one 0/1 list of length
        ``encoding_length`` per poem.
    """
    label_encoding = []
    for classification_label, reasoning_label in zip(classification_labels, reasoning_labels):
        # Parse the reasoning cell once per row instead of once per candidate label.
        # Splitting on "," and stripping tolerates "a,b" as well as "a, b".
        if isinstance(reasoning_label, str):
            reasoning_parts = {part.strip() for part in reasoning_label.split(",")}
        else:
            reasoning_parts = set()
        labels = [0] * encoding_length
        for index, label in encoding_elements:
            if label == classification_label or label in reasoning_parts:
                labels[index] = 1
        label_encoding.append(labels)
    return Dataset.from_dict({"text": [f"Poem:\n{p}" for p in poems], "label": label_encoding})

def tokenize_function(examples, tokenizer, max_length=256):
    """Tokenize the "text" entry of a batch to a fixed length.

    Args:
        examples: Mapping with a "text" key holding the strings to tokenize.
        tokenizer: Callable applied to the texts.
        max_length: Length every sequence is truncated / padded to.

    Returns:
        Whatever ``tokenizer`` returns for the batch.
    """
    texts = examples["text"]
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": max_length,
    }
    return tokenizer(texts, **tokenizer_options)

# Read the labelled poems and turn the three relevant columns into a Dataset.
questions_df = pd.read_csv("Copy of Fine_Tuning_Assignment - Limerick Classification.csv")

dataset = prepare_dataset(
    poems=questions_df["Input (Poem)"],
    classification_labels=questions_df["Label (Limerick or Non-Limerick)"],
    reasoning_labels=questions_df["Reasoning"],
)

In [186]:
# Display the dataset summary (columns and row count) as a quick sanity check.
dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 600
})

In [187]:
# Tokenize every example, then hold out 10% of the rows as a test set (90-10 split).
tokenized_dataset = dataset.map(lambda examples: tokenize_function(examples, tokenizer), batched=True)
# A fixed seed keeps the split, and therefore the reported metrics, reproducible across runs.
train_test = tokenized_dataset.train_test_split(test_size=0.1, seed=42)

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

In [188]:
# Display the train/test split to confirm the row counts and tokenizer columns.
train_test

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 540
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 60
    })
})

In [189]:
# Compare the first example before and after tokenization.
dataset[0], tokenized_dataset[0]

({'text': "Poem:\nIf the Limerick's cocktail you 'd quaff,\nStir nonsense with wit, each a half,\nAdd a dash of good fun,\nDrop in a pun-\nAnd then make a noise like a laugh.",
  'label': [0, 1, 1, 0, 1, 0]},
 {'text': "Poem:\nIf the Limerick's cocktail you 'd quaff,\nStir nonsense with wit, each a half,\nAdd a dash of good fun,\nDrop in a pun-\nAnd then make a noise like a laugh.",
  'label': [0, 1, 1, 0, 1, 0],
  'input_ids': [2,
   4629,
   45,
   100,
   14,
   18185,
   22,
   18,
   18816,
   42,
   13,
   22,
   43,
   7131,
   2460,
   15,
   13216,
   13,
   16684,
   29,
   9642,
   15,
   206,
   21,
   519,
   15,
   3547,
   21,
   8405,
   16,
   254,
   2414,
   15,
   2804,
   19,
   21,
   11582,
   8,
   17,
   94,
   233,
   21,
   3406,
   101,
   21,
   3051,
   9,
   3,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,

In [190]:
# Column groups inside the 6-wide logit / label vectors: columns 0-1 carry the
# Non-Limerick / Limerick decision, columns 2-5 carry the reasoning labels.
classification_indices = [0, 1]
reasoning_indices = [2, 3, 4, 5]

def get_predictions(input):
    """Convert a 2-D array of logits into a 0/1 prediction array of the same shape.

    For every row, the classification column with the highest logit is set to 1,
    and the two reasoning columns with the highest logits are set to 1; all other
    entries stay 0.

    Args:
        input: 2-D NumPy array of logits, one row per example.

    Returns:
        Float array of zeros and ones shaped like ``input``.
    """
    row_ids = np.arange(len(input))
    output = np.zeros(input.shape)

    # Winning classification column per row, mapped back to its absolute index.
    best_class = np.argmax(input[:, classification_indices], axis=1)
    output[row_ids, np.asarray(classification_indices)[best_class]] = 1

    # Two largest reasoning logits per row, mapped back to absolute indices.
    top_two = np.argsort(input[:, reasoning_indices], axis=1)[:, -2:]
    output[row_ids[:, None], np.asarray(reasoning_indices)[top_two]] = 1
    return output

In [191]:
# Classification and reasoning metrics (accuracy, precision, recall, F1) via scikit-learn.
def compute_metrics(predictions):
    """Score one evaluation pass.

    Args:
        predictions: A ``(logits, labels)`` pair of 2-D arrays with one column per
            slot of the 6-element label encoding.

    Returns:
        dict with accuracy / precision / recall / F1 for the classification
        columns and for the reasoning columns.
    """
    all_logits, all_labels = predictions
    final_predictions = get_predictions(all_logits).astype(int)
    all_labels = all_labels.astype(int)

    # argmax over the two classification columns yields the class id
    # (1 == "Limerick", which is the positive class for average="binary").
    classification_predictions = [np.argmax(row[classification_indices]) for row in final_predictions]
    classification_labels = [np.argmax(row[classification_indices]) for row in all_labels]
    classification_results = {
        "Classification Accuracy": accuracy_score(classification_labels, classification_predictions),
        "Classification Precision": precision_score(classification_labels, classification_predictions, average="binary", zero_division=0),
        "Classification Recall": recall_score(classification_labels, classification_predictions, average="binary", zero_division=0),
        "Classification F1": f1_score(classification_labels, classification_predictions, average="binary", zero_division=0)
    }

    # Flatten the reasoning columns so every (example, reasoning label) pair is one
    # binary decision. average="binary" scores only the positive (label present)
    # decisions; average="micro" on these 0/1 vectors would count both classes and
    # make precision, recall and F1 all equal to accuracy.
    reasoning_predictions = final_predictions[:, reasoning_indices].flatten()
    reasoning_labels = all_labels[:, reasoning_indices].flatten()
    reasoning_results = {
        "Reasoning Accuracy": accuracy_score(reasoning_labels, reasoning_predictions),
        "Reasoning Precision": precision_score(reasoning_labels, reasoning_predictions, average="binary", zero_division=0),
        "Reasoning Recall": recall_score(reasoning_labels, reasoning_predictions, average="binary", zero_division=0),
        "Reasoning F1": f1_score(reasoning_labels, reasoning_predictions, average="binary", zero_division=0)
    }

    return {**classification_results, **reasoning_results}

In [192]:
# Custom trainer: binary classification plus multi-label reasoning with a weighted loss.
classification_indices = [0, 1]
reasoning_indices = [2, 3, 4, 5]

class BinaryClassMultiLabelTrainer(Trainer):
    """Trainer whose loss is a weighted sum of a classification and a reasoning term.

    The classification term is cross-entropy over the logit columns listed in
    ``classification_indices``; the reasoning term is binary cross-entropy with
    logits over the columns listed in ``reasoning_indices``.

    Args:
        weights: ``(classification_weight, reasoning_weight)``. Defaults to equal
            weighting ``(1.0, 1.0)`` when omitted.
        **kwargs: Forwarded unchanged to ``Trainer``.
    """

    def __init__(self, weights=None, **kwargs):
        super().__init__(**kwargs)
        # The signature defaults to None; without a fallback compute_loss would
        # fail on self.weights[0].
        self.weights = (1.0, 1.0) if weights is None else weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Return the weighted loss, plus the model outputs when ``return_outputs`` is true."""
        target_labels = inputs.pop("labels")
        outputs = model(**inputs)
        predicted_logits = outputs[0]
        # Both loss functions are given the 0/1 targets as floating-point values;
        # cast explicitly so integer label tensors are accepted as well.
        target_labels = target_labels.to(predicted_logits.dtype)
        classification_loss = torch.nn.functional.cross_entropy(predicted_logits[:, classification_indices], target_labels[:, classification_indices])
        reasoning_loss = torch.nn.functional.binary_cross_entropy_with_logits(predicted_logits[:, reasoning_indices], target_labels[:, reasoning_indices])
        loss = self.weights[0] * classification_loss + self.weights[1] * reasoning_loss
        if return_outputs:
            return (loss, outputs)
        return loss

In [193]:
# Directory that receives checkpoints, logs and the final model
output_dir = "./fine_tuned_albert"

# Hyperparameters for the fine-tuning run
training_args = TrainingArguments(
    output_dir=output_dir,
    logging_dir=f"{output_dir}/logs",
    # optimisation
    num_train_epochs=5,
    per_device_train_batch_size=8,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=5,
    # logging / checkpointing / evaluation
    logging_steps=10,
    save_steps=1000,
    save_total_limit=2,
    # NOTE(review): newer transformers releases appear to rename this argument to
    # eval_strategy — confirm against the installed version.
    evaluation_strategy="epoch",
)



In [194]:
# Build the trainer; weights = (classification weight, reasoning weight) for the custom loss
trainer = BinaryClassMultiLabelTrainer(
    weights=(0.7, 4),
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_test["train"],
    eval_dataset=train_test["test"],
)
trainer.can_return_loss = True

# Fine-tune
trainer.train()

# Persist the fine-tuned model and its tokenizer side by side
trainer.save_model("./fine_tuned_albert")
tokenizer.save_pretrained("./fine_tuned_albert")

# Final evaluation on the held-out split
test_results = trainer.evaluate()
print("Test Results:", test_results)

Epoch,Training Loss,Validation Loss,Classification accuracy,Classification precision,Classification recall,Classification f1,Reasoning accuracy,Reasoning precision,Reasoning recall,Reasoning f1
1,2.0656,2.333874,0.583333,0.0,0.0,0.0,0.741667,0.741667,0.741667,0.741667
2,1.6325,1.561169,0.733333,0.666667,0.72,0.692308,0.875,0.875,0.875,0.875
3,1.6087,1.516489,0.766667,0.689655,0.8,0.740741,0.85,0.85,0.85,0.85
4,1.2265,1.444254,0.766667,0.689655,0.8,0.740741,0.858333,0.858333,0.858333,0.858333
5,1.1661,1.487968,0.733333,0.666667,0.72,0.692308,0.841667,0.841667,0.841667,0.841667


Test Results: {'eval_loss': 1.4879677295684814, 'eval_Classification Accuracy': 0.7333333333333333, 'eval_Classification Precision': 0.6666666666666666, 'eval_Classification Recall': 0.72, 'eval_Classification F1': 0.6923076923076923, 'eval_Reasoning Accuracy': 0.8416666666666667, 'eval_Reasoning Precision': 0.8416666666666667, 'eval_Reasoning Recall': 0.8416666666666667, 'eval_Reasoning F1': 0.8416666666666667, 'eval_runtime': 1.1682, 'eval_samples_per_second': 51.36, 'eval_steps_per_second': 6.848, 'epoch': 5.0}


In [195]:
# Names of the base checkpoint and of the fine-tuned model saved earlier, for comparison
model_name = "albert-base-v2"
finetuned_model_path = "./fine_tuned_albert"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# New examples that were not in our dataset. The comment above each entry is the
# expected class. Mis-decoded characters in the original text (curly quotes and
# an accented "e" that had been turned into mojibake) are restored here.
new_test_examples = [
    # Limerick
    '''Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.''',
    # Limerick
    '''Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."''',
    # Limerick
    '''Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.''',
    # Limerick
    '''Poem:
    There was a poor fellow from Lynn,
    By accident sat on a pynn,
    He let out a shriek,
    A howl and a squiek.
    And his language was really a synn.''',
    # Limerick
    '''Poem:
    Professor, you should be commended
    On your theory so geniusly splendid.
    But some say it's luck,
    And you really just suck,
    'Cause your theory's not what you intended!''',
    # Limerick
    '''Poem:
    There once was a classical theory
    Of which quantum disciples were leery.
    They said, “Why spend so long
    On a theory that’s wrong?”
    Well, it works for your everyday query!''',
    # Limerick
    '''Poem:
    Consider, when seeking gestalts,
    The theories that science exalts.
    It's not that they're known
    To be written in stone.
    It's just that we can't say they're false.''',
    # Limerick
    '''Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.''',
    # Non-Limerick
    '''Poem:
    We need to take care of the one world we live in!''',
    # Non-Limerick
    '''Poem:
    In familiar bed,
    hands reaching into the light.
    Soul blossoms tonight.''',
    # Non-Limerick
    '''Poem:
    Prayers are good wishes
    rising up to the realm of
    possibilities.''',
    # Non-Limerick
    '''Poem:
    Once more the storm is howling, and half hid
    Under this cradle-hood and coverlid
    My child sleeps on. There is no obstacle
    But Gregory's wood and one bare hill
    Whereby the haystack- and roof-levelling wind,
    Bred on the Atlantic, can be stayed;
    And for an hour I have walked and prayed
    Because of the great gloom that is in my mind.
    I have walked and prayed for this young child an hour
    And heard the sea-wind scream upon the tower,
    And under the arches of the bridge, and scream
    In the elms above the flooded stream;
    Imagining in excited reverie
    That the future years had come,
    Dancing to a frenzied drum,
    Out of the murderous innocence of the sea.''',
    # Non-Limerick
    '''Poem:
    May she be granted beauty and yet not
    Beauty to make a stranger's eye distraught,
    Or hers before a looking-glass, for such,
    Being made beautiful overmuch,
    Consider beauty a sufficient end,
    Lose natural kindness and maybe
    The heart-revealing intimacy
    That chooses right, and never find a friend.
    Helen being chosen found life flat and dull
    And later had much trouble from a fool,
    While that great Queen, that rose out of the spray,
    Being fatherless could have her way
    Yet chose a bandy-leggèd smith for man.
    It's certain that fine women eat
    A crazy salad with their meat
    Whereby the Horn of Plenty is undone.''',
    # Non-Limerick
    '''Poem:
    A cannibal monarch
    Kept his wives on a diet,
    But he didn't much care
    What the women should look like
    Nor did they; it was quite immaterial.''',
    # Non-Limerick
    '''Poem:
    There was a poor fellow,
    By accident sat on a pynn,
    He yelled out loud,
    A howl and a squiek.
    And his language was really a curse.''',
    # Non-Limerick
    '''Poem:
    There once was a
    Of which quantum.
    They said,
    On a theory
    Well, it works'''
]

# Label names by slot in the 6-element encoding, and the inverse lookup
id2label = {0: "Non-Limerick", 1: "Limerick", 2: "5 Lines", 3: "Not 5 Lines", 4: "AABBA Rhyme Scheme", 5: "Not AABBA Rhyme Scheme"}
label2id = {label: index for index, label in id2label.items()}

# Run the model on the examples and return the predicted label names for each one
def decode_predictions(model, tokenizer, new_test_examples, id2label, max_length=256):
    """Predict label names for a list of texts.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Tokenizer called on the texts with ``return_tensors="pt"``.
        new_test_examples: List of input strings.
        id2label: Mapping from column index to label name.
        max_length: Length every input is truncated / padded to.

    Returns:
        One list of label names per input, in column order, for every column
        that ``get_predictions`` set to 1.
    """
    tokenized_input = tokenizer(new_test_examples, truncation=True, padding="max_length", max_length=max_length, return_tensors="pt")
    # Put the inputs on the model's device so a model living on an accelerator works too.
    device = getattr(model, "device", None)
    if device is not None:
        tokenized_input = tokenized_input.to(device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**tokenized_input).logits
    predictions = get_predictions(logits.cpu().numpy())
    return [
        [id2label[i] for i, label in enumerate(row) if label == 1]
        for row in predictions
    ]

# Baseline: the base checkpoint loaded with the same label maps
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, id2label=id2label, label2id=label2id)
# Fine-tuned model saved by the training cell
finetuned_model = AutoModelForSequenceClassification.from_pretrained(finetuned_model_path)

# Predictions from both models on the same examples
base_model_predictions = decode_predictions(base_model, tokenizer, new_test_examples, id2label)
finetuned_model_predictions = decode_predictions(finetuned_model, tokenizer, new_test_examples, id2label)

# Print each model's predictions next to the inputs
for heading, model_predictions in (
    ("Base Model Predictions:", base_model_predictions),
    ("Fine-Tuned Model Predictions:", finetuned_model_predictions),
):
    print(heading)
    for text, prediction in zip(new_test_examples, model_predictions):
        print(f"Input: {text}")
        print(f"Output: {prediction}\n")

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Base Model Predictions:
Input: Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.
Output: ['Non-Limerick', '5 Lines', 'Not AABBA Rhyme Scheme']

Input: Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."
Output: ['Non-Limerick', '5 Lines', 'Not AABBA Rhyme Scheme']

Input: Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.
Output: ['Non-Limerick', '5 Lines', 'Not AABBA Rhyme Scheme']

Input: Poem:
    There was a poor fellow from Lynn,
    By accident sat on a pynn,
    He let out a shriek,
    A howl and a squiek.
    And his language was really a synn.
Output: ['Non-Limerick', '5 Lines', 'Not AABBA Rh

In [197]:
# Names of the base checkpoint and of the fine-tuned model saved earlier, for comparison
model_name = "albert-base-v2"
finetuned_model_path = "./fine_tuned_albert"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Batch of 16 inputs: one limerick repeated eight times followed by one
# non-limerick repeated eight times.
new_test_examples = [
    # Limerick
    '''Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.''',
] * 8 + [
    # Non-Limerick
    '''Poem:
    There once was a
    Of which quantum.
    They said,
    On a theory
    Well, it works''',
] * 8

# Label names by slot in the 6-element encoding, and the inverse lookup
id2label = {
    0: "Non-Limerick",
    1: "Limerick",
    2: "5 Lines",
    3: "Not 5 Lines",
    4: "AABBA Rhyme Scheme",
    5: "Not AABBA Rhyme Scheme",
}
label2id = {
    "Non-Limerick": 0,
    "Limerick": 1,
    "5 Lines": 2,
    "Not 5 Lines": 3,
    "AABBA Rhyme Scheme": 4,
    "Not AABBA Rhyme Scheme": 5,
}

# Run the model on the examples and return the predicted label names for each one
def decode_predictions(model, tokenizer, new_test_examples, id2label, max_length=256):
    """Predict label names for a list of texts.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Tokenizer called on the texts with ``return_tensors="pt"``.
        new_test_examples: List of input strings.
        id2label: Mapping from column index to label name.
        max_length: Length every input is truncated / padded to.

    Returns:
        One list of label names per input, in column order, for every column
        that ``get_predictions`` set to 1.
    """
    tokenized_input = tokenizer(new_test_examples, truncation=True, padding="max_length", max_length=max_length, return_tensors="pt")
    # Put the inputs on the model's device so a model living on an accelerator works too.
    device = getattr(model, "device", None)
    if device is not None:
        tokenized_input = tokenized_input.to(device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**tokenized_input).logits
    predictions = get_predictions(logits.cpu().numpy())
    return [
        [id2label[i] for i, label in enumerate(row) if label == 1]
        for row in predictions
    ]

# Baseline: the base checkpoint loaded with the same label maps
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, id2label=id2label, label2id=label2id)
# Fine-tuned model saved by the training cell
finetuned_model = AutoModelForSequenceClassification.from_pretrained(finetuned_model_path)

# Predictions from both models on the same examples
base_model_predictions = decode_predictions(base_model, tokenizer, new_test_examples, id2label)
finetuned_model_predictions = decode_predictions(finetuned_model, tokenizer, new_test_examples, id2label)

# Print each model's predictions next to the inputs
for heading, model_predictions in (
    ("Base Model Predictions:", base_model_predictions),
    ("Fine-Tuned Model Predictions:", finetuned_model_predictions),
):
    print(heading)
    for text, prediction in zip(new_test_examples, model_predictions):
        print(f"Input: {text}")
        print(f"Output: {prediction}\n")

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Base Model Predictions:
Input: Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.
Output: ['Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.
Output: ['Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.
Output: ['Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite r

In [102]:
# Load the ELECTRA discriminator checkpoint for sequence classification. The
# id2label / label2id maps translate between label names and their slot in the
# 6-element label encoding.
model_name = "google/electra-base-discriminator"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    id2label={
        0: "Non-Limerick",
        1: "Limerick",
        2: "5 Lines",
        3: "Not 5 Lines",
        4: "AABBA Rhyme Scheme",
        5: "Not AABBA Rhyme Scheme",
    },
    label2id={
        "Non-Limerick": 0,
        "Limerick": 1,
        "5 Lines": 2,
        "Not 5 Lines": 3,
        "AABBA Rhyme Scheme": 4,
        "Not AABBA Rhyme Scheme": 5,
    },
)


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [103]:
# We prepare the dataset: each poem's classification label and reasoning labels are encoded as a 6-element 0/1 vector, and a helper is defined to tokenize the poem text.
id2label={0: "Non-Limerick", 1: "Limerick", 2: "5 Lines", 3: "Not 5 Lines", 4: "AABBA Rhyme Scheme", 5: "Not AABBA Rhyme Scheme"}
encoding_length = len(id2label)
encoding_elements = id2label.items()

def prepare_dataset(poems, classification_labels, reasoning_labels):
    """Build a Dataset of prompt texts and multi-hot label vectors.

    Args:
        poems: Iterable of poem strings.
        classification_labels: Iterable of label names, one per poem, compared
            against the names in ``id2label``.
        reasoning_labels: Iterable of comma-separated label names, one per poem.
            Whitespace around each name is ignored; a non-string cell (for
            example a missing value read by pandas) contributes no reasoning labels.

    Returns:
        A ``Dataset`` with a "text" column (each poem prefixed with "Poem:" and a
        newline) and a "label" column holding one 0/1 list of length
        ``encoding_length`` per poem.
    """
    label_encoding = []
    for classification_label, reasoning_label in zip(classification_labels, reasoning_labels):
        # Parse the reasoning cell once per row instead of once per candidate label.
        # Splitting on "," and stripping tolerates "a,b" as well as "a, b".
        if isinstance(reasoning_label, str):
            reasoning_parts = {part.strip() for part in reasoning_label.split(",")}
        else:
            reasoning_parts = set()
        labels = [0] * encoding_length
        for index, label in encoding_elements:
            if label == classification_label or label in reasoning_parts:
                labels[index] = 1
        label_encoding.append(labels)
    return Dataset.from_dict({"text": [f"Poem:\n{p}" for p in poems], "label": label_encoding})

def tokenize_function(examples, tokenizer, max_length=256):
    """Tokenize the "text" entry of a batch to a fixed length.

    Args:
        examples: Mapping with a "text" key holding the strings to tokenize.
        tokenizer: Callable applied to the texts.
        max_length: Length every sequence is truncated / padded to.

    Returns:
        Whatever ``tokenizer`` returns for the batch.
    """
    texts = examples["text"]
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": max_length,
    }
    return tokenizer(texts, **tokenizer_options)

# Read the labelled poems and turn the three relevant columns into a Dataset.
questions_df = pd.read_csv("Copy of Fine_Tuning_Assignment - Limerick Classification.csv")

dataset = prepare_dataset(
    poems=questions_df["Input (Poem)"],
    classification_labels=questions_df["Label (Limerick or Non-Limerick)"],
    reasoning_labels=questions_df["Reasoning"],
)

In [104]:
# Display the dataset summary (columns and row count) as a quick sanity check.
dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 600
})

In [105]:
# Tokenize every example, then hold out 10% of the rows as a test set (90-10 split).
tokenized_dataset = dataset.map(lambda examples: tokenize_function(examples, tokenizer), batched=True)
# A fixed seed keeps the split, and therefore the reported metrics, reproducible across runs.
train_test = tokenized_dataset.train_test_split(test_size=0.1, seed=42)

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

In [106]:
# Display the train/test split to confirm the row counts and tokenizer columns.
train_test

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 540
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 60
    })
})

In [107]:
# Compare the first example before and after tokenization.
dataset[0], tokenized_dataset[0]

({'text': "Poem:\nIf the Limerick's cocktail you 'd quaff,\nStir nonsense with wit, each a half,\nAdd a dash of good fun,\nDrop in a pun-\nAnd then make a noise like a laugh.",
  'label': [0, 1, 1, 0, 1, 0]},
 {'text': "Poem:\nIf the Limerick's cocktail you 'd quaff,\nStir nonsense with wit, each a half,\nAdd a dash of good fun,\nDrop in a pun-\nAnd then make a noise like a laugh.",
  'label': [0, 1, 1, 0, 1, 0],
  'input_ids': [101,
   5961,
   1024,
   2065,
   1996,
   15679,
   1005,
   1055,
   18901,
   2017,
   1005,
   1040,
   24209,
   10354,
   2546,
   1010,
   16130,
   14652,
   2007,
   15966,
   1010,
   2169,
   1037,
   2431,
   1010,
   5587,
   1037,
   11454,
   1997,
   2204,
   4569,
   1010,
   4530,
   1999,
   1037,
   26136,
   1011,
   1998,
   2059,
   2191,
   1037,
   5005,
   2066,
   1037,
   4756,
   1012,
   102,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
  

In [108]:
# Column groups inside the 6-wide logit / label vectors: columns 0-1 carry the
# Non-Limerick / Limerick decision, columns 2-5 carry the reasoning labels.
classification_indices = [0, 1]
reasoning_indices = [2, 3, 4, 5]

def get_predictions(input):
    """Convert a 2-D array of logits into a 0/1 prediction array of the same shape.

    For every row, the classification column with the highest logit is set to 1,
    and the two reasoning columns with the highest logits are set to 1; all other
    entries stay 0.

    Args:
        input: 2-D NumPy array of logits, one row per example.

    Returns:
        Float array of zeros and ones shaped like ``input``.
    """
    row_ids = np.arange(len(input))
    output = np.zeros(input.shape)

    # Winning classification column per row, mapped back to its absolute index.
    best_class = np.argmax(input[:, classification_indices], axis=1)
    output[row_ids, np.asarray(classification_indices)[best_class]] = 1

    # Two largest reasoning logits per row, mapped back to absolute indices.
    top_two = np.argsort(input[:, reasoning_indices], axis=1)[:, -2:]
    output[row_ids[:, None], np.asarray(reasoning_indices)[top_two]] = 1
    return output

In [109]:
# Classification and reasoning metrics (accuracy, precision, recall, F1) via scikit-learn.
def compute_metrics(predictions):
    """Score one evaluation pass.

    Args:
        predictions: A ``(logits, labels)`` pair of 2-D arrays with one column per
            slot of the 6-element label encoding.

    Returns:
        dict with accuracy / precision / recall / F1 for the classification
        columns and for the reasoning columns.
    """
    all_logits, all_labels = predictions
    final_predictions = get_predictions(all_logits).astype(int)
    all_labels = all_labels.astype(int)

    # argmax over the two classification columns yields the class id
    # (1 == "Limerick", which is the positive class for average="binary").
    classification_predictions = [np.argmax(row[classification_indices]) for row in final_predictions]
    classification_labels = [np.argmax(row[classification_indices]) for row in all_labels]
    classification_results = {
        "Classification Accuracy": accuracy_score(classification_labels, classification_predictions),
        "Classification Precision": precision_score(classification_labels, classification_predictions, average="binary", zero_division=0),
        "Classification Recall": recall_score(classification_labels, classification_predictions, average="binary", zero_division=0),
        "Classification F1": f1_score(classification_labels, classification_predictions, average="binary", zero_division=0)
    }

    # Flatten the reasoning columns so every (example, reasoning label) pair is one
    # binary decision. average="binary" scores only the positive (label present)
    # decisions; average="micro" on these 0/1 vectors would count both classes and
    # make precision, recall and F1 all equal to accuracy.
    reasoning_predictions = final_predictions[:, reasoning_indices].flatten()
    reasoning_labels = all_labels[:, reasoning_indices].flatten()
    reasoning_results = {
        "Reasoning Accuracy": accuracy_score(reasoning_labels, reasoning_predictions),
        "Reasoning Precision": precision_score(reasoning_labels, reasoning_predictions, average="binary", zero_division=0),
        "Reasoning Recall": recall_score(reasoning_labels, reasoning_predictions, average="binary", zero_division=0),
        "Reasoning F1": f1_score(reasoning_labels, reasoning_predictions, average="binary", zero_division=0)
    }

    return {**classification_results, **reasoning_results}

In [110]:
# Custom trainer: binary classification plus multi-label reasoning with a weighted loss.
classification_indices = [0, 1]
reasoning_indices = [2, 3, 4, 5]

class BinaryClassMultiLabelTrainer(Trainer):
    """Trainer whose loss is a weighted sum of a classification and a reasoning term.

    The classification term is cross-entropy over the logit columns listed in
    ``classification_indices``; the reasoning term is binary cross-entropy with
    logits over the columns listed in ``reasoning_indices``.

    Args:
        weights: ``(classification_weight, reasoning_weight)``. Defaults to equal
            weighting ``(1.0, 1.0)`` when omitted.
        **kwargs: Forwarded unchanged to ``Trainer``.
    """

    def __init__(self, weights=None, **kwargs):
        super().__init__(**kwargs)
        # The signature defaults to None; without a fallback compute_loss would
        # fail on self.weights[0].
        self.weights = (1.0, 1.0) if weights is None else weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Return the weighted loss, plus the model outputs when ``return_outputs`` is true."""
        target_labels = inputs.pop("labels")
        outputs = model(**inputs)
        predicted_logits = outputs[0]
        # Both loss functions are given the 0/1 targets as floating-point values;
        # cast explicitly so integer label tensors are accepted as well.
        target_labels = target_labels.to(predicted_logits.dtype)
        classification_loss = torch.nn.functional.cross_entropy(predicted_logits[:, classification_indices], target_labels[:, classification_indices])
        reasoning_loss = torch.nn.functional.binary_cross_entropy_with_logits(predicted_logits[:, reasoning_indices], target_labels[:, reasoning_indices])
        loss = self.weights[0] * classification_loss + self.weights[1] * reasoning_loss
        if return_outputs:
            return (loss, outputs)
        return loss

In [111]:
# Define training arguments
# Directory used for checkpoints and, below it, the training logs
output_dir = "./fine_tuned_electra"

# Training configuration, grouped by concern
training_args = TrainingArguments(
    # Where artifacts go
    output_dir=output_dir,
    logging_dir=f"{output_dir}/logs",
    # Optimisation schedule
    num_train_epochs=5,
    per_device_train_batch_size=8,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=5,
    # Logging, evaluation and checkpointing cadence
    logging_steps=10,
    evaluation_strategy="epoch",
    save_steps=1000,
    save_total_limit=2
)



In [112]:
# We initialize the custom trainer; `weights` scales (classification loss, reasoning loss)
trainer = BinaryClassMultiLabelTrainer(
    model=model,
    args=training_args,
    train_dataset=train_test["train"],
    eval_dataset=train_test["test"],
    compute_metrics=compute_metrics,
    weights=(0.7, 4)
)
# NOTE(review): presumably forced so that evaluation reports a loss even though
# the loss is computed by the custom compute_loss -- confirm it is still needed.
trainer.can_return_loss = True

# We start training
trainer.train()

# We save the trained model and tokenizer into the directory the training
# arguments were configured with, instead of repeating the path literal, so the
# two locations cannot drift apart.
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

# We evaluate on the eval_dataset passed to the trainer above
test_results = trainer.evaluate()
print("Test Results:", test_results)

Epoch,Training Loss,Validation Loss,Classification accuracy,Classification precision,Classification recall,Classification f1,Reasoning accuracy,Reasoning precision,Reasoning recall,Reasoning f1
1,2.4253,2.309034,0.633333,0.8,0.16,0.266667,0.75,0.75,0.75,0.75
2,1.7609,1.715516,0.8,0.685714,0.96,0.8,0.85,0.85,0.85,0.85
3,1.6729,1.463872,0.8,0.685714,0.96,0.8,0.891667,0.891667,0.891667,0.891667
4,1.2949,1.419877,0.8,0.685714,0.96,0.8,0.891667,0.891667,0.891667,0.891667
5,1.3713,1.432367,0.8,0.685714,0.96,0.8,0.875,0.875,0.875,0.875


Test Results: {'eval_loss': 1.432367205619812, 'eval_Classification Accuracy': 0.8, 'eval_Classification Precision': 0.6857142857142857, 'eval_Classification Recall': 0.96, 'eval_Classification F1': 0.7999999999999999, 'eval_Reasoning Accuracy': 0.875, 'eval_Reasoning Precision': 0.875, 'eval_Reasoning Recall': 0.875, 'eval_Reasoning F1': 0.875, 'eval_runtime': 0.9763, 'eval_samples_per_second': 61.459, 'eval_steps_per_second': 8.195, 'epoch': 5.0}


In [113]:
# We are setting up the base version of the same model without the fine tuning for comparison purposes
# Checkpoint name of the pretrained baseline
model_name = "google/electra-base-discriminator"
# Directory the training cell saved the fine-tuned model and tokenizer to
finetuned_model_path = "./fine_tuned_electra"

# A single tokenizer, loaded from the base checkpoint, is used for both models below
tokenizer = AutoTokenizer.from_pretrained(model_name)

# We test the model on new examples that were not in our dataset
# Sixteen hand-picked poems: eight limericks followed by eight non-limericks.
# Mis-decoded characters (UTF-8 read with the wrong codec) in the quoted lines
# have been restored to the intended curly quotes / accented letter so the
# tokenizer sees the real text.
new_test_examples = [
    # Limerick
    '''Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.''',
    # Limerick:
    '''Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."''',
    # Limerick:
    '''Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.''',
    # Limerick:
    '''Poem:
    There was a poor fellow from Lynn,
    By accident sat on a pynn,
    He let out a shriek,
    A howl and a squiek.
    And his language was really a synn.''',
    #Limerick
    '''Poem:
    Professor, you should be commended
    On your theory so geniusly splendid.
    But some say it's luck,
    And you really just suck,
    'Cause your theory's not what you intended!''',
    # Limerick
    '''Poem:
    There once was a classical theory
    Of which quantum disciples were leery.
    They said, “Why spend so long
    On a theory that’s wrong?”
    Well, it works for your everyday query!''',
    # Limerick
    '''Poem:
    Consider, when seeking gestalts,
    The theories that science exalts.
    It's not that they're known
    To be written in stone.
    It's just that we can't say they're false.''',
    # Limerick
    '''Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.''',
    # Non-Limerick
    '''Poem:
    We need to take care of the one world we live in!''',
    # Non-Limerick
    '''Poem:
    In familiar bed,
    hands reaching into the light.
    Soul blossoms tonight.''',
    # Non-Limerick
    '''Poem:
    Prayers are good wishes
    rising up to the realm of
    possibilities.''',
    # Non-Limerick
    '''Poem:
    Once more the storm is howling, and half hid
    Under this cradle-hood and coverlid
    My child sleeps on. There is no obstacle
    But Gregory's wood and one bare hill
    Whereby the haystack- and roof-levelling wind,
    Bred on the Atlantic, can be stayed;
    And for an hour I have walked and prayed
    Because of the great gloom that is in my mind.
    I have walked and prayed for this young child an hour
    And heard the sea-wind scream upon the tower,
    And under the arches of the bridge, and scream
    In the elms above the flooded stream;
    Imagining in excited reverie
    That the future years had come,
    Dancing to a frenzied drum,
    Out of the murderous innocence of the sea.''',
    # Non-Limerick
    '''Poem:
    May she be granted beauty and yet not
    Beauty to make a stranger's eye distraught,
    Or hers before a looking-glass, for such,
    Being made beautiful overmuch,
    Consider beauty a sufficient end,
    Lose natural kindness and maybe
    The heart-revealing intimacy
    That chooses right, and never find a friend.
    Helen being chosen found life flat and dull
    And later had much trouble from a fool,
    While that great Queen, that rose out of the spray,
    Being fatherless could have her way
    Yet chose a bandy-leggèd smith for man.
    It's certain that fine women eat
    A crazy salad with their meat
    Whereby the Horn of Plenty is undone.''',
    # Non-Limerick
    '''Poem:
    A cannibal monarch
    Kept his wives on a diet,
    But he didn't much care
    What the women should look like
    Nor did they; it was quite immaterial.''',
    # Non-Limerick
    '''Poem:
    There was a poor fellow,
    By accident sat on a pynn,
    He yelled out loud,
    A howl and a squiek.
    And his language was really a curse.''',
    # Non-Limerick
    '''Poem:
    There once was a
    Of which quantum.
    They said,
    On a theory
    Well, it works'''
]

# Column index -> label name; the reverse lookup is derived so the two cannot disagree
id2label = dict(enumerate([
    "Non-Limerick",
    "Limerick",
    "5 Lines",
    "Not 5 Lines",
    "AABBA Rhyme Scheme",
    "Not AABBA Rhyme Scheme",
]))
label2id = {label: index for index, label in id2label.items()}

# We need to have the model make the predictions and then return these predictions as output
def decode_predictions(model, tokenizer, new_test_examples, id2label):
    """Run ``model`` on raw texts and translate its predictions into label names.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Tokenizer called on the whole batch of texts.
        new_test_examples: List of input strings.
        id2label: Mapping from output column index to label name.

    Returns:
        list[list[str]]: For every input text, the names of all columns that
        ``get_predictions`` set to 1.
    """
    tokenized_input = tokenizer(new_test_examples, truncation=True, padding="max_length", max_length=256, return_tensors="pt")
    # Inference only: make sure dropout is off and do not record an autograd graph
    model.eval()
    with torch.no_grad():
        logits = model(**tokenized_input).logits
    predictions = get_predictions(logits.detach().numpy())
    return [
        [id2label[i] for i, label in enumerate(row) if label == 1]
        for row in predictions
    ]

# Baseline: pretrained checkpoint; its classification head is newly initialised on load
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, id2label=id2label, label2id=label2id)
# Fine-tuned model loaded from the saved directory
finetuned_model = AutoModelForSequenceClassification.from_pretrained(finetuned_model_path)

# Decode the predictions of both models on the same examples
base_model_predictions = decode_predictions(base_model, tokenizer, new_test_examples, id2label)
finetuned_model_predictions = decode_predictions(finetuned_model, tokenizer, new_test_examples, id2label)

# Print each poem next to its decoded labels, baseline first
for heading, model_predictions in (
    ("Base Model Predictions:", base_model_predictions),
    ("Fine-Tuned Model Predictions:", finetuned_model_predictions),
):
    print(heading)
    for text, prediction in zip(new_test_examples, model_predictions):
        print(f"Input: {text}")
        print(f"Output: {prediction}\n")

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Base Model Predictions:
Input: Poem:
    A cannibal monarch imperial
    Kept his wives on a diet of cereal,
    But he didn't much care
    What the women should wear,
    Nor did they; it was quite immaterial.
Output: ['Non-Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    There once was a foppish old beau,
    Who said, "I find walking too sleau.
    So I prances down the street
    And throw out my feet
    And trip my fantastical teau."
Output: ['Non-Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    There was a young maid from Japan
    Who married a Hottentot man.
    The girl she was yellow.
    And black was the fellow.
    And their children were all black and tan.
Output: ['Non-Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    There was a poor fellow from Lynn,
    By accident sat on a pynn,
    He let out a shriek,
    A howl and a squiek.
    And his language was really a synn.
Output: ['Limerick', 'Not 5 Lines', 'AABBA Rhyme 

In [198]:
# We are setting up the base version of the same model without the fine tuning for comparison purposes
# Checkpoint name of the pretrained baseline
model_name = "google/electra-base-discriminator"
# Directory the training cell saved the fine-tuned model and tokenizer to
finetuned_model_path = "./fine_tuned_electra"

# A single tokenizer, loaded from the base checkpoint, is used for both models below
tokenizer = AutoTokenizer.from_pretrained(model_name)

# We test the model on new examples that were not in our dataset
new_test_examples = (
    # Limerick: one poem, repeated eight times
    ['''Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.'''] * 8
    # Non-Limerick: one truncated poem, repeated eight times
    + ['''Poem:
    There once was a
    Of which quantum.
    They said,
    On a theory
    Well, it works'''] * 8
)

# Column index -> label name; the reverse lookup is derived so the two cannot disagree
id2label = dict(enumerate([
    "Non-Limerick",
    "Limerick",
    "5 Lines",
    "Not 5 Lines",
    "AABBA Rhyme Scheme",
    "Not AABBA Rhyme Scheme",
]))
label2id = {label: index for index, label in id2label.items()}

# We need to have the model make the predictions and then return these predictions as output
def decode_predictions(model, tokenizer, new_test_examples, id2label):
    """Run ``model`` on raw texts and translate its predictions into label names.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Tokenizer called on the whole batch of texts.
        new_test_examples: List of input strings.
        id2label: Mapping from output column index to label name.

    Returns:
        list[list[str]]: For every input text, the names of all columns that
        ``get_predictions`` set to 1.
    """
    tokenized_input = tokenizer(new_test_examples, truncation=True, padding="max_length", max_length=256, return_tensors="pt")
    # Inference only: make sure dropout is off and do not record an autograd graph
    model.eval()
    with torch.no_grad():
        logits = model(**tokenized_input).logits
    predictions = get_predictions(logits.detach().numpy())
    return [
        [id2label[i] for i, label in enumerate(row) if label == 1]
        for row in predictions
    ]

# Baseline: pretrained checkpoint; its classification head is newly initialised on load
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, id2label=id2label, label2id=label2id)
# Fine-tuned model loaded from the saved directory
finetuned_model = AutoModelForSequenceClassification.from_pretrained(finetuned_model_path)

# Decode the predictions of both models on the same examples
base_model_predictions = decode_predictions(base_model, tokenizer, new_test_examples, id2label)
finetuned_model_predictions = decode_predictions(finetuned_model, tokenizer, new_test_examples, id2label)

# Print each poem next to its decoded labels, baseline first
for heading, model_predictions in (
    ("Base Model Predictions:", base_model_predictions),
    ("Fine-Tuned Model Predictions:", finetuned_model_predictions),
):
    print(heading)
    for text, prediction in zip(new_test_examples, model_predictions):
        print(f"Input: {text}")
        print(f"Output: {prediction}\n")

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Base Model Predictions:
Input: Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.
Output: ['Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.
Output: ['Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite real.
Output: ['Limerick', 'Not 5 Lines', 'AABBA Rhyme Scheme']

Input: Poem:
    God's first tries were hardly ideal,
    You see, complex worlds have no appeal.
    In the present edition,
    He made things Hermitian,
    And this world, it seems, is quite r