# Clickbait Spoiler Generation using GPT-3

In [1]:
# Make the project's `src` directory importable from this notebook.
import os
import sys

# The relative path is resolved against the current working directory.
SRC_DIR = os.path.abspath('../src')
# Guarded so that re-running this cell does not pile up duplicate entries.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [2]:
import models.gpt3 as gpt3
import utils.openai

from prepare_data_openai import OPENAI_MODEL

In [3]:
# Model identifier passed to gpt3.predict in the (currently disabled) prediction cells.
MODEL_ID = "ada:ft-personal:clickbait-spoiler-2023-06-11-16-56-34"
# NOTE(review): not referenced anywhere else in this notebook — presumably a cap on the
# evaluated/generated length; confirm it is still needed.
OPENAI_MAX_EVAL_LEN = 20

## Validation

In [4]:
# Load the validation examples prepared for the OpenAI model.
# NOTE(review): `validation` is rebound further down by utils.read_results(...); only the
# disabled prediction loop reads this version.
validation = gpt3.read_data("../data/parsed/openai/validation.jsonl")

In [5]:
# Estimate the cost of running the validation file through OPENAI_MODEL.
# NOTE(review): the helper is not visible here — the unit of the returned number is presumably USD; confirm.
utils.openai.estimate_costs_fine_tune_usage("../data/parsed/openai/validation.jsonl", OPENAI_MODEL)

0.2368816

Predicting

In [6]:
# for val in validation:
#     val["prediction"] = gpt3.predict([val["prompt"]], MODEL_ID, sleep_time=1.2)[0][0]

In [7]:
import utils
# Writing fresh predictions is disabled; the next cell reads the CSV at this same path instead.
# utils.write_results("../data/results/openai/validation.csv", validation)

In [8]:
# Load previously saved predictions. This rebinds `validation`; the evaluation cells below
# read the "prediction", "completion" and "type" fields of each row.
validation = utils.read_results("../data/results/openai/validation.csv")

Evaluation

In [9]:
import evaluate

# Load the three metrics once, in the same order as before; every evaluation
# cell in the notebook reuses these objects.
meteor, bleu, bertscore = (
    evaluate.load(metric_name) for metric_name in ("meteor", "bleu", "bertscore")
)

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/ddsantos/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /Users/ddsantos/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/ddsantos/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Total

In [10]:
# Score the whole validation split. Predictions and references are collected
# once and shared by all three metrics.
val_preds = [row["prediction"] for row in validation]
val_refs = [row["completion"] for row in validation]

meteor_results = meteor.compute(predictions=val_preds, references=val_refs)
bleu_results = bleu.compute(predictions=val_preds, references=val_refs)
bertscore_results = bertscore.compute(predictions=val_preds, references=val_refs, lang="en")

# The BERTScore `f1` entry is a sequence of scores; its arithmetic mean is reported.
print(f"Meteor: {meteor_results['meteor']}\nBLEU-4: {bleu_results['bleu']}\nBERTscore Mean F1: {sum(bertscore_results['f1'])/len(bertscore_results['f1'])}")

Meteor: 0.3800449631079426
BLEU-4: 0.3316278882234632
BERTscore Mean F1: 0.8889159325397376


Phrase

In [11]:
# Keep only the rows whose spoiler type is "phrase".
validation_phrase = [row for row in validation if row["type"] == "phrase"]

# Collect predictions and references once and share them across the metrics.
val_phrase_preds = [row["prediction"] for row in validation_phrase]
val_phrase_refs = [row["completion"] for row in validation_phrase]

meteor_results = meteor.compute(predictions=val_phrase_preds, references=val_phrase_refs)
bleu_results = bleu.compute(predictions=val_phrase_preds, references=val_phrase_refs)
bertscore_results = bertscore.compute(predictions=val_phrase_preds, references=val_phrase_refs, lang="en")

# The BERTScore `f1` entry is a sequence of scores; its arithmetic mean is reported.
print(f"Meteor: {meteor_results['meteor']}\nBLEU-4: {bleu_results['bleu']}\nBERTscore Mean F1: {sum(bertscore_results['f1'])/len(bertscore_results['f1'])}")

Meteor: 0.4438928251692932
BLEU-4: 0.03932456483142731
BERTscore Mean F1: 0.9037742036492077


Passage

In [12]:
# Keep only the rows whose spoiler type is "passage".
validation_passage = [row for row in validation if row["type"] == "passage"]

# Collect predictions and references once and share them across the metrics.
val_passage_preds = [row["prediction"] for row in validation_passage]
val_passage_refs = [row["completion"] for row in validation_passage]

meteor_results = meteor.compute(predictions=val_passage_preds, references=val_passage_refs)
bleu_results = bleu.compute(predictions=val_passage_preds, references=val_passage_refs)
bertscore_results = bertscore.compute(predictions=val_passage_preds, references=val_passage_refs, lang="en")

# The BERTScore `f1` entry is a sequence of scores; its arithmetic mean is reported.
print(f"Meteor: {meteor_results['meteor']}\nBLEU-4: {bleu_results['bleu']}\nBERTscore Mean F1: {sum(bertscore_results['f1'])/len(bertscore_results['f1'])}")

Meteor: 0.30812061937505264
BLEU-4: 0.22363759309880255
BERTscore Mean F1: 0.8730579776528441


Multi

In [13]:
# Keep only the rows whose spoiler type is "multi".
validation_multi = [row for row in validation if row["type"] == "multi"]

# Collect predictions and references once and share them across the metrics.
val_multi_preds = [row["prediction"] for row in validation_multi]
val_multi_refs = [row["completion"] for row in validation_multi]

meteor_results = meteor.compute(predictions=val_multi_preds, references=val_multi_refs)
bleu_results = bleu.compute(predictions=val_multi_preds, references=val_multi_refs)
bertscore_results = bertscore.compute(predictions=val_multi_preds, references=val_multi_refs, lang="en")

# The BERTScore `f1` entry is a sequence of scores; its arithmetic mean is reported.
print(f"Meteor: {meteor_results['meteor']}\nBLEU-4: {bleu_results['bleu']}\nBERTscore Mean F1: {sum(bertscore_results['f1'])/len(bertscore_results['f1'])}")

Meteor: 0.4110062647930146
BLEU-4: 0.529826208234148
BERTscore Mean F1: 0.8946957504749298


## Test

In [14]:
# Load the test examples prepared for the OpenAI model.
# NOTE(review): `test` is rebound further down by utils.read_results(...); only the
# disabled prediction loop reads this version.
test = gpt3.read_data("../data/parsed/openai/test.jsonl")

In [15]:
# Estimate the cost of running the test file through OPENAI_MODEL.
# NOTE(review): the helper is not visible here — the unit of the returned number is presumably USD; confirm.
utils.openai.estimate_costs_fine_tune_usage("../data/parsed/openai/test.jsonl", OPENAI_MODEL)

0.6292256

Predicting

In [16]:
# for t in test:
#     t["prediction"] = gpt3.predict([t["prompt"]], MODEL_ID, sleep_time=1.2)[0][0]

In [17]:
# utils.write_results("../data/results/openai/test.csv", test)

In [18]:
# Load previously saved predictions. This rebinds `test`; the evaluation cells below
# read the "prediction", "completion" and "type" fields of each row.
test = utils.read_results("../data/results/openai/test.csv")

Evaluation

Total

In [19]:
# Score the whole test split. Predictions and references are collected once
# and shared by all three metrics.
test_preds = [row["prediction"] for row in test]
test_refs = [row["completion"] for row in test]

meteor_results = meteor.compute(predictions=test_preds, references=test_refs)
bleu_results = bleu.compute(predictions=test_preds, references=test_refs)
bertscore_results = bertscore.compute(predictions=test_preds, references=test_refs, lang="en")

# The BERTScore `f1` entry is a sequence of scores; its arithmetic mean is reported.
print(f"Meteor: {meteor_results['meteor']}\nBLEU-4: {bleu_results['bleu']}\nBERTscore Mean F1: {sum(bertscore_results['f1'])/len(bertscore_results['f1'])}")

Meteor: 0.3748973172668995
BLEU-4: 0.23299473243685423
BERTscore Mean F1: 0.8906126411452245


In [20]:
# Keep only the rows whose spoiler type is "phrase".
test_phrase = [row for row in test if row["type"] == "phrase"]

# Collect predictions and references once and share them across the metrics.
test_phrase_preds = [row["prediction"] for row in test_phrase]
test_phrase_refs = [row["completion"] for row in test_phrase]

meteor_results = meteor.compute(predictions=test_phrase_preds, references=test_phrase_refs)
bleu_results = bleu.compute(predictions=test_phrase_preds, references=test_phrase_refs)
bertscore_results = bertscore.compute(predictions=test_phrase_preds, references=test_phrase_refs, lang="en")

# The BERTScore `f1` entry is a sequence of scores; its arithmetic mean is reported.
print(f"Meteor: {meteor_results['meteor']}\nBLEU-4: {bleu_results['bleu']}\nBERTscore Mean F1: {sum(bertscore_results['f1'])/len(bertscore_results['f1'])}")

Meteor: 0.4578975768614709
BLEU-4: 0.061002572768710336
BERTscore Mean F1: 0.9107595882309016


In [21]:
# Keep only the rows whose spoiler type is "passage".
test_passage = [row for row in test if row["type"] == "passage"]

# Collect predictions and references once and share them across the metrics.
test_passage_preds = [row["prediction"] for row in test_passage]
test_passage_refs = [row["completion"] for row in test_passage]

meteor_results = meteor.compute(predictions=test_passage_preds, references=test_passage_refs)
bleu_results = bleu.compute(predictions=test_passage_preds, references=test_passage_refs)
bertscore_results = bertscore.compute(predictions=test_passage_preds, references=test_passage_refs, lang="en")

# The BERTScore `f1` entry is a sequence of scores; its arithmetic mean is reported.
print(f"Meteor: {meteor_results['meteor']}\nBLEU-4: {bleu_results['bleu']}\nBERTscore Mean F1: {sum(bertscore_results['f1'])/len(bertscore_results['f1'])}")

Meteor: 0.2828155192860309
BLEU-4: 0.20985733289455827
BERTscore Mean F1: 0.8703298328823074


In [22]:
# Keep only the rows whose spoiler type is "multi".
test_multi = [row for row in test if row["type"] == "multi"]

# Collect predictions and references once and share them across the metrics.
test_multi_preds = [row["prediction"] for row in test_multi]
test_multi_refs = [row["completion"] for row in test_multi]

meteor_results = meteor.compute(predictions=test_multi_preds, references=test_multi_refs)
bleu_results = bleu.compute(predictions=test_multi_preds, references=test_multi_refs)
bertscore_results = bertscore.compute(predictions=test_multi_preds, references=test_multi_refs, lang="en")

# The BERTScore `f1` entry is a sequence of scores; its arithmetic mean is reported.
print(f"Meteor: {meteor_results['meteor']}\nBLEU-4: {bleu_results['bleu']}\nBERTscore Mean F1: {sum(bertscore_results['f1'])/len(bertscore_results['f1'])}")

Meteor: 0.37433434675124583
BLEU-4: 0.3864844930392026
BERTscore Mean F1: 0.8851155984786249


# Clickbait Spoiler Generation using LLaMA

In [23]:
# !python ../src/models/llama/alpaca-lora/generate.py \
#     --load_8bit \
#     --base_model $MODEL_CHECKPOINT  \
#     --lora_weights $SAVE_CHECKPOINT_PATH

In [24]:
# utils.write_results("../data/results/validation.csv", validation)

# Clickbait Spoiler Generation using BERT

In [25]:
# Batch size handed to model.prepare_tf_dataset when building the inference datasets.
BATCH_SIZE = 3
# Checkpoint location used by both from_pretrained(...) and the question-answering pipeline below.
SAVE_CHECKPOINT_PATH = "../src/models/bert/clickbait"

In [26]:
from utils.bert import TOKENIZER
from transformers import TFAutoModelForQuestionAnswering

# Load the TensorFlow question-answering model from the saved checkpoint directory.
model = TFAutoModelForQuestionAnswering.from_pretrained(SAVE_CHECKPOINT_PATH)

In [27]:
from datasets import load_dataset

# Load the train/test/validation splits through the project's dataset loading script.
# NOTE(review): the data_files entries are bare file names — presumably resolved by the
# loading script relative to its own folder; confirm in clickbait_data.py.
raw_datasets = load_dataset("../data/parsed/bert/clickbait_data.py", data_files={
    "train": "train.jsonl", 
    "test": "test.jsonl", 
    "validation": "validation.jsonl"
})

## Validation

In [28]:
import utils.bert

# Restrict the validation split to "phrase"-type examples before tokenising.
raw_validation = raw_datasets["validation"].filter(
    lambda example: example["type"] == "phrase"
)
# Batched preprocessing; the original columns are dropped so only the
# features produced by preprocess_validation remain.
validation_dataset = raw_validation.map(
    utils.bert.preprocess_validation,
    remove_columns=raw_validation.column_names,
    batched=True,
)
# Display the example count before and after preprocessing.
len(raw_validation), len(validation_dataset)

In [29]:
from transformers import DefaultDataCollator

# Collator that returns TensorFlow tensors.
data_collator = DefaultDataCollator(return_tensors="tf")

# Build the inference dataset without shuffling, so the feature order is kept.
tf_val_dataset = model.prepare_tf_dataset(
    validation_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=data_collator,
)

Predicting

In [30]:
# Run inference on the validation features; the metrics cell reads the
# "start_logits" and "end_logits" entries of this result.
val_predictions = model.predict(tf_val_dataset)

In [31]:
# Score the model's start/end logits against the reference answers.
# Expects `val_predictions` to be the object returned by model.predict.
# NOTE(review): compute_metrics lives in utils.bert and is not visible here — presumably it
# maps logits back to answer spans using the processed and raw datasets; confirm there.
val_metrics = utils.bert.compute_metrics(
    val_predictions["start_logits"],
    val_predictions["end_logits"],
    validation_dataset,
    raw_validation,
)
val_metrics

In [32]:
from transformers import pipeline
# Question-answering pipeline built from the same checkpoint path as `model`, using the
# project's TOKENIZER; it is used below to obtain answer strings.
question_answerer = pipeline("question-answering", model=SAVE_CHECKPOINT_PATH, tokenizer=TOKENIZER)

In [33]:
# Run the question-answering pipeline over the phrase validation examples to
# obtain answer strings. The output gets its own name so that the logits held
# in `val_predictions` (from model.predict) are not overwritten; otherwise the
# compute_metrics cell could not be re-run after this one.
val_pipeline_outputs = question_answerer(question=raw_validation["question"], context=raw_validation["context"])

# Extracted answer text for each example.
val_predicted_answers = [output["answer"] for output in val_pipeline_outputs]
# The first reference answer of each example serves as the gold text.
val_expected_answers = [answer["text"][0] for answer in raw_validation["answers"]]

In [34]:
# Persist one row per example, pairing the pipeline's answer with the gold answer.
utils.write_results(
    "../data/results/bert/validation.csv",
    [
        {"prediction": predicted, "completion": expected}
        for predicted, expected in zip(val_predicted_answers, val_expected_answers)
    ],
)

## Test

In [35]:
# Restrict the test split to "phrase"-type examples before tokenising.
raw_test = raw_datasets["test"].filter(
    lambda example: example["type"] == "phrase"
)
# Batched preprocessing; the original columns are dropped so only the
# features produced by preprocess_validation remain.
test_dataset = raw_test.map(
    utils.bert.preprocess_validation,
    remove_columns=raw_test.column_names,
    batched=True,
)
# Display the example count before and after preprocessing.
len(raw_test), len(test_dataset)

In [36]:
# Build the inference dataset without shuffling, so the feature order is kept.
tf_test_dataset = model.prepare_tf_dataset(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=data_collator,
)

Predicting

In [37]:
# Run inference on the test features; the metrics cell reads the
# "start_logits" and "end_logits" entries of this result.
test_predictions = model.predict(tf_test_dataset)

In [38]:
# Score the model's start/end logits against the reference answers.
# Expects `test_predictions` to be the object returned by model.predict.
# NOTE(review): compute_metrics lives in utils.bert and is not visible here — presumably it
# maps logits back to answer spans using the processed and raw datasets; confirm there.
test_metrics = utils.bert.compute_metrics(
    test_predictions["start_logits"],
    test_predictions["end_logits"],
    test_dataset,
    raw_test,
)
test_metrics

In [39]:
# Run the question-answering pipeline over the phrase test examples to obtain
# answer strings. The output gets its own name so that the logits held in
# `test_predictions` (from model.predict) are not overwritten; otherwise the
# compute_metrics cell could not be re-run after this one.
test_pipeline_outputs = question_answerer(question=raw_test["question"], context=raw_test["context"])

# Extracted answer text for each example.
test_predicted_answers = [output["answer"] for output in test_pipeline_outputs]
# The first reference answer of each example serves as the gold text.
test_expected_answers = [answer["text"][0] for answer in raw_test["answers"]]

In [40]:
# Persist one row per example, pairing the pipeline's answer with the gold answer.
utils.write_results(
    "../data/results/bert/test.csv",
    [
        {"prediction": predicted, "completion": expected}
        for predicted, expected in zip(test_predicted_answers, test_expected_answers)
    ],
)