In [6]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")

model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [7]:
input_text = "translate English to German: How old are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0]))



<pad> Wie alte sind Sie?</s>


In [8]:
import json

data = []
with open("TimeTravel/dev_data.json", "r") as file:
    for line in file:
        data.append(json.loads(line.strip()))

In [9]:
selected_stories = data[:2]

for i, story in enumerate(selected_stories, start=1):
    print(f"STORY {i}")
    print(f"premise: {story['premise']}")
    print(f"initial: {story['initial']}")
    print(f"counterfactual: {story['counterfactual']}")
    print(f"original ending: {story['original_ending']}")
    print("edited endings:")
    for j, edited in enumerate(story['edited_endings'], start=1):
        print(f"  edited ending {j}: {edited}")
    print("\n")

STORY 1
premise: Ryan was called by his friend to skip work one day.
initial: He missed his train to work and instead went to the park.
counterfactual: But Ryan had an important project at work and went in to finish it.
original ending: Ryan and his friend played with birds at the park all day. At the end of the day, they left the park and saw Ryan's boss. Ryan got fired.
edited endings:
  edited ending 1: ['After he finished it Ryan and his friend played with birds at the park the rest of the day.', "At the end of the day, they left the park and saw Ryan's boss.", "Ryan's boss congratulated him on the great work he did on the project."]
  edited ending 2: ["Ryan's friend played with birds at the park all day.", "At the end of the day, Ryan's friend left the park and saw Ryan's boss.", 'Ryan was glad he went to work.']
  edited ending 3: ['Ryan and his friend fell out about it.', "At the end of the day, Ryan told his friend's boss.", 'His friend got fired.']


STORY 2
premise: Neil had

In [10]:
def generate_edited_ending(story, prompt_type):
    if prompt_type == 1:
        prompt = f"""Given the following story, rewrite the ending based on the Counterfactual.
        Premise: {story['premise']}
        Initial sentence: {story['initial']}
        Counterfactual: {story['counterfactual']}

        Please rewrite the ending to align with the Counterfactual."""
        
    elif prompt_type == 2:
        prompt = f"""Rewrite the ending of this story to match the change:
        Premise: {story['premise']}
        Initial: {story['initial']}
        Counterfactual: {story['counterfactual']}
        Original Ending: {story['original_ending']}

        Provide a new ending to match the Counterfactual."""

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    outputs = model.generate(input_ids, max_length=150, num_beams=5)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [11]:
for i, story in enumerate(selected_stories, start=1):
    print(f"STORY {i}")
    print(f"prompt 1 edited ending: {generate_edited_ending(story, prompt_type=1)}\n")
    print(f"prompt 2 edited ending: {generate_edited_ending(story, prompt_type=2)}\n")

STORY 1
prompt 1 edited ending: He missed his train to work and instead went to the park.

prompt 2 edited ending: Ryan and his friend played with birds at the park all day. At the end of the day, they left the park and saw Ryan's boss. Ryan got fired.

STORY 2
prompt 1 edited ending: Now he had worked his way south into Australia.

prompt 2 edited ending: Neil was so excited to see Australian culture. He was thrilled at the prospect of exotic animals and people! His favorite moment was when he got to feed a baby koala bear.



In [12]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [13]:
story_1_references = [
    ['After he finished it Ryan and his friend played with birds at the park the rest of the day.',
     "At the end of the day, they left the park and saw Ryan's boss.",
     "Ryan's boss congratulated him on the great work he did on the project."],
    ["Ryan's friend played with birds at the park all day.",
     "At the end of the day, Ryan's friend left the park and saw Ryan's boss.",
     'Ryan was glad he went to work.'],
    ['Ryan and his friend fell out about it.',
     "At the end of the day, Ryan told his friend's boss.",
     'His friend got fired.']
]

story_1_prompt_1_hypothesis = ["He missed his train to work and instead went to the park."]
story_1_prompt_2_hypothesis = [
    "Ryan and his friend played with birds at the park all day.",
    "At the end of the day, they left the park and saw Ryan's boss.",
    "Ryan got fired."
]

story_2_references = [
    ['Neil was upset that he would experience the Thai island culture.',
     'He had been thrilled at the prospect of exotic animals and people.',
     'He was hoping he would be able to feed exotic animals.'],
    ['Neil was so disappointed not to see Australian culture.',
     'He was saddened that he would miss out on seeing the exotic animals and people!',
     'His least favorite moment was leaving Asia to fly back home.'],
    ['Neil was upset about his trip to Asia.',
     'He had been looking forward to seeing exotic animals and people.',
     'He would have loved to feed an exotic animal.']
]

story_2_prompt_1_hypothesis = ["Now he had worked his way south into Australia."]
story_2_prompt_2_hypothesis = [
    "Neil was so excited to see Australian culture.",
    "He was thrilled at the prospect of exotic animals and people!",
    "His favorite moment was when he got to feed a baby koala bear."
]

In [14]:
def calculate_bleu_score(references, hypothesis):
    smoothing_fn = SmoothingFunction().method1  # Apply smoothing
    return sentence_bleu(references, hypothesis, smoothing_function=smoothing_fn)

In [15]:
story_1_prompt_1_bleu = calculate_bleu_score(story_1_references, story_1_prompt_1_hypothesis)
story_1_prompt_2_bleu = calculate_bleu_score(story_1_references, story_1_prompt_2_hypothesis)
story_2_prompt_1_bleu = calculate_bleu_score(story_2_references, story_2_prompt_1_hypothesis)
story_2_prompt_2_bleu = calculate_bleu_score(story_2_references, story_2_prompt_2_hypothesis)

In [16]:
print(f"Story 1 Prompt 1 BLEU Score: {story_1_prompt_1_bleu:.4f}")
print(f"Story 1 Prompt 2 BLEU Score: {story_1_prompt_2_bleu:.4f}")
print(f"Story 2 Prompt 1 BLEU Score: {story_2_prompt_1_bleu:.4f}")
print(f"Story 2 Prompt 2 BLEU Score: {story_2_prompt_2_bleu:.4f}")

Story 1 Prompt 1 BLEU Score: 0.0000
Story 1 Prompt 2 BLEU Score: 0.1136
Story 2 Prompt 1 BLEU Score: 0.0000
Story 2 Prompt 2 BLEU Score: 0.0000
