# START


In [2]:
import math
import re
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

import torch
from datasets import load_dataset
from transformers import (
    TrainerCallback,
    GPT2Config,
    GPT2Tokenizer,
    GPT2LMHeadModel,
    AutoConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    AdamW,
    TrainingArguments,
    Trainer,
)

In [5]:
# Load the fine-tuned story-generation checkpoint by name: its tokenizer, its
# configuration, and the causal language model instantiated with that configuration.
finetuned_model_name = 'pranavpsv/gpt2-story-gen'
tokenizer = AutoTokenizer.from_pretrained(finetuned_model_name)
config = AutoConfig.from_pretrained(finetuned_model_name)
model = AutoModelForCausalLM.from_pretrained(
    finetuned_model_name,
    config=config,
)

Downloading:   0%|          | 0.00/666 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/487M [00:00<?, ?B/s]

In [6]:
# Wrap the loaded model and tokenizer in a text-generation pipeline and sample
# two stories from a hand-written prompt as a quick sanity check.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
stories = generator(
    "<BOS> <action> Shrek in the swamp <SEP> He was in the ",
    max_length=200,
    num_return_sequences=2,
)
# sep="" matters: print's default " " separator would prefix every story after
# the first with a stray space.
print(*[story['generated_text'] + "\n\n\n------------------------\n" for story in stories], sep="")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<BOS> <action> Shrek in the swamp <SEP> He was in the urn that day. He is still alive. When he is on his way to rescue him, he leaves the urn behind, telling everyone to go where they take him. As they are leaving, they are attacked by BOS, who is having some fun with his friends. Back at the hotel, Doraemon, the maid, is being taken hostage by BOS and is trying to make out with him. Suddenly he realizes he is in the urn. At that moment, BOS appears in front of the hotel to attack her with guns and the group takes shelter in a hotel room for a time. BOS takes the elevator to the top floor, where he and the others are waiting for him.
They climb into the elevator and run into a woman, who turns out to be BEST (Forbidden Love Triangle). She tells them that her fiancé Ritsuko


------------------------
 <BOS> <action> Shrek in the swamp <SEP> He was in the urn. It's his destiny that he'll meet his dad and ask him for a good ol' handjob. But his dad wants him to take him across the border 

# Load test data

In [98]:
# Read the prompts from the plain-text file "test.txt" (one example per line).
# The "text" loader files everything under a single 'train' split; that split
# is used here as the test set.
test_dataset = load_dataset("text", data_files="test.txt")["train"]
test_dataset

Using custom data configuration default-7e85f0f498f7a3fa
Reusing dataset text (/Users/antonclaesson/.cache/huggingface/datasets/text/default-7e85f0f498f7a3fa/0.0.0/e16f44aa1b321ece1f87b07977cc5d70be93d69b20486d6dacd62e12cf25c9a5)


  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['text'],
    num_rows: 1000
})

In [47]:
# Example: preview the first ten test prompts, one per line.
# Slicing the dataset returns a dict of columns, so this also works when the
# file holds fewer than ten rows.
print("\n".join(test_dataset[:10]["text"]))

<BOS> <drama> <romantic drama> <romance film> <action> Days of Thunder <SEP> 
 <BOS> <romance film> Krishnagudiyil Oru Pranayakalathu <SEP> 
 <BOS> <drama> <comedy> <romance film> Annie Hall <SEP> 
 <BOS> <romance film> <comedy> <indie> Mighty Aphrodite <SEP> 
 <BOS> <thriller> <crime fiction> Phone Booth <SEP> 
 <BOS> <romantic drama> <romance film> <drama> Anna Lucasta <SEP> 
 <BOS> <drama> <comedy> Meet Me Tonight <SEP> 
 <BOS> <thriller> <short film> The Cat Piano <SEP> 
 <BOS> <short film> <family film> Hare-Abian Nights <SEP> 
 <BOS> <drama> Distant Thunder <SEP> 



In [101]:
# Generate one plot continuation per test prompt and collect prompts and plots
# in two parallel lists.
NUM_PROMPTS = 2  # smoke-test size; raise to len(test_dataset) to cover the whole test set

text_inputs = []
generated_plots = []

for text_input in test_dataset[:NUM_PROMPTS]["text"]:
    # `device` is an argument of pipeline(), not of the call, so it is not
    # passed here; at call time it would be forwarded as a generation kwarg.
    result = generator(text_input, max_length=80, num_return_sequences=1)[0]['generated_text']

    # The generated text starts with the prompt. Remove it as a literal prefix:
    # using the prompt as a regex pattern breaks on titles that contain
    # metacharacters such as ( ) ? * + and would also delete later repeats.
    if result.startswith(text_input):
        plot = result[len(text_input):]
    else:
        # Fallback if decoding altered the leading text: drop the first literal match, if any.
        plot = result.replace(text_input, '', 1)

    text_inputs.append(text_input)
    generated_plots.append(plot)

print(text_inputs)
print(generated_plots)
    

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


['<BOS> <drama> <romantic drama> <romance film> <action> Days of Thunder <SEP> ', '<BOS> <romance film> Krishnagudiyil Oru Pranayakalathu <SEP> ']
['介阿 冐制語\nThe film opens in 1980s Seoul with the gang fighting gangs and trying to survive in an urban environment. As the four gangs enter a cafe, they are ambushed by a gangster.', '������������������������������ࠂ) (4\xa0) (1.) a famous dancer from a poor family on the path']
