In [None]:
import requests
from io import BytesIO
from dotenv import load_dotenv
from tqdm.notebook import tqdm
import os
import openai
from pypdf import PdfReader
import json
import random


# Load variables from a .env file (python-dotenv) before reading the API key below.
load_dotenv()

# TODO(PG): why doesn't this automatically pick it up from the env?
# NOTE(review): possibly because `openai` is imported before load_dotenv() runs,
# so the variable is not yet set when the library initialises — confirm.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Target size of the final poem set; later cells take half from GPT and half
# from the book.
NUM_POEMS = 50

In [None]:
def download_pdf(url: str, timeout: float = 60.0) -> BytesIO:
    """Fetch the file at ``url`` and return its bytes as an in-memory stream.

    Args:
        url: Address of the file to download.
        timeout: Seconds to wait on the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        A ``BytesIO`` wrapping the raw response body.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here instead of handing an error page to the PDF parser downstream.
    response.raise_for_status()
    return BytesIO(response.content)


# PDF of the book, hosted on archive.org.
BOOK_PDF_URL = "https://ia800707.us.archive.org/31/items/milk-and-honey-by-rupi-kaur/milk-and-honey-by-rupi-kaur.pdf"

# Keep the raw bytes in memory so later cells can re-parse without re-downloading.
downloaded_book = download_pdf(url=BOOK_PDF_URL)

In [None]:
def parse_pages(pdf_stream=None):
    """Extract the text of every page in a PDF.

    Args:
        pdf_stream: File-like object (e.g. ``BytesIO``) to hand to
            ``PdfReader``. Defaults to the module-level ``downloaded_book``
            so existing zero-argument calls keep working.

    Returns:
        A list with one extracted-text entry per page, in page order.
    """
    if pdf_stream is None:
        # Backward-compatible fallback to the notebook's downloaded book.
        pdf_stream = downloaded_book
    reader = PdfReader(pdf_stream)
    return [page.extract_text() for page in reader.pages]


rupi_poems = [
    # format tabs as spaces
    page_text.replace("\t", " ")
    # remove the non-poetry pages (first 6 and last 7)
    for page_text in parse_pages()[6:-7]
    # remove empty pages
    if page_text.strip()
]

In [None]:
def create_poem():
    """Request one poem from the ``gpt-4`` chat model and return its text.

    Sends a single user message containing the fixed style instructions
    below (temperature 1) and returns the content of the first choice in
    the response.
    """
    instructions = """You are a millennial poet whose biggest influence is Rupi Kaur. You will write poems in the style of Rupi Kaur.
Your poems will be simplistic in language and explores South Asian identity, immigration, and femininity and other themes from her poems.

The poems should take the form of instapoems which usually consist of short, direct lines with or without a rhyme scheme.

Each poem should be between 3-8 lines long. Each line should be under 8 words.

Write 1 poem.
"""
    # The whole request is a single user turn; no system message is sent.
    conversation = [{"role": "user", "content": instructions}]
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=conversation,
        temperature=1,
    )
    first_choice = completion.choices[0]
    return first_choice["message"]["content"]


# Generate half of the target set with one create_poem() call per poem;
# tqdm shows progress while the requests run.
generated_poems = list(
    create_poem() for _ in tqdm(range(int(NUM_POEMS / 2)))
)

In [None]:
# Combine the generated poems with an equal-sized random sample from the book,
# tagging each entry with its origin, then emit everything as JSON.
selected_poems = [
    # generated poems first...
    *({"isGPT": True, "text": poem} for poem in generated_poems),
    # ...followed by randomly chosen poems from the book
    *(
        {"isGPT": False, "text": poem}
        for poem in random.sample(rupi_poems, int(NUM_POEMS / 2))
    ),
]
print(json.dumps(selected_poems))