In [107]:
import json
from pathlib import Path
from random import shuffle

from openai import OpenAI
from pydantic import BaseModel

In [2]:
llm = OpenAI()

In [3]:
files = list(Path('../data').rglob('*.md'))

In [134]:
test = files[4]

In [136]:
with open(test, "r") as f:
    email = f.read()

In [162]:
with open("../data/reading_history_filtered.txt", "r") as f:
    reading_history = f.read()

with open("prompts/extraction.txt", "r") as f:
    prompt_extraction = f.read()

with open("prompts/infer_interests.txt", "r") as f:
    prompt_interest = f.read()

with open("prompts/personalization.txt", "r") as f:
    prompt_personalization = f.read()

In [163]:
chunks = email.split("* * *")

In [138]:
class NewsletterItem(BaseModel):
    headline: str
    url: str
    description: str

In [164]:
structured = []

for c in chunks:
    resp = llm.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": prompt_extraction
            },
            {
                "role": "user",
                "content": c
            }
        ],
        response_format=NewsletterItem
    )
    structured.append(resp.choices[0].message.content)

In [165]:
structured

['{"headline":"Democrats Are Upbeat, but Face a Tough Race Ahead","url":"https://www.nytimes.com/2024/08/23/briefing/democrats-after-convention-fed-rate-cut.html","description":"In Chicago, confidence ran high among Democrats after Kamala Harris\'s nomination, but the party recognizes the need to convert that optimism into votes for the upcoming election."}',
 '{"headline":"Powell said the ‘time has come’ for interest rate cuts","url":"https://www.nytimes.com/2024/08/23/business/economy/fed-rates-powell-jackson-hole.html","description":"Jerome Powell, the chair of the Federal Reserve, indicated that the central bank was on track to lower borrowing costs at its meeting next month after holding rates at a 20-year high to combat inflation."}',
 '{"headline":"Ukraine ramped up its attacks in Russia","url":"https://www.nytimes.com/2024/08/23/world/europe/ukraine-russia-war-crimea.html","description":"Ukrainian forces have expanded their control in the Kursk region of Russia while launching 

In [140]:
json_string = resp.choices[0].message.content
json_parsed = json.loads(json_string)

In [141]:
# randomly order the items
formatted_items = [f"HEADLINE: {i['headline']}\nDESCRIPTION: {i['description']}" for i in json_parsed["items"]]
shuffle(formatted_items)

In [142]:
formatted_items

["HEADLINE: Zoë Kravitz was frustrated, so she made a movie\nDESCRIPTION: Actress Zoë Kravitz channels her frustrations into her directorial debut, the horror-mystery film 'Blink Twice', which opens today.",
 'HEADLINE: Powell said the ‘time has come’ for interest rate cuts\nDESCRIPTION: Jerome Powell announced that the Federal Reserve is likely to lower interest rates next month to support the job market after holding them steady for over a year.',
 'HEADLINE: Democrats Are Upbeat, but Face a Tough Race Ahead\nDESCRIPTION: Despite a jubilant atmosphere at the Democratic convention in Chicago, Kamala Harris and fellow Democrats face a tough electoral challenge ahead.',
 "HEADLINE: Ukraine ramped up its attacks in Russia\nDESCRIPTION: Ukrainian forces have gained control over more territory in Russia's Kursk region and have intensified attacks on key Russian military infrastructure."]

In [149]:
personalization_input_format = f"""USER READING HISTORY: {reading_history}

NEWSLETTER ITEMS: {'\n'.join(formatted_items)}
"""

In [150]:
personalization_resp = llm.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {
            "role": "system",
            "content": prompt_personalization
        },
        {
            "role": "user",
            "content": personalization_input_format
        }
    ]
)

In [151]:
print(personalization_resp.choices[0].message.content)

==!BEGIN INTERNAL REASONING!==
Analyzing the reader's recent reading history shows an eclectic mix of interests, ranging from pop culture and entertainment (e.g., articles about Keanu Reeves, Martin Mull, and Bo Burnham) to more serious topics concerning social issues (e.g., autistic employees navigating workplaces and the housing situation in Sweden). This reader seems to appreciate a blend of quirky, insightful storytelling and significant global stories, particularly those that highlight societal changes or evoke emotional engagement.

For the newsletter items, I sense that I need to align a selected headline with themes that resonate with this reader. The movie directed by Zoë Kravitz stands out as it combines the entertainment industry (which the reader has shown interest in) with a personal touch regarding how frustrations can lead to artistic expression. Additionally, topics surrounding societal issues, like the race dynamics within the Democratic party or ongoing conflicts, cou