In [None]:
import requests
import json
import uuid
from tqdm import tqdm
import os

In [None]:
# Generated by o3

# | Rank\* | Typical opener            | Why it shows up so often                                                                                                                                             |
# | ------ | ------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
# | 1      | **The**                   | Sets up a specific noun phrase (“The FDA…”, “The study…”)—still the single most common first token in mainstream-news headlines and story texts. ([BuzzSumo.com][1]) |
# | 2      | **This**                  | Demonstrative that promises immediacy or a reveal (“This graph shows…”) and tops click-through studies of social headlines. ([Medium][2])                            |
# | 3      | **How**                   | Signals an explainer or guide; “How …” leads consistently sit in the top 5 of Facebook-shared headlines. ([BuzzSumo.com][1])                                         |
# | 4      | **Why**                   | Frames a causal narrative or opinion piece; performs well on both Facebook and Twitter. ([BuzzSumo.com][1])                                                          |
# | 5      | **What**                  | Teaser for a fact list (“What you need to know …”). ([BuzzSumo.com][1])                                                                                              |
# | 6      | **A / An**                | Indefinite article; opens softer features or profile pieces (“A small town in …”). Corpus counts put it just behind *the*.                                           |
# | 7      | **In**                    | Preposition for situating events in time or place (“In 2025,” “In Chicago,”). Common in hard-news ledes.                                                             |
# | 8      | **New**                   | Evokes novelty; BuzzSumo finds “new” + noun headlines among the most reshared. ([BuzzSumo.com][1])                                                                   |
# | 9      | **Here**                  | Deictic opener for listicles/resource posts (“Here are the 7 …”). ([BuzzSumo.com][1])                                                                                |
# | 10     | **Numerals (10, 5, 7 …)** | List-post convention—numbers trigger higher engagement and scanning. ([BuzzSumo.com][1])                                                                             |

# [1]: https://buzzsumo.com/blog/most-shared-headlines-study/ "100m Articles Analyzed: What You Need To Write The Best Headlines | BuzzSumo.com"
# [2]: https://medium.com/the-mission/this-new-data-will-make-you-rethink-how-you-write-headlines-751358f6639a "This New Data Will Make You Rethink How You Write Headlines | by Larry Kim | Mission.org | Medium"

In [None]:
# Opening words used as completion prompts, in the rank order of the table
# above (ranks 1-8, with "A / An" split into two entries). The rank 9-10
# openers ("Here" and numerals) are not included.
common_beginning_words = "The This How Why What A An In New".split()

In [None]:
import time

total = 16  # target number of saved completions per opening word
max_retries = 3
timeout = 30  # seconds, per HTTP request

# Prefer a key from the environment so a real secret never has to be pasted
# into the notebook; the placeholder is kept as the fallback.
api_key = os.environ.get("OPENROUTER_API_KEY", "YOUR_API_KEY")


def _request_completion(prompt):
    """Return the completion text generated for *prompt*.

    Posts *prompt* to the OpenRouter completions endpoint. Timeouts and
    connection errors are retried up to ``max_retries`` times with
    exponential backoff (1 s, 2 s, ...); the error from the final attempt
    is re-raised.

    Raises:
        requests.exceptions.Timeout, requests.exceptions.ConnectionError:
            every attempt failed at the transport level.
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx
            status.
        RuntimeError: the response body carries no ``choices``.
    """
    for attempt in range(max_retries):
        try:
            response = requests.post(
                url="https://openrouter.ai/api/v1/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                data=json.dumps(
                    {
                        "model": "openai/gpt-oss-20b",
                        "prompt": prompt,
                        "max_tokens": 1000,
                        "temperature": 1.0,
                    }
                ),
                timeout=timeout,
            )
            break  # Success, exit retry loop
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            if attempt == max_retries - 1:
                raise  # Out of attempts: surface the transport error
            time.sleep(2 ** attempt)  # Exponential backoff

    # Fail with the real HTTP error (bad key, rate limit, ...) instead of a
    # KeyError when indexing the error payload below.
    response.raise_for_status()

    payload = response.json()
    choices = payload.get("choices")
    if not choices:
        raise RuntimeError(
            f"No completion returned for prompt {prompt!r}: {payload}"
        )
    return choices[0]["text"]


for begin_with in tqdm(common_beginning_words):
    output_dir = f"../outputs/{begin_with}"
    os.makedirs(output_dir, exist_ok=True)

    # Count only saved completions so stray entries (.DS_Store, notebook
    # checkpoints, ...) do not eat into the quota.
    num_existing_files = sum(
        name.endswith(".txt") for name in os.listdir(output_dir)
    )

    # range() of a non-positive number is empty, so folders that already
    # hold `total` files or more are skipped.
    for _ in tqdm(range(total - num_existing_files)):
        completion = _request_completion(begin_with)
        # Explicit UTF-8: model output may contain characters the platform's
        # default encoding cannot represent.
        with open(f"{output_dir}/{uuid.uuid4()}.txt", "w", encoding="utf-8") as f:
            f.write(begin_with + completion)