## Hellaverse Synthetic Character Generator

In [52]:

from dotenv import load_dotenv
from openai import OpenAI
import os, json, requests
import gradio as gr
from IPython.display import Markdown, display, update_display

In [53]:
# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# Model name passed to every chat-completion request in this notebook.
OPENAI_MODEL = "gpt-5-mini"

openai_api_key = os.environ.get("OPENAI_API_KEY")

# Report the key status first so the cell output always shows what was found.
print("OPENAI_API_KEY is set" if openai_api_key else "OPENAI_API_KEY is not set")

# Fail fast: without a key there is no point in building a client, and an
# explicit error here is clearer than a cryptic failure in a later cell.
if not openai_api_key:
    raise EnvironmentError(
        "OPENAI_API_KEY is not set in the environment. Please set it in your .env file or system environment."
    )

client = OpenAI(api_key=openai_api_key)




OPENAI_API_KEY is set


In [55]:
def search_for_new_characters(query: str, k: int = 5):
    """
    Search DuckDuckGo and return the leading web results for ``query``.

    This scrapes DuckDuckGo's HTML results page (``https://duckduckgo.com/html/``),
    which needs no API key, and parses it with BeautifulSoup. It is not the
    Instant Answer JSON API. The parser relies on the ``result``, ``result__a``
    and ``result__snippet`` CSS classes, so it returns an empty list if the
    page markup no longer uses them.

    Args:
        query: Search terms sent as the ``q`` parameter.
        k: Maximum number of result blocks to inspect. The value may come from
            model-generated tool arguments, so it is coerced to ``int``;
            values that cannot be converted fall back to 5 and negative
            values are treated as 0.

    Returns:
        dict: ``{"results": [...]}`` where each entry has the keys ``"title"``,
        ``"href"`` and ``"snippet"`` (empty string when no snippet is found).
        Blocks without a title link are skipped, so fewer than ``k`` entries
        may be returned.

    Raises:
        requests.HTTPError: If the response status is an error
            (raised by ``raise_for_status``).
    """
    # Local import: in the visible notebook bs4 is only used by this function.
    from bs4 import BeautifulSoup

    # A negative k would silently slice "all but the last |k|" blocks, and a
    # float/None would raise TypeError in the slice, so normalise it first.
    try:
        limit = max(int(k), 0)
    except (TypeError, ValueError):
        limit = 5

    resp = requests.get(
        "https://duckduckgo.com/html/",
        params={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10
    )
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")
    result_blocks = soup.find_all("div", class_="result")[:limit]

    results = []
    for block in result_blocks:
        link = block.find("a", class_="result__a")
        if not link:
            continue
        # The snippet is looked up as an <a> first, with a <div> fallback.
        snippet = (
            block.find("a", class_="result__snippet")
            or block.find("div", class_="result__snippet")
        )
        results.append({
            "title": link.get_text(strip=True),
            "href": link.get("href"),
            "snippet": snippet.get_text(strip=True) if snippet else ""
        })
    return {"results": results}

# JSON-schema description of the arguments accepted by search_for_new_characters.
_search_tool_parameters = {
    "type": "object",
    "properties": {
        "query": {"type": "string"},
        "k": {"type": "integer", "default": 5},
    },
    "required": ["query"],
}

# Function entry advertised to the model: name, purpose and argument schema.
_search_tool_function = {
    "name": "search_for_new_characters",
    "description": "Search the web for fresh information using DuckDuckGo.",
    "parameters": _search_tool_parameters,
}

# Tool list handed to chat.completions.create(tools=...).
tools = [{"type": "function", "function": _search_tool_function}]

# Smoke test for tool calling: ask something that needs fresh web data and let
# the model decide whether to call the search tool.
messages = [{"role": "user", "content": "who are the new characters that were introduced in Season 2 of Hazbin Hotel?"}]

resp = client.chat.completions.create(
    model=OPENAI_MODEL,
    messages=messages,
    tools=tools,
    tool_choice="auto",  # let the model call search_for_new_characters when needed
)

first_reply = resp.choices[0].message

if first_reply.tool_calls:
    # The assistant message that carries the tool calls is appended exactly
    # once, before the tool results; appending it per call would duplicate it.
    messages.append(first_reply)
    for call in first_reply.tool_calls:
        if call.function.name == "search_for_new_characters":
            args = json.loads(call.function.arguments or "{}")
            search_result = search_for_new_characters(**args)
        else:
            # Every tool_call_id needs a matching tool message, even for a
            # tool this notebook does not implement.
            search_result = {"error": "unknown tool"}
        messages.append(
            {"role": "tool", "tool_call_id": call.id, "name": call.function.name, "content": json.dumps(search_result)}
        )
    final = client.chat.completions.create(model=OPENAI_MODEL, messages=messages)
    print(final.choices[0].message.content)
else:
    print(first_reply.content)

I don't have up-to-date episode/cast data past June 2024. Would you like me to look this up now and list the new Season 2 characters (and optionally their voice actors and brief descriptions)?


In [57]:
# System prompt for the bulk dataset run: states the task, when to use the
# search tool, and the fields each character profile must carry.
system_message = (
    "You are a helpful assistant whose main purpose is to generate a large dataset in ljson format for all Hazbin Hotel characters in the series from Season 1 and 2.\n"
    "Use the search_for_new_characters tool whenever you are uncertain about Season 2 additions or need up-to-date names; do not ask the user for permission before searching.\n"
    "The character profile should include the following fields:\n"
    "- name\n"
    "- description\n"
    "- appearance\n"
    "- personality\n"
    "- backstory\n"
    "Do not make up characters; only use official ones.\n"
)

# User prompt for the bulk dataset run. It contains no placeholders, so it is
# written as plain string literals (the leading newline and the trailing space
# on the last line are part of the original text).
user_message = (
    "\n"
    "The character dataset is expected to be in ljson format and\n"
    "contain at least 30 characters but if more are found, please include as many as you can.\n"
    "Only output the dataset, and no other explanations.\n"
    "Include all main characters, villians, supporting characters\n"
    "and background characters \n"
)



In [11]:
# Prompts for the single-character generator. The notebook-level
# system_message / user_message describe the bulk "all characters" run and
# contain no {placeholders}, so formatting them dropped both arguments and the
# requested character and format never reached the model.
_PROFILE_SYSTEM_MESSAGE = (
    "You are a helpful assistant that writes a profile for one official "
    "Hazbin Hotel or Helluva Boss character.\n"
    "The character profile should include the following fields:\n"
    "- name\n"
    "- description\n"
    "- appearance\n"
    "- personality\n"
    "- backstory\n"
    "Do not make up characters; only use official ones.\n"
)

# Must not contain literal braces other than the two placeholders, because it
# is rendered with str.format.
_PROFILE_USER_TEMPLATE = (
    "Create a character profile for {character_name} in {file_format} format.\n"
    "For json, output a single JSON object whose keys are the field names.\n"
    "For csv, output a header row with the field names followed by one data row.\n"
    "Only output the dataset, and no other explanations.\n"
)


def generate_character_profile(file_format, character_name):
    """
    Stream a profile for one character from the chat-completions API.

    Args:
        file_format: Output format requested from the model (the notebook
            uses "json" and "csv").
        character_name: Name of the character to profile.

    Yields:
        str: The text accumulated so far, once per streamed chunk that has
        choices, so the last yielded value is the complete response.
    """
    stream = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=[
            {"role": "system", "content": _PROFILE_SYSTEM_MESSAGE},
            {
                "role": "user",
                "content": _PROFILE_USER_TEMPLATE.format(
                    file_format=file_format, character_name=character_name
                ),
            },
        ],
        stream=True
    )
    accumulated_response = ""
    for chunk in stream:
        # Guard against chunks that carry no choices before indexing [0].
        if not chunk.choices:
            continue
        accumulated_response += chunk.choices[0].delta.content or ""
        yield accumulated_response
    # Kept for compatibility; plain for-loops over the generator never see it.
    return accumulated_response



In [60]:
def _strip_code_fence(text, language):
    """Remove Markdown fence markers (```<language> and bare ```) from text."""
    return text.replace(f"```{language}", "").replace("```", "")


def generate_dataset(file_format, character_name):
    """
    Generate a dataset for one character and return it as text.

    The streamed response from generate_character_profile is drained to its
    final value, Markdown code fences are removed, and then:

    - "json": the text is parsed and re-serialised with two-space indentation.
    - "csv": the text is returned stripped of surrounding whitespace.

    Args:
        file_format: "json" or "csv".
        character_name: Name of the character to generate data for.

    Returns:
        str | None: The dataset text, or None when the format is unsupported,
        the model produced no output, or the JSON could not be parsed. A short
        reason is printed in each of those cases.
    """
    if file_format not in ("json", "csv"):
        print(f"Unsupported file format: {file_format!r}")
        return None

    # Start from "" so an empty stream is reported as "no output" rather than
    # failing on an unbound variable.
    final_text = ""
    for stream_so_far in generate_character_profile(file_format, character_name):
        final_text = stream_so_far

    if not final_text:
        print("No output generated.")
        return None

    cleaned = _strip_code_fence(final_text, file_format)

    if file_format == "csv":
        return cleaned.strip()

    try:
        return json.dumps(json.loads(cleaned), indent=2)
    except ValueError as e:
        # json.JSONDecodeError is a ValueError. Exceptions have no `.message`
        # attribute in Python 3, so format the exception itself.
        print(f"Could not parse JSON: {e}")
        return None

  

In [65]:
def _run_search_tool_call(call):
    """Execute one tool call and return a JSON-serialisable result or error."""
    if call.function.name != "search_for_new_characters":
        return {"error": "unknown tool"}
    try:
        args = json.loads(call.function.arguments or "{}")
        return search_for_new_characters(**args)
    except (ValueError, TypeError, requests.RequestException) as exc:
        # Malformed arguments or a failed web request are reported back to the
        # model instead of aborting the whole generation run.
        return {"error": f"{type(exc).__name__}: {exc}"}


def generate_character_profiles(max_tool_rounds: int = 5):
    """
    Ask the model for the full character dataset, serving its search requests.

    The conversation starts from system_message and user_message. While the
    model answers with tool calls, each call is executed and its result is
    appended as a tool message; then the model is asked again.

    Args:
        max_tool_rounds: Maximum number of request rounds in which the model
            may call tools. After that, one more request is made with
            tool_choice="none" so the loop always ends with a text answer.

    Returns:
        str: If the final answer parses as a single JSON document, one
        JSON-encoded line per list item (or a single line for any other
        value). Otherwise the model's text is returned unchanged, which is
        the case for multi-line LJSON output.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]

    content = ""
    for round_index in range(max_tool_rounds + 1):
        tools_allowed = round_index < max_tool_rounds
        response = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=messages,
            tools=tools,
            tool_choice="auto" if tools_allowed else "none"
        )

        message = response.choices[0].message

        if not message.tool_calls:
            content = message.content or ""
            break

        # The assistant message goes in once, followed by one tool message
        # per tool call it contains.
        messages.append(message)
        for call in message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "name": call.function.name,
                    "content": json.dumps(_run_search_tool_call(call)),
                }
            )

    try:
        data = json.loads(content)
    except ValueError:
        # Not a single JSON document: return the text as-is.
        return content

    # Convert to LJSON: one JSON document per line.
    items = data if isinstance(data, list) else [data]
    return "\n".join(json.dumps(item, ensure_ascii=False) for item in items)

In [67]:
def generate_ljson_dataset(output_path: str = "hazbin_character_profiles.ljson"):
    """
    Generate all character profiles and save them as LJSON (one JSON per line).

    Args:
        output_path: File to write. Defaults to the name the notebook has
            always used.

    Returns:
        None. Prints the path that was written, or a notice when
        generate_character_profiles returned nothing.
    """
    profiles = generate_character_profiles()

    if not profiles:
        print("No profiles were generated to save.")
        return

    try:
        data = json.loads(profiles)
    except ValueError:
        # Multi-line LJSON or plain text does not parse as one JSON document;
        # write it through unchanged.
        data = profiles

    if isinstance(data, list):
        lines = [json.dumps(item, ensure_ascii=False) for item in data]
    elif isinstance(data, str):
        lines = [data]
    else:
        lines = [json.dumps(data, ensure_ascii=False)]

    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in lines)

    # Report the real path; the old message named a file that was never written.
    print(f"Saved {output_path}")

In [68]:
# Run the bulk generation: fetch the profiles from the model and write the LJSON file.
generate_ljson_dataset()

Saved character_profiles.ljson


In [61]:
# Generate the same character in both supported formats.
result_json = generate_dataset("json", "Alastor")
result_csv = generate_dataset("csv", "Alastor")

# generate_dataset returns None on failure, so substitute a placeholder
# instead of concatenating None with a string (TypeError).
print((result_json or "<no JSON dataset generated>") + "\n\n")
print(result_csv or "<no CSV dataset generated>")



{
  "name": "Alastor",
  "description": "Alastor, also known as The Radio Demon, is a powerful and manipulative demon who enjoys causing chaos and fear. He has a charismatic and sinister presence, marked by his creepy smile and unsettling demeanor.",
  "appearance": "Alastor has a tall and slender figure, with a vintage radio theme in his design. He has a deer skull-like head, red eyes, and sharp teeth. His body is primarily black and white with red accents, wearing a pinstripe suit and a bow tie. He often carries a microphone and has antennae protruding from his head.",
  "personality": "Alastor is cheerful and charismatic, often speaking in a singsong voice. However, he has a sadistic streak, taking pleasure in others' misery and suffering. He is highly intelligent and cunning, always scheming and manipulating those around him. Despite his cheerful facade, he is deeply unpredictable and dangerous.",
  "backstory": "Alastor was once a human who lived in the early 20th century, known f

In [65]:
# Minimal Gradio front end around generate_dataset: pick a character and a
# format, press the button, and the generated text appears in the output box.
with gr.Blocks() as ui:
    gr.Markdown("## Create a dataset for a Hazbin Hotel or Helluva Boss character")
    character_name = gr.Textbox(label="Character Name")
    file_format = gr.Dropdown(["json", "csv"], label="Dataset Format")
    generate_button = gr.Button("Generate")
    output = gr.Textbox(label="Output", lines=20)
    # inputs are listed as [file_format, character_name] to match the
    # positional parameter order of generate_dataset(file_format, character_name).
    generate_button.click(fn=generate_dataset, inputs=[file_format, character_name], outputs=output)


# Start the local web server for the interface.
ui.launch()

* Running on local URL:  http://127.0.0.1:7871
* To create a public link, set `share=True` in `launch()`.


