# Hazbin Hotel Character RAG System

In [17]:
import os
import json
import gradio as gr
from pathlib import Path
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.messages import SystemMessage, HumanMessage, convert_to_messages
from dotenv import load_dotenv


In [22]:
# Load variables from .env before any client is constructed (presumably this is
# where the OpenAI API key comes from); override=True lets .env win over
# variables already set in the process environment.
load_dotenv(override=True)

# Directory in which the Chroma vector store is persisted.
DB_NAME = "hazbin_character.db"

# Chat model used to generate answers.
MODEL = "gpt-4.1-nano"
# Number of chunks requested from the retriever per question.
RETRIEVAL_K = 3

# Embedding model used when building the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")





In [6]:
def fetch_hazbin_data(ljson_path=None):
    """
    Load Hazbin Hotel character profiles from a line-delimited JSON file.

    Each non-blank line is parsed as one JSON object. The "description" field
    (or, failing that, "bio") becomes the document text; if neither holds a
    value, the whole record serialised as JSON is used instead. Every other
    field is kept as document metadata.

    Args:
        ljson_path: Optional path (str or Path) to the profiles file. When
            omitted, defaults to
            ../knowledge-base/hazbin_character_profiles.ljson relative to the
            current working directory, because __file__ is not defined in
            notebooks (this assumes the notebook runs from the
            "implementation" folder).

    Returns:
        A list of Document objects, one per record. The list is empty when the
        file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if ljson_path is None:
        hazbin_base = Path.cwd().parent / "knowledge-base"
        ljson_path = hazbin_base / "hazbin_character_profiles.ljson"
    else:
        ljson_path = Path(ljson_path)

    # A missing file yields no documents rather than an error.
    if not ljson_path.exists():
        return []

    # Fields that supply the document text and are therefore kept out of metadata.
    text_fields = ("description", "bio")
    documents = []
    with open(ljson_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            data = json.loads(line)
            content = data.get("description") or data.get("bio") or json.dumps(data)
            metadata = {k: v for k, v in data.items() if k not in text_fields}
            documents.append(Document(page_content=content, metadata=metadata))
    return documents

In [7]:
# Sanity check: load the profiles and print the first five documents
documents = fetch_hazbin_data()
for position, doc in enumerate(documents[:5], start=1):
    print(f"Document {position}:")
    print("Content:", doc.page_content)
    print("Metadata:", doc.metadata)
    print("-" * 40)


Document 1:
Content: Charlotte "Charlie" Morningstar is the protagonist and princess of Hell who founds the Hazbin Hotel to rehabilitate sinners and reduce Hell's overpopulation.
Metadata: {'name': 'Charlie Morningstar', 'appearance': 'Tall humanoid demoness with pale skin, long pink hair with orange tips, a red dress with star motifs and a crown-like hair accessory; generally bright, theatrical design.', 'personality': 'Optimistic, compassionate, idealistic, earnest and determined; deeply empathetic and often naively hopeful about redemption.', 'backstory': "Daughter of Lucifer Morningstar and Lilith, Charlie grew up in Hell and became determined to find a nonviolent solution to Hell's problems. She opens the Hazbin Hotel to rehabilitate sinners and seeks a path to Heaven for redeemed souls."}
----------------------------------------
Document 2:
Content: Vaggie (often spelled Vaggi) is Charlie's girlfriend and the manager/defender of the Hazbin Hotel; a fallen angel and former Exorcis

In [10]:

def create_chunks(documents, chunk_size=200, chunk_overlap=50):
    """
    Split documents into overlapping text chunks ready for embedding.

    Args:
        documents: Documents to split.
        chunk_size: Maximum size of each chunk, as measured by the splitter
            (characters by default). Defaults to 200.
        chunk_overlap: Amount of text shared between consecutive chunks of the
            same document. Defaults to 50.

    Returns:
        The list of chunk Documents produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(documents)

In [11]:
chunks = create_chunks(documents)

# Show the count and a short preview only; echoing the whole list floods the
# notebook output because every chunk repeats its full metadata.
print(f"Created {len(chunks)} chunks")
chunks[:3]

[Document(metadata={'name': 'Charlie Morningstar', 'appearance': 'Tall humanoid demoness with pale skin, long pink hair with orange tips, a red dress with star motifs and a crown-like hair accessory; generally bright, theatrical design.', 'personality': 'Optimistic, compassionate, idealistic, earnest and determined; deeply empathetic and often naively hopeful about redemption.', 'backstory': "Daughter of Lucifer Morningstar and Lilith, Charlie grew up in Hell and became determined to find a nonviolent solution to Hell's problems. She opens the Hazbin Hotel to rehabilitate sinners and seeks a path to Heaven for redeemed souls."}, page_content='Charlotte "Charlie" Morningstar is the protagonist and princess of Hell who founds the Hazbin Hotel to rehabilitate sinners and reduce Hell\'s overpopulation.'),
 Document(metadata={'name': 'Vaggie', 'appearance': 'Humanoid with grayish-lavender skin, short white hair with a dark streak, an eyepatch over her right eye (lost in past conflict), and 

In [15]:
def create_embeddings(chunks):
    """
    Rebuild the persistent Chroma vector store from the given chunks.

    If DB_NAME already exists on disk, the collection stored there is deleted
    first so that each call starts from a clean store. After indexing, the
    vector count and embedding dimensionality are printed as a sanity check.

    Args:
        chunks: Non-empty list of Document chunks to embed and index.

    Returns:
        The populated Chroma vector store.

    Raises:
        ValueError: If chunks is empty. This is checked before the existing
            collection is deleted, so a failed load upstream cannot wipe a
            working store and then fail while sampling an embedding.
    """
    if not chunks:
        raise ValueError("No chunks to embed; check that the knowledge base was loaded")

    if os.path.exists(DB_NAME):
        Chroma(persist_directory=DB_NAME, embedding_function=embeddings).delete_collection()

    vectorstore = Chroma.from_documents(
        documents=chunks, embedding=embeddings, persist_directory=DB_NAME
    )

    # NOTE: _collection is a private attribute of the Chroma wrapper; it is used
    # here only to report statistics about what was stored.
    collection = vectorstore._collection
    count = collection.count()

    # Fetch a single stored vector to discover the embedding dimensionality.
    sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
    dimensions = len(sample_embedding)
    print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")
    return vectorstore

In [19]:
# Full ingestion pipeline: load the profiles, split them into chunks, then
# embed the chunks and persist them in the vector store.
documents = fetch_hazbin_data()
chunks = create_chunks(documents)
# vectorstore stays at module level; the retriever is built from it further down.
vectorstore = create_embeddings(chunks)
print("Ingestion complete")

There are 33 vectors with 1,536 dimensions in the vector store
Ingestion complete


In [23]:

# Template for the system message; {context} is filled in per question with
# the retrieved chunk text via str.format.
SYSTEM_PROMPT = """
You are a knowledgeable, friendly assistant who helps fans of Hazbin Hotel.
You are chatting with a user about Hazbin Hotel Characters.
If relevant, use the given context to answer any question.
If you don't know the answer, say so.
Context:
{context}
"""
# Configure k on the retriever itself so the number of results does not depend
# on whether the installed langchain-core forwards per-call keyword arguments
# from invoke() down to the similarity search.
retriever = vectorstore.as_retriever(search_kwargs={"k": RETRIEVAL_K})
# temperature=0 keeps the answers as repeatable as the model allows.
llm = ChatOpenAI(temperature=0, model_name=MODEL)


In [24]:
def fetch_context(question: str) -> list[Document]:
    """
    Look up the chunks most relevant to the question.

    The question is passed to the module-level retriever, requesting
    RETRIEVAL_K results; whatever the retriever returns is handed back as is.
    """
    matches = retriever.invoke(question, k=RETRIEVAL_K)
    return matches

In [25]:

def combined_question(question: str, history: list[dict] = []) -> str:
    """
    Combine all the user's messages into a single string.
    """
    prior = "\n".join(m["content"] for m in history if m["role"] == "user")
    return prior + "\n" + question

In [26]:
def answer_question(question: str, history: list[dict] | None = None) -> tuple[str, list[Document]]:
    """
    Answer the given question with RAG; return the answer and the context documents.

    Args:
        question: The user's latest message.
        history: Earlier conversation turns as dicts with "role" and "content"
            keys. None or an empty list means there is no prior conversation.

    Returns:
        A tuple of (answer text from the model, documents retrieved as context).
    """
    # Default is None rather than [] so no list object is shared between calls.
    history = history or []

    # Retrieve with every user turn so far, so that a follow-up question still
    # carries the wording of the earlier ones.
    combined = combined_question(question, history)
    docs = fetch_context(combined)
    context = "\n\n".join(doc.page_content for doc in docs)
    system_prompt = SYSTEM_PROMPT.format(context=context)

    # The model sees: system prompt with context, then the prior turns, then
    # the new question on its own.
    messages = [SystemMessage(content=system_prompt)]
    messages.extend(convert_to_messages(history))
    messages.append(HumanMessage(content=question))
    response = llm.invoke(messages)
    return response.content, docs


In [31]:
def format_context(context):
    """
    Render the retrieved documents as HTML-flavoured markdown for the UI panel.

    Args:
        context: Iterable of documents exposing .metadata (a dict) and
            .page_content (a str).

    Returns:
        A string that starts with a "Relevant Context" heading, followed by one
        "Name: ..." line and the chunk text for each document. A document whose
        metadata has no "name" entry is labelled "Unknown" instead of raising
        KeyError.
    """
    parts = ["<h2 style='color: #ff7800;'>Relevant Context</h2>\n\n"]
    for doc in context:
        # Metadata is built from whatever keys each record carries, so "name"
        # is not guaranteed to be present.
        name = doc.metadata.get("name", "Unknown")
        parts.append(f"<span style='color: #ff7800;'>Name: {name}</span>\n\n")
        parts.append(doc.page_content + "\n\n")
    return "".join(parts)


def chat(history):
    """
    Answer the newest message in the chat history.

    The last entry of history is treated as the question and everything before
    it as the prior conversation. The assistant's reply is appended to history
    in place.

    Returns:
        A tuple of (the updated history, the retrieved context rendered by
        format_context).
    """
    question = history[-1]["content"]
    earlier_turns = history[:-1]
    reply, retrieved = answer_question(question, earlier_turns)
    assistant_turn = {"role": "assistant", "content": reply}
    history.append(assistant_turn)
    return history, format_context(retrieved)


def main():
    """
    Build the Gradio chat interface and launch it in the browser.

    The layout has two columns: the conversation with its input box on the
    left and the retrieved context on the right. Submitting a message first
    moves it into the chatbot and clears the input box, then calls chat to
    produce the answer and refresh the context panel.
    """

    def put_message_in_chatbot(message, history):
        # Clear the textbox and add the user's message to the conversation.
        return "", history + [{"role": "user", "content": message}]

    theme = gr.themes.Soft(font=["Inter", "system-ui", "sans-serif"])

    with gr.Blocks(title="Hazbin Hotel Expert Assistant", theme=theme) as ui:
        gr.Markdown("# 🏨 Hazbin Hotel Expert Assistant\nAsk me anything about Hazbin Hotel Characters!")

        with gr.Row():
            with gr.Column(scale=1):
                chatbot = gr.Chatbot(
                    label="💬 Conversation", height=600, type="messages", show_copy_button=True
                )
                message = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask anything about Hazbin Hotel Characters...",
                    show_label=False,
                )

            with gr.Column(scale=1):
                context_markdown = gr.Markdown(
                    label="📚 Retrieved Context",
                    value="*Retrieved context will appear here*",
                    container=True,
                    height=600,
                )

        # Two-step submit: show the user's message immediately, then answer it.
        message.submit(
            put_message_in_chatbot, inputs=[message, chatbot], outputs=[message, chatbot]
        ).then(chat, inputs=chatbot, outputs=[chatbot, context_markdown])

    ui.launch(inbrowser=True)


In [32]:
# Build the Gradio interface and launch it (inbrowser=True opens it in a browser tab).
main()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.
