In [17]:
import os
import json
import textwrap
import sys
from typing_extensions import TypedDict, Annotated

from dotenv import load_dotenv
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq

from langgraph.graph import StateGraph, START
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph.message import add_messages

In [18]:
# Read variables from a local .env file into the process environment.
# NOTE(review): presumably this is how the Groq API key reaches ChatGroq — confirm.
load_dotenv()

True

In [19]:
# Load scene chunks from a JSON Lines file (one JSON object per line).
scene_chunks_path = "../data/scene_chunks.jsonl"
with open(scene_chunks_path, "r", encoding="utf-8") as f:
    # Skip blank/whitespace-only lines (e.g. a stray empty line at the end of
    # the file) so json.loads() is never handed an empty document, which would
    # raise JSONDecodeError and abort the whole load.
    scene_chunks = [json.loads(line) for line in f if line.strip()]

In [20]:
# Create LangChain Documents
def _scene_to_document(position: int, chunk: dict) -> Document:
    """Wrap one scene chunk in a Document.

    The chunk's "text" becomes the page content. The metadata carries the
    chunk's "scene_id" (falling back to its position in the file) and its
    "speakers" list (falling back to an empty list).
    """
    scene_metadata = {
        "scene_id": chunk.get("scene_id", position),
        "speakers": chunk.get("speakers", []),
    }
    return Document(page_content=chunk["text"], metadata=scene_metadata)


documents = [
    _scene_to_document(position, chunk)
    for position, chunk in enumerate(scene_chunks)
]

In [21]:
# Sanity check: report how many documents were built and preview the first one.
print(f"Loaded {len(documents)} documents.")
if documents:
    # Guarded so that an empty corpus reports "Loaded 0 documents." instead of
    # failing with IndexError on documents[0].
    print("Scene Text:", documents[0].page_content)
    print("Metadata:", documents[0].metadata)

Loaded 8157 documents.
Scene Text: Michael: All right Jim. Your quarterlies look very good. How are things at the library?
Jim: Oh, I told you. I couldn't close it. So...
Michael: So you've come to the master for guidance? Is this what you're saying, grasshopper?
Jim: Actually, you called me in here, but yeah.
Michael: All right. Well, let me show you how it's done.
Metadata: {'scene_id': 'S1E1_Scene1', 'speakers': ['Michael', 'Jim']}


In [22]:
# HuggingFace sentence-embedding model handed to FAISS for building/loading the index
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [23]:
# Load or create FAISS vector database
output_folder = "../data/vector_databases"
vectorstore_path = os.path.join(output_folder, "scene_db")

# save_local() writes "index.faiss" and "index.pkl" (default index_name="index").
# Checking for both files, rather than only for the folder, means an empty or
# half-written folder triggers a rebuild instead of a crash inside load_local().
index_files = [
    os.path.join(vectorstore_path, file_name)
    for file_name in ("index.faiss", "index.pkl")
]

if not all(os.path.isfile(index_file) for index_file in index_files):
    print("Creating FAISS vectorstore...")
    vectorstore = FAISS.from_documents(documents=documents, embedding=embedding_model)
    vectorstore.save_local(vectorstore_path)
else:
    print("Loading existing FAISS vectorstore...")
    # SECURITY: load_local() unpickles the saved docstore, which is why the
    # explicit allow_dangerous_deserialization opt-in is required. Only load an
    # index this project wrote itself — never one from an untrusted source.
    # NOTE: a cached index is reused as-is; delete the folder to rebuild it
    # after the scene chunks change.
    vectorstore = FAISS.load_local(
        folder_path=vectorstore_path,
        embeddings=embedding_model,
        allow_dangerous_deserialization=True
    )

Loading existing FAISS vectorstore...


In [24]:
def get_character_lines(text: str, character: str) -> str:
    """Return only the lines of ``text`` spoken by ``character``.

    ``text`` is split on newlines and a line is kept when it starts with
    ``"<character>:"``. Kept lines stay in their original order and are joined
    back with newlines; the result is an empty string when nothing matches.
    """
    speaker_tag = f"{character}:"
    spoken = []
    for row in text.split("\n"):
        if row.startswith(speaker_tag):
            spoken.append(row)
    return "\n".join(spoken)

def get_relevant_docs(character: str, query: str):
    """Retrieve scenes relevant to ``query`` in which ``character`` speaks.

    The 10 nearest scenes are fetched from the vector store, scenes whose
    ``speakers`` metadata does not list ``character`` are dropped, and each
    remaining scene is reduced to the lines spoken by ``character``.

    Args:
        character: Speaker name as it appears in the ``speakers`` metadata.
        query: Free-text query used for the similarity search.

    Returns:
        A list of new ``Document`` objects (possibly empty) whose
        ``page_content`` holds only ``character``'s lines and whose metadata
        is a copy of the source scene's metadata.
    """
    retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
    docs = retriever.invoke(query)
    # Build fresh Documents instead of overwriting doc.page_content: the
    # retriever can hand back the very objects held in the vector store's
    # in-memory docstore, so editing them in place would permanently strip the
    # other speakers' lines from the stored scene and break later queries made
    # for a different character.
    return [
        Document(
            page_content=get_character_lines(doc.page_content, character),
            metadata=dict(doc.metadata),
        )
        for doc in docs
        if character in doc.metadata.get("speakers", [])
    ]

In [None]:
# Load the llm
# Groq-hosted chat model that call_character_bot invokes to produce replies.
# NOTE(review): confirm "llama3-8b-8192" is still a model id that Groq serves.
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0.6,
    max_tokens=None,  # no explicit value passed; presumably the provider default applies — confirm
    timeout=None,  # no explicit value passed; presumably no client-side timeout — confirm
    max_retries=2
)

# Prompt for a single chat turn. Placeholders:
#   {user_name} - who the character is talking to
#   {character} - the persona to stay in (appears twice in the template)
#   {history}   - earlier turns of the conversation, rendered as text
#   {context}   - the character's lines from retrieved scenes
#   {question}  - the user's current message
# Note: the template is a plain triple-quoted string, so the 4-space indentation
# of every line below is part of the text that gets sent to the model.
prompt_template = PromptTemplate(
    input_variables=["context", "question", "character", "user_name", "history"],
    template="""
    You are a character from the TV series THE OFFICE (US).
    You are having a conversation with a user named {user_name}.

    Stay strictly in-character as {character}.
    Respond with their unique personality, tone, and humor:
    - Pam: warm, hesitant, supportive, avoids conflict.
    - Jim: sarcastic, observant, uses dry humor.
    - Dwight: intense, rule-driven, loyal to authority, suspicious.
    - Michael: insecure, tries too hard to be funny, emotional.
    - Angela: judgmental, blunt, uptight, religious.
    - Creed: weird, vague, mysterious.

    Respond naturally. Do NOT always use the same phrases (like *sigh*, *smile*, or *laugh*).
    Make your responses personal and reactive — engage in the back-and-forth.
    Don’t break character unless explicitly told to.
    Don’t answer things your character wouldn’t know.

    Previous Chat:
    ------------------------
    {history}
    
    Context from the show:
    ------------------------
    {context}

    User: {question}
    Character ({character}):"""
)

In [26]:
# Define memory schema
class ChatState(TypedDict):
    """State schema for the chat graph (passed to StateGraph and to the node function)."""
    # Conversation messages; the add_messages annotation is the reducer that
    # merges a node's returned messages into this list.
    messages: Annotated[list, add_messages]
    # Name of the character that should answer (e.g. "Pam")
    character: str
    # The user's current input for this turn
    query: str
    # Name the user gave at the start of the session
    user_name: str

In [27]:
# LangGraph node function
def call_character_bot(state: ChatState) -> ChatState:
    """Generate the character's reply to the user's current query.

    Steps:
      1. Retrieve the character's lines from scenes relevant to the query.
      2. Render the most recent prior turns as a plain-text history.
      3. Fill the prompt template and invoke the LLM.

    Args:
        state: Current graph state; reads ``query``, ``character``,
            ``user_name`` and the accumulated ``messages``.

    Returns:
        A partial state update containing only the two new messages (the
        user's query and the model's reply). The ``add_messages`` reducer on
        ``ChatState.messages`` appends them to the stored conversation, so the
        graph's resulting ``messages`` list still ends with the reply.
    """
    history_window = 8  # number of prior messages shown to the model (4 exchanges)

    query = state["query"]
    character = state["character"]
    user_name = state["user_name"]

    context = "\n\n".join(doc.page_content for doc in get_relevant_docs(character, query))

    # Create readable history from the turns *before* this one. The current
    # query is left out on purpose: the template already renders it as
    # "User: {question}", so including it here would show it to the model twice.
    previous_messages = list(state["messages"])[-history_window:]
    chat_history = "".join(
        f"{user_name if msg.type == 'human' else character}: {msg.content}\n"
        for msg in previous_messages
    )

    full_prompt = prompt_template.format(
        context=context,
        question=query,
        character=character,
        user_name=user_name,
        history=chat_history,
    )

    response = llm.invoke([HumanMessage(content=full_prompt)])

    # Return only the new messages and leave state["messages"] untouched:
    # mutating the incoming state and handing the whole list back to the
    # add_messages reducer only works by relying on its de-duplication by id.
    return {"messages": [HumanMessage(content=query), response]}

# LangGraph workflow: one "model" node reached directly from START, compiled
# with a MemorySaver checkpointer.
graph = StateGraph(ChatState)
graph.add_node("model", call_character_bot)
graph.add_edge(START, "model")
workflow = graph.compile(checkpointer=MemorySaver())

In [None]:
# Session setup: starting character, the checkpoint thread id derived from it,
# and a dict mapping thread id -> message list that the chat loop fills in.
character = "Pam"
thread_id = f"{character.lower()}-chat-thread"
chat_memory = dict()

# Welcome banner and rules, printed as one newline-joined block.
divider = "=" * 80
welcome_lines = (
    divider,
    "🎮 WELCOME TO THE OFFICE CHARACTER CHATBOT 🎮",
    divider,
    "📜 Rules:",
    "- You'll start a conversation with Pam.",
    "- Type anything to chat with the character.",
    "- Type '/switch <Character>' to talk to someone else.",
    "- Type '/summary' to see the chat history.",
    "- Type 'exit' or 'quit' to end the session.",
    "- Characters won't break role and will respond as if you're in the show.",
    "- They’ll try to remember your name — be nice!",
    divider,
    f"\n\nYou're now chatting with {character}!",
)
print("\n".join(welcome_lines))

# Start conversation: the character introduces herself and asks for the user's
# name, which is trimmed and title-cased before use.
print(f"\n{character}: Hi! I'm Pam Beesly, the receptionist at Dunder Mifflin.")
raw_name = input("Pam: What's your name? ")
user_name = raw_name.strip().title()
print(f"{character}: Nice to meet you, {user_name}! How can I help you today?\n")


🎮 WELCOME TO THE OFFICE CHARACTER CHATBOT 🎮
📜 Rules:
- You'll start a conversation with Pam.
- Type anything to chat with the character.
- Type '/switch <Character>' to talk to someone else.
- Type '/summary' to see the chat history.
- Type 'exit' or 'quit' to end the session.
- Characters won't break role and will respond as if you're in the show.
- They’ll try to remember your name — be nice!


You're now chatting with Pam!

Pam: Hi! I'm Pam Beesly, the receptionist at Dunder Mifflin.
Pam: Nice to meet you, Ibra! How can I help you today?



In [None]:
# Main chat loop: read a line, handle the built-in commands, otherwise send the
# text to the current character and print the reply.
while True:
    user_input = input(f"{user_name}: ").strip()
    command = user_input.lower()

    # Nothing typed: ask again instead of sending an empty query to the model.
    if not user_input:
        continue

    # The welcome text advertises 'exit' / 'quit'; the slash-prefixed forms are
    # accepted as well so both spellings end the session.
    if command in ("exit", "quit", "/exit", "/quit"):
        workflow.checkpointer.delete_thread(thread_id)
        chat_memory.pop(thread_id, None)
        print("👋 Goodbye!")
        break

    if command == "/summary":
        print("🧠 Memory so far:")
        messages = chat_memory.get(thread_id, [])
        for msg in messages:
            role = user_name if msg.type == "human" else character
            print(f"{role}: {msg.content}")
        continue

    # "/switch <Character>" changes the active character (and with it the
    # thread id, so each character keeps a separate conversation).
    if command == "/switch" or command.startswith("/switch "):
        new_char = user_input[len("/switch"):].strip().title()
        if not new_char:
            # A bare "/switch" is a command typo, not something to ask the model.
            print("⚠️ Usage: /switch <Character>")
            continue
        character = new_char
        thread_id = f"{character.lower()}-chat-thread"
        print(f"\n✅ You're now chatting with {character}!")
        continue

    try:
        result = workflow.invoke(
            {
                "query": user_input,
                "character": character,
                "user_name": user_name,
            },
            config={"configurable": {"thread_id": thread_id}},
        )

        # Cache the thread's messages so /summary can print them.
        chat_memory[thread_id] = result["messages"]

        # Save chat logs to JSON file under /data/chat_logs
        # os.makedirs("../data/chat_logs", exist_ok=True)
        # log_path = os.path.join("../data/chat_logs", f"{character.lower()}_{user_name.lower()}.json")
        # log_data = []
        # for msg in result["messages"]:
        #     role = user_name if msg.type == "human" else character
        #     log_data.append({"role": role, "content": msg.content})

        # with open(log_path, "w", encoding="utf-8") as f:
        #     json.dump(log_data, f, indent=2, ensure_ascii=False)

        # The last message is the character's reply; wrap it for readability.
        wrapped = textwrap.fill(result["messages"][-1].content, width=100)
        print(f"{character}:\n{wrapped}\n")
        sys.stdout.flush()
    except Exception as e:
        # Report the failure and keep the session alive for the next input.
        print(f"⚠️ Error: {e}")

🧠 Memory so far:
Pam:
Aw, hi Ibra! What's up?

👋 Goodbye!
