In [None]:
%pip install openai langchain faiss-cpu pandas python-dotenv
%pip install openai==0.28
%pip install tiktoken
%pip install -U langchain langchain-community openai faiss-cpu python-dotenv pandas

In [None]:
import os
from getpass import getpass

# Never store the key in the notebook. Reuse a key that is already present in
# the environment (the old code overwrote it with an empty string); otherwise
# prompt for it without echoing it or saving it in the notebook file.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
import json, glob
import pandas as pd
from langchain.docstore.document import Document

# Root folder of the dataset: load_all_json_docs() searches it recursively for
# *.json files, and detective_conan_all_seasons.csv is read from it as well.
ROOT_DIR = "Case_Closed_Files"

def json_to_document(path):
    """Convert one JSON article file into a LangChain ``Document``.

    The JSON object may contain ``title``, ``url``, ``infobox`` (mapping of
    field -> value) and ``sections`` (mapping of heading -> body text). They
    are rendered as Markdown-like text: a ``# title`` line, the URL, one
    ``key: value`` line per infobox entry and a ``## heading`` block per
    section. Sections whose body is not a non-blank string are skipped.

    Fields that are missing, null or empty are left out entirely, so the text
    never contains empty paragraphs (previously an empty URL or a ``sections``
    mapping without usable text produced stray blank blocks, and a null title
    was rendered as ``# None``).

    Args:
        path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A ``Document`` whose ``page_content`` is the rendered text and whose
        ``metadata["source"]`` is the base name of ``path``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        AttributeError: If the top-level JSON value is not an object.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # `or` instead of a .get() default: also covers keys present but null/empty.
    title = data.get("title") or "Untitled"
    url = data.get("url") or ""
    infobox = data.get("infobox") or {}
    sections = data.get("sections") or {}

    infobox_text = "\n".join(f"{k.strip()}: {v}" for k, v in infobox.items())
    sections_text = "\n\n".join(
        f"## {sec.strip()}\n{body.strip()}"
        for sec, body in sections.items()
        if isinstance(body, str) and body.strip()
    )

    # Keep only the parts that actually carry content.
    parts = [part for part in (f"# {title}", url, infobox_text, sections_text) if part]

    content = "\n\n".join(parts)
    return Document(page_content=content, metadata={"source": os.path.basename(path)})

def load_all_json_docs():
    """Load every ``*.json`` file under ``ROOT_DIR`` (recursively) as a Document.

    Files are visited in sorted path order so that repeated runs yield the
    documents in the same order. Loading is best effort: a file that cannot be
    converted is reported on stdout and skipped.

    Returns:
        List with one Document per successfully converted file.
    """
    docs = []
    # glob yields paths in filesystem-dependent order; sort for reproducibility.
    for path in sorted(glob.glob(f"{ROOT_DIR}/**/*.json", recursive=True)):
        try:
            docs.append(json_to_document(path))
        except Exception as e:
            # Deliberately broad: one malformed file must not abort the whole load.
            print(f"Error with {path}: {e}")
    return docs

def load_csv_docs(csv_path):
    """Turn each row of a CSV file into its own Document.

    Every row becomes a text block with one ``column: value`` line per
    non-missing cell. Missing cells (NaN after ``pd.read_csv``) are omitted so
    the text does not contain ``column: nan`` noise, and rows without any value
    are skipped.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        List of Documents; each has ``metadata["source"]`` set to the base name
        of ``csv_path``.
    """
    df = pd.read_csv(csv_path)
    source = os.path.basename(csv_path)
    docs = []
    for row in df.to_dict(orient="records"):
        # Empty CSV cells come back as NaN; leave them out of the rendered text.
        text = "\n".join(f"{k}: {v}" for k, v in row.items() if pd.notna(v))
        if text:  # a row with no values at all carries nothing worth indexing
            docs.append(Document(page_content=text, metadata={"source": source}))
    return docs

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Directory the FAISS index is written to by save_local() below.
DB_DIR = "conan_vector_db"

# Collect every source document: the JSON files plus one document per CSV row.
json_docs = load_all_json_docs()
csv_docs = load_csv_docs(f"{ROOT_DIR}/detective_conan_all_seasons.csv")
all_docs = json_docs + csv_docs

# Split into chunks of at most 1000 characters, with 150 characters of overlap
# between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
chunks = splitter.split_documents(all_docs)

# Fail early with a clear message: an index cannot be built from zero chunks,
# and the error raised from inside FAISS.from_documents is hard to interpret.
if not chunks:
    raise ValueError(f"No documents found under '{ROOT_DIR}'; nothing to index.")

# Embedding calls the OpenAI API (requires OPENAI_API_KEY) for every chunk.
embedding_model = OpenAIEmbeddings()
vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(DB_DIR)

print(f"Vector DB saved to '{DB_DIR}' with {len(chunks)} chunks.")

In [None]:
from langchain_community.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

# === Load Vector Store ===
DB_DIR = "conan_vector_db"

# SECURITY: load_local() deserializes pickled data from DB_DIR. Only load an
# index that this notebook created itself; never one from an untrusted source.
db = FAISS.load_local(
    folder_path = DB_DIR,
    embeddings = OpenAIEmbeddings(),
    allow_dangerous_deserialization = True  # Not for production
)
retriever = db.as_retriever(search_kwargs={"k": 6})

# === Define Superprompt ===
SUPERPROMPT = (
    "You are a helpful, knowledgeable Detective Conan assistant. "
    "You always answer clearly and concisely using accurate details from the Conan universe, "
    "including characters, episodes, and gadgets. "
    "If something is unclear or unknown, say so honestly rather than guessing."
)

# The persona has to be part of the prompt that ANSWERS the question. It used
# to be injected as a fake first chat turn, but chat_history is only used to
# rewrite follow-up questions into standalone ones, so the instructions never
# reached the answering step (and polluted the question rewriting instead).
QA_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        SUPERPROMPT
        + "\n\nUse the following context to answer the question.\n\n"
        "Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    ),
)

# === Create Conversational Chain ===
chatbot = ConversationalRetrievalChain.from_llm(
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3),
    retriever = retriever,
    combine_docs_chain_kwargs = {"prompt": QA_PROMPT},
    return_source_documents = False
)

# === Initialize Chat History ===
# List of (user question, bot answer) tuples; only real turns belong here.
chat_history = []
# Keep only the most recent turns so a long session cannot outgrow the model's
# context window.
MAX_HISTORY_TURNS = 10

# === Start Chat Loop ===
print("Detective Conan Chatbot — Start chatting! (type 'quit' to stop)\n")
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel / closing stdin ends the chat cleanly.
        print("\nGoodbye, Detective!")
        break

    if user_input.lower() in {"quit", "exit"}:
        print("Goodbye, Detective!")
        break
    if not user_input:
        # Do not spend an API call on an empty question.
        continue

    # invoke() is the supported replacement for calling the chain directly.
    result = chatbot.invoke({"question": user_input, "chat_history": chat_history})
    response = result["answer"]
    chat_history.append((user_input, response))
    del chat_history[:-MAX_HISTORY_TURNS]  # drop turns older than the cap
    print("User:", user_input, flush = True)
    print("Bot:", response, flush = True)