### Setup

In [1]:
# Put the project root on sys.path so the `scripts` package resolves, then
# build the embedder from the endpoint exported by scripts.process_data.
import os
import sys

# The project root is the parent of the current working directory
# (the notebook is expected to run from notebooks/).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

from scripts.process_data import EMBED_ENDPOINT, LocalServerEmbeddings

# Embedding client pointed at the endpoint defined in the script
embedder = LocalServerEmbeddings(endpoint=EMBED_ENDPOINT)

In [5]:
# Open the persisted Chroma collection and expose it as a LangChain retriever
import chromadb
from langchain_chroma import Chroma

# Collection name and on-disk location of the Chroma store
COLLECTION_NAME = "marvel_films"
CHROMA_DIR = os.path.join(project_root, "data", "chroma", "chroma_marvel_wiki")

try:
    # Talk to the store directly first and report how many documents it holds
    client = chromadb.PersistentClient(path=CHROMA_DIR)
    collection = client.get_collection(name=COLLECTION_NAME)
    total_direct = collection.count()
    print(f"Connected to ChromaDB - Total documents: {total_direct}")

    # LangChain vectorstore over the same client and collection; `embedder`
    # is supplied as its embedding function
    vectordb = Chroma(client=client, collection_name=COLLECTION_NAME, embedding_function=embedder)

    # Similarity-search retriever configured with k=5
    retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 5})

except Exception as err:
    # Show the failure in the cell output, then re-raise so the cell still fails
    print(f"Error setting up retriever: {err}")
    raise

Connected to ChromaDB - Total documents: 3556


In [6]:
# Chat model (ChatOpenAI from langchain_openai) used to generate the answers
from langchain_openai import ChatOpenAI

# Some versions may expect `model_name` instead of `model`; temperature is pinned to 0.0
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

In [7]:
# Wire the LLM and the retriever together into a RetrievalQA chain
from langchain.chains import RetrievalQA

# "stuff" chain type, fed by the retriever built above
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

### Q&A testing

In [8]:
# Ask the RetrievalQA chain a question and print the raw response dict.
question = "In Avengers: Endgame, what sacrifice does Iron Man make?"
# Use `.invoke(...)` instead of calling the chain object directly: `qa(question)`
# goes through the deprecated `Chain.__call__` and emits a deprecation warning.
# "query" is the chain's input key; the response dict echoes it next to "result".
answer = qa.invoke({"query": question})
print("Q:", question)
print("A:", answer)

  answer = qa(question)


Q: In Avengers: Endgame, what sacrifice does Iron Man make?
A: {'query': 'In Avengers: Endgame, what sacrifice does Iron Man make?', 'result': 'In "Avengers: Endgame," Iron Man sacrifices himself by using the Infinity Stones to snap his fingers and defeat Thanos and his army. This act ultimately leads to his death as the stones fatally burn and irradiate his body.'}


In [9]:
# Ask the RetrievalQA chain a question and print the raw response dict.
question = "Who is Ant-Man?"
# Use `.invoke(...)` instead of calling the chain object directly: `qa(question)`
# goes through the deprecated `Chain.__call__`.
# "query" is the chain's input key; the response dict echoes it next to "result".
answer = qa.invoke({"query": question})
print("Q:", question)
print("A:", answer)

Q: Who is Ant-Man?
A: {'query': 'Who is Ant-Man?', 'result': "Ant-Man is a superhero character from Marvel Comics who has been portrayed by actors like Michael Douglas and Paul Rudd in the Marvel Cinematic Universe. The character's real name is Hank Pym, who is the original Ant-Man and the inventor of the Ant-Man suit and Pym Particles. Another character who takes on the Ant-Man mantle is Scott Lang, a thief hired by Hank Pym in the movies."}


In [11]:
# Ask the RetrievalQA chain a question and print the raw response dict.
question = "Give me 10 members of the cast of Deadpool 2."
# Use `.invoke(...)` instead of calling the chain object directly: `qa(question)`
# goes through the deprecated `Chain.__call__`.
# "query" is the chain's input key; the response dict echoes it next to "result".
answer = qa.invoke({"query": question})
print("Q:", question)
print("A:", answer)

Q: Give me 10 members of the cast of Deadpool 2.
A: {'query': 'Give me 10 members of the cast of Deadpool 2.', 'result': 'Here are 10 members of the cast of Deadpool 2:\n\n1. Ryan Reynolds as Deadpool / Wade Wilson\n2. Tammy Chipman\n3. Kevin Chipman\n4. Elliot Chipman\n5. Dopinder\n6. Colossus (Peter Rasputin)\n7. Negasonic Teenage Warhead (Ellie Phimister)\n8. Yukio\n9. Shatterstar (Rusty)\n10. Buck'}


In [13]:
import os

# Question whose retrieved context we want to inspect
query = "In Avengers: Endgame, what sacrifice does Iron Man make?"

# Run the retriever on its own (no LLM involved) to see which chunks come back
relevant_docs = retriever.invoke(query)

# Print each retrieved chunk followed by the `source` entry of its metadata
print("\n--- Relevant Documents ---")
for i, doc in enumerate(relevant_docs, start=1):
    print(f"Document {i}:\n{doc.page_content}\n")
    print(f"Source: {doc.metadata['source']}\n")


--- Relevant Documents ---
Document 1:
Man use the gamma-ray projector on the mothership core and send it crashing to Earth. Giant Man is mortally wounded in the battle and dies in Wasp's arms. Iron Man, near death, is revived by Thor's summoning of the heavens, confirming to his allies that he is a genuine god. The Avengers hold a celebration in tribute to Giant Man and it is revealed that Banner survived by transforming into the Hulk at the last moment and he leaves. The film ends with Captain America and Black Widow furthering their relationship and sharing a passionate kiss.

Source: 441565_Ultimate+Avengers+2%3A+Rise+of+the+Panther.json

Document 2:
On the platform, Stark goes to save Potts, and Rhodes saves the president. Stark summons each of his Iron Man suits, controlled remotely by J.A.R.V.I.S., to provide air support. Rhodes secures the president and leads him to safety, while Stark discovers Potts has survived the Extremis procedure. However, before he can save her, a rig 