### Setup

In [14]:
# Make `scripts` importable and pull in embedder + endpoint
import sys, os

# The working directory is taken to be one level below the project root
# (e.g. notebooks/), so its parent is resolved to an absolute path and
# registered on sys.path exactly once to make project packages importable.
parent_of_cwd = os.path.join(os.getcwd(), os.pardir)
project_root = os.path.abspath(parent_of_cwd)
if project_root not in sys.path:
    sys.path.append(project_root)

from scripts.process_data import LocalServerEmbeddings, EMBED_ENDPOINT

# Build the embedder against the endpoint exported by scripts/process_data.py
embedder = LocalServerEmbeddings(
    endpoint=EMBED_ENDPOINT,
)

In [23]:
# Open the persisted ChromaDB collection and expose it as a LangChain retriever
import chromadb
from langchain_chroma import Chroma

# Where the store lives on disk and which collection inside it to open
CHROMA_DIR = os.path.join(project_root, "data", "chroma", "chroma_wikipedia")
COLLECTION_NAME = "marvel_films"

try:
    # Direct client access: look up the collection and report how many
    # documents it currently holds
    client = chromadb.PersistentClient(path=CHROMA_DIR)
    collection = client.get_collection(name=COLLECTION_NAME)
    total_direct = collection.count()
    print(f"Connected to ChromaDB - Total documents: {total_direct}")

    # LangChain vectorstore over the same client and collection, using
    # `embedder` as its embedding function
    vectordb = Chroma(
        embedding_function=embedder,
        collection_name=COLLECTION_NAME,
        client=client,
    )

    # Retriever configured for similarity search with k=10
    retriever_options = {"search_type": "similarity", "search_kwargs": {"k": 10}}
    retriever = vectordb.as_retriever(**retriever_options)

except Exception as e:
    # Surface a readable message in the cell output, then let the original
    # exception propagate so later cells do not run against a half-built setup
    print(f"Error setting up retriever: {e}")
    raise

Connected to ChromaDB - Total documents: 3417


In [17]:
# Chat model used by the QA chain, provided by langchain_openai
from langchain_openai import ChatOpenAI

# temperature is pinned to 0.0; depending on the installed langchain_openai
# version the model argument may need to be spelled `model_name` instead
llm = ChatOpenAI(temperature=0.0, model="gpt-3.5-turbo")

In [18]:
# Combine the chat model and the retriever into a RetrievalQA chain
from langchain.chains import RetrievalQA

# chain_type "stuff" is passed explicitly alongside the LLM and retriever
qa_settings = dict(llm=llm, chain_type="stuff", retriever=retriever)
qa = RetrievalQA.from_chain_type(**qa_settings)

### Q&A testing

In [19]:
# Ask the QA chain one question and print the exchange.
# `invoke` is used instead of calling the chain object directly: the direct
# call form `qa(question)` is deprecated in LangChain, and `invoke` matches
# how the retriever is queried elsewhere in this notebook.
question = "In Avengers: Endgame, what sacrifice does Iron Man make?"
# The chain takes its input under the "query" key and returns a dict holding
# both 'query' and 'result', so `answer` is the full dict, not just the text.
answer = qa.invoke({"query": question})
print("Q:", question)
print("A:", answer)

Q: In Avengers: Endgame, what sacrifice does Iron Man make?
A: {'query': 'In Avengers: Endgame, what sacrifice does Iron Man make?', 'result': 'In Avengers: Endgame, Iron Man, portrayed by Robert Downey Jr., makes the ultimate sacrifice by using the Infinity Stones to snap his fingers and defeat Thanos. This act leads to his death, saving the universe but costing him his life.'}


In [20]:
# Ask the QA chain one question and print the exchange.
# `invoke` is used instead of calling the chain object directly: the direct
# call form `qa(question)` is deprecated in LangChain, and `invoke` matches
# how the retriever is queried elsewhere in this notebook.
question = "Who is Ant-Man?"
# The chain takes its input under the "query" key and returns a dict holding
# both 'query' and 'result', so `answer` is the full dict, not just the text.
answer = qa.invoke({"query": question})
print("Q:", question)
print("A:", answer)

Q: Who is Ant-Man?
A: {'query': 'Who is Ant-Man?', 'result': 'Ant-Man is a superhero character from Marvel Comics who can shrink in size while increasing in strength. In the Marvel Cinematic Universe, Ant-Man is portrayed by actor Paul Rudd as Scott Lang, a former electrical engineer turned petty criminal who acquires a suit that allows him to shrink or grow in scale. He works alongside Hank Pym, the original Ant-Man, to protect the shrinking technology and carry out heists with global consequences.'}


In [21]:
# Ask the QA chain one question and print the exchange.
# `invoke` is used instead of calling the chain object directly: the direct
# call form `qa(question)` is deprecated in LangChain, and `invoke` matches
# how the retriever is queried elsewhere in this notebook.
question = "Give me 10 members of the cast of Deadpool 2."
# The chain takes its input under the "query" key and returns a dict holding
# both 'query' and 'result', so `answer` is the full dict, not just the text.
answer = qa.invoke({"query": question})
print("Q:", question)
print("A:", answer)

Q: Give me 10 members of the cast of Deadpool 2.
A: {'query': 'Give me 10 members of the cast of Deadpool 2.', 'result': '1. Ryan Reynolds as Deadpool\n2. Josh Brolin as Cable\n3. Morena Baccarin as Vanessa Carlysle\n4. Julian Dennison\n5. Zazie Beetz\n6. T.J. Miller\n7. Brianna Hildebrand as Negasonic Teenage Warhead\n8. Jack Kesy\n9. Leslie Uggams as Blind Al\n10. Karan Soni as Dopinder'}


In [24]:
import os

# Inspect what the retriever alone returns for a question, without the LLM
query = "In Avengers: Endgame, what sacrifice does Iron Man make?"

relevant_docs = retriever.invoke(query)

# Print each retrieved document's text followed by its source metadata
print("\n--- Relevant Documents ---")
for i, doc in enumerate(relevant_docs, 1):
    print(f"Document {i}:\n{doc.page_content}\n")
    # A document without a "source" metadata entry would raise KeyError and
    # cut the listing short; fall back to a placeholder so every hit is shown
    print(f"Source: {doc.metadata.get('source', 'Unknown')}\n")


--- Relevant Documents ---
Document 1:
Robert Downey Jr. as Tony Stark / Iron Man: The leader of a faction of Avengers in support of regulation; a self-described genius, billionaire, playboy, and philanthropist with electromechanical suits of armor of his own invention. Anthony Russo said that Stark's egomania allowed the writers "to bring him to a point in his life where he was willing to submit to an authority, where he felt it was the right thing to do." Joe Russo added that because of the visions Stark saw in Age of Ultron, he now has a guilt complex which "drives him to make very specific decisions," calling his emotional arc "very complicated". Downey's personal trainer Eric Oram stated that the trick to pitting Rogers against Stark, "is to show Iron Man using the 'minimum force' necessary to win the fight". Marvel initially wanted Downey's part to be smaller, but "Downey wanted Stark to have a more substantial role in the film's plot." Variety noted that Downey would receive $4