In [1]:
import json
import os

import pandas as pd
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

In [2]:
# Read the movie table from the CSV file
df = pd.read_csv('imdb_top_1000.csv')


def row_to_document(row):
    """Render one DataFrame row as a Document.

    The page content holds one "column: value" line per field, in the
    row's column order, joined with newlines.
    """
    text = "\n".join(f"{column}: {value}" for column, value in row.items())
    return Document(page_content=text)


# One LangChain document per CSV row
documents = [row_to_document(row) for _, row in df.iterrows()]

In [3]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Embedding model wrapper, then a FAISS vector store built from every document
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embedding_function,
)

  embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [None]:
# Output contract for the model: the JSON fields every answer must carry.
format_instruction = "\n".join([
    'Respond with a JSON object containing:',
    '- "ai_message": brief text response',
    '- "movie_list": array of movie objects with "title", "description", "rating", "url"',
])

# Answer prompt text. {context}, {question} and {chat_history} are left as
# placeholders to be filled when the prompt is formatted; the format
# instructions are spliced in right here, so they are not a prompt variable.
template = "\n".join([
    "",
    "You are a movie recommender expert. Use this context:",
    "{context}",
    "",
    "User question: {question}",
    "Chat history: {chat_history}",
    "",
    format_instruction,
    "",
    "Return ONLY valid JSON, no extra text or markdown.",
    "",
])

# Prompt that rewrites a follow-up question into a standalone one
# using the chat history.
condense_question_prompt = ChatPromptTemplate.from_template("\n".join([
    "",
    "Given this chat history:",
    "{chat_history}",
    "",
    "And a follow-up question: {question}",
    "Rephrase the follow-up question to be standalone, incorporating any necessary context from the chat history.",
    "",
    "Standalone question:",
]))

In [None]:
# Answer prompt: its three variables are supplied when the prompt is formatted.
# The JSON format instructions are already baked into `template`.
prompt = PromptTemplate(
    input_variables=["question", "chat_history", "context"],
    template=template
)

# Conversation memory, exposed to the chain under "chat_history".
# output_key names the single chain output to record (the chain below also
# returns source documents).
# NOTE: the former `k=100` argument was dropped. `k` is not a
# ConversationBufferMemory field (it belongs to the windowed memory class), so
# it was being ignored and the buffer was never capped.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="raw_output"
)

# LLM setup.
# The API key must never be committed in the notebook: read it from the
# environment instead. Any key that was previously hard-coded here should be
# revoked and regenerated.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in the environment that starts "
        "the Jupyter kernel before running this cell."
    )

llm = ChatGoogleGenerativeAI(
    google_api_key=google_api_key,
    model="gemini-2.0-flash"
)

# Conversational retrieval chain: condenses each follow-up into a standalone
# question, retrieves from the vector store, then answers with `prompt`.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    memory=memory,
    return_source_documents=True,
    output_key="raw_output",  # must match the memory's output_key
    condense_question_prompt=condense_question_prompt,
    combine_docs_chain_kwargs={"prompt": prompt}
)


  memory = ConversationBufferMemory(


In [6]:
def _extract_json_object(raw_text: str) -> str:
    """Return the outermost ``{...}`` span found in ``raw_text``.

    The model is asked for bare JSON but may still wrap it in a Markdown code
    fence or add stray text; taking the span between the first ``{`` and the
    last ``}`` handles all of those without relying on fixed character offsets.

    Raises:
        ValueError: if ``raw_text`` contains no ``{...}`` span.
    """
    start = raw_text.find("{")
    end = raw_text.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in model output: {raw_text!r}")
    return raw_text[start:end + 1]


def generate_answer(question: str) -> None:
    """Ask the QA chain ``question`` and print the parsed answer.

    Prints the question, the ``ai_message`` text, and every key/value pair of
    each entry in ``movie_list``. Nothing is returned.

    Args:
        question: The user's question for this conversation turn.

    Raises:
        ValueError: if the chain output contains no JSON object.
        json.JSONDecodeError: if the extracted span is not valid JSON.
    """
    # .invoke() replaces the deprecated direct call on the chain object.
    result = qa_chain.invoke({"question": question})
    print(question)

    raw_output = result.get("raw_output") or ""
    data = json.loads(_extract_json_object(raw_output))

    # Missing keys are treated as empty instead of raising KeyError.
    print('ai_message : ' , data.get("ai_message", ""))
    print('\n')
    print('movie_list : ')
    print('\n')
    for movie in data.get("movie_list") or []:
        for key, value in movie.items():
            print( "    " ,key , " : " , value)
        print('\n')

In [7]:
# First turn: a self-contained question that names the director explicitly.
generate_answer("Which movies directed by Christopher Nolan are in the top 1000?")

  result = qa_chain({"question": question})


Which movies directed by Christopher Nolan are in the top 1000?
ai_message :  Here are some of the top-rated movies directed by Christopher Nolan that are in the top 1000:


movie_list : 


     title  :  The Prestige
     description  :  Two stage magicians engage in a battle to create the ultimate illusion.
     rating  :  8.5
     url  :  https://m.media-amazon.com/images/M/MV5BMjA4NDI0MTIxNF5BMl5BanBnXkFtZTYwNTM0MzY2._V1_UX67_CR0,0,67,98_AL_.jpg


     title  :  Memento
     description  :  A man with short-term memory loss attempts to track down his wife's murderer.
     rating  :  8.4
     url  :  https://m.media-amazon.com/images/M/MV5BZTcyNjk1MjgtOWI3Mi00YzQwLWI5MTktMzY4ZmI2NDAyNzYzXkEyXkFqcGdeQXVyNjU0OTQ0OTY@._V1_UX67_CR0,0,67,98_AL_.jpg


     title  :  Batman Begins
     description  :  Batman begins his fight to free crime-ridden Gotham City from corruption.
     rating  :  8.2
     url  :  https://m.media-amazon.com/images/M/MV5BOTY4YjI2N2MtYmFlMC00ZjcyLTg3YjEtMDQyM2ZjYzQ5

In [8]:
# Follow-up turn: "that director" can only be resolved from the earlier exchange.
generate_answer("Which is the highest rated movie by that director?")


Which is the highest rated movie by that director?
ai_message :  The Christopher Nolan movie with the highest rating from the provided list is The Dark Knight.


movie_list : 


     title  :  The Dark Knight
     description  :  When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.
     rating  :  9.0
     url  :  https://m.media-amazon.com/images/M/MV5BMTMxNTMwODM0NF5BMl5BanBnXkFtZTcwODAyMTk2Mw@@._V1_UX67_CR0,0,67,98_AL_.jpg




In [9]:
# Follow-up turn: "that movie" refers back to the previous answer.
# (The misspelling "decription" is part of the query string as it was sent.)
generate_answer("What is decription of that movie?")


What is decription of that movie?
ai_message :  Here is the description of "The Dark Knight":


movie_list : 


     title  :  The Dark Knight
     description  :  When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.
     rating  :  9.0
     url  :  https://m.media-amazon.com/images/M/MV5BMTMxNTMwODM0NF5BMl5BanBnXkFtZTcwODAyMTk2Mw@@._V1_UX67_CR0,0,67,98_AL_.jpg




In [10]:
# Follow-up turn: asks for the title of the movie referred to in the earlier turns.
generate_answer("What is name of that movie?")

What is name of that movie?
ai_message :  The name of the highest-rated Christopher Nolan movie from the provided list is The Dark Knight.


movie_list : 


     title  :  The Dark Knight
     description  :  When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.
     rating  :  9.0
     url  :  https://m.media-amazon.com/images/M/MV5BMTMxNTMwODM0NF5BMl5BanBnXkFtZTcwODAyMTk2Mw@@._V1_UX67_CR0,0,67,98_AL_.jpg


