In [27]:
import pandas as pd
import os
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone as PineconeClient
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import Pinecone as PineconeLang
from langchain.prompts import PromptTemplate
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI

In [28]:
# Load CSV
df = pd.read_csv("../data/imdb_movie_dataset.csv")

In [29]:
df.columns

Index(['Rank', 'Title', 'Genre', 'Description', 'Director', 'Actors', 'Year',
       'Runtime (Minutes)', 'Rating', 'Votes', 'Revenue (Millions)',
       'Metascore'],
      dtype='object')

In [30]:
# This function will make it easier for us to process the data that way we have all the information in one column
def create_movie_chunk(row):
    chunk = f"""
    Title: {row['Title']}
    Genre: {row['Genre']}
    Director: {row['Director']}
    Actors: {row['Actors']}
    Description: {row['Description']}
    Year: {row['Year']}
    Rating: {row['Rating']}/10
    Metascore: {row['Metascore']}/100
    """
    return chunk.strip()  # Remove leading/trailing whitespace

In [31]:
# Apply to the dataframe
df["chunk"] = df.apply(create_movie_chunk, axis=1)

# Preview a chunk, these will then each be transformed into a vector (each row  in the df its own vector)
print(df["chunk"][0])

Title: Guardians of the Galaxy
    Genre: Action,Adventure,Sci-Fi
    Director: James Gunn
    Actors: Chris Pratt, Vin Diesel, Bradley Cooper, Zoe Saldana
    Description: A group of intergalactic criminals are forced to work together to stop a fanatical warrior from taking control of the universe.
    Year: 2014
    Rating: 8.1/10
    Metascore: 76.0/100


In [32]:
# 1. Prepare Texts from our dataframe into one big chunk
texts = df["chunk"].tolist()  # Our structured movie chunks


In [34]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model='text-embedding-3-small')

FileNotFoundError: [Errno 2] No such file or directory

In [17]:
load_dotenv()
pinecone_api_key = os.getenv("PINECONE_API_KEY")
print(pinecone_api_key)

pcsk_5jRVqy_KMSsgNvdoujgzDxFbHULRx6JiMScHN9t95KswjU7g7qdfdWmpeUa4S8E4sfsHn5


In [None]:
# DO NOT RUN AS VECTOR DATABASE HAS ALREADY BEEN CREATED AND POPULATED
# This cell creates the indexes for each movie
pc = PineconeClient(api_key=pinecone_api_key)
index_name = "lumaa"
docsearch = PineconeLang.from_texts(
    texts=texts,
    embedding=embeddings,
    index_name=index_name,
)

In [None]:
#run this if pinecone indexes are already created and stored in our database (they have already been created for you.)
pc = PineconeClient(api_key=pinecone_api_key)
docsearch = PineconeLang.from_existing_index(index_name="lumaa", embedding=embeddings)

In [19]:
prompt_template="""

You are an expert movie recommender who can understand what the person is asking and what kind of movies they like based on the given information that they give you.
Context: {context}
Question: {question}

Return the helpful answer that gives them 5 total recommendations that they should look at. 
Helpful answer:
"""

In [None]:
# Binds our prompt to this varaible "PROMPT that will be fed to the llm later"
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"question_prompt": PROMPT}

In [21]:
memory = ConversationBufferMemory(
    memory_key="chat_history",  # This key is used in the prompt template for conversation history.
    return_messages=True         # Returns the chat history as a list of messages.
)

  memory = ConversationBufferMemory(


In [26]:
from langchain.chat_models import ChatOpenAI

API_KEY = os.getenv("OPENAI_API_KEY")

# This chain binds together both our llm and the vector database we made 

qa = ConversationalRetrievalChain.from_llm(
    llm = ChatOpenAI(model="gpt-4", temperature=0, openai_api_key=API_KEY),
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    memory=memory,
    combine_docs_chain_kwargs={"prompt": PROMPT},
)


PackageNotFoundError: No package metadata was found for openai

In [23]:
query = "I love romantic novels"
response = qa.run(query)
print(response)

  response = qa.run(query)


Based on your interest in romantic novels, I would recommend the following movies:

1. "The Notebook" - Directed by Nick Cassavetes, this movie is a classic romantic drama based on Nicholas Sparks' novel of the same name.

2. "Pride and Prejudice" - This is a beautiful adaptation of Jane Austen's classic novel, directed by Joe Wright.

3. "Bridget Jones's Diary" - A romantic comedy based on Helen Fielding's novel, directed by Sharon Maguire.

4. "Sense and Sensibility" - Another Jane Austen adaptation, this time directed by Ang Lee.

5. "Me Before You" - A romantic drama based on Jojo Moyes' novel, directed by Thea Sharrock.

These movies, like "Love, Rosie" and "P.S. I Love You", are all based on popular romantic novels and should be right up your alley.


In [25]:
#Conversational Chain that highlights the use of the Conversational Buffer Memmory

while True:
    query = input("You: ")  # Get user input

    if query.lower() in ["exit", "quit", "stop"]:  # Allow user to exit
        print("Goodbye!")
        break

    response = qa.run(query)  # Get response from the chatbot
    print("Bot:", response)


Goodbye!
