In [3]:
# data preprocessing
import pandas as pd

md = pd.read_csv('./movies_metadata.csv', low_memory=False)
md.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [4]:
import ast

# Convert string representation of dictionaries to actual dictionaries
md['genres'] = md['genres'].apply(ast.literal_eval)

# Transforming the 'genres' column
md['genres'] = md['genres'].apply(lambda x: [genre['name'] for genre in x])

md.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [5]:
# Calculate weighted rate (IMDb formula)
def calculate_weighted_rate(vote_average, vote_count, min_vote_count=10):
    return (vote_count / (vote_count + min_vote_count)) * vote_average + (
            min_vote_count / (vote_count + min_vote_count)) * 5.0


# Minimum vote count to prevent skewed results
vote_counts = md[md['vote_count'].notnull()]['vote_count'].astype('int')
min_vote_count = vote_counts.quantile(0.95)

# Create a new column 'weighted_rate'
md['weighted_rate'] = md.apply(
    lambda row: calculate_weighted_rate(row['vote_average'], row['vote_count'], min_vote_count), axis=1)
md.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,weighted_rate
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,7.499658
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,6.610362
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,5.262357
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,5.079915
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,5.199506


In [6]:
md = md.dropna()
md_final = md[['genres', 'title', 'overview', 'weighted_rate']].reset_index(drop=True)
md_final.head()

Unnamed: 0,genres,title,overview,weighted_rate
0,"[Adventure, Action, Thriller]",GoldenEye,James Bond must unmask the mysterious head of ...,6.173464
1,[Comedy],Friday,Craig and Smokey are two guys in Los Angeles h...,6.083421
2,"[Horror, Action, Thriller, Crime]",From Dusk Till Dawn,Seth Gecko and his younger brother Richard are...,6.503176
3,[Comedy],Blue in the Face,"Auggie runs a small tobacco shop in Brooklyn, ...",5.109091
4,"[Action, Adventure, Science Fiction, Family, F...",Mighty Morphin Power Rangers: The Movie,Power up with six incredible teens who out-man...,5.052129


In [11]:
# Create a new column by combining 'title', 'overview', and 'genre'
md_final['combined_info'] = md_final.apply(lambda
                                               row: f"Title: {row['title']}. Overview: {row['overview']} Genres: {', '.join(row['genres'])}. Rating: {row['weighted_rate']}",
                                           axis=1)
md_final['combined_info'][2]

"Title: From Dusk Till Dawn. Overview: Seth Gecko and his younger brother Richard are on the run after a bloody bank robbery in Texas. They escape across the border into Mexico and will be home-free the next morning, when they pay off the local kingpin. They just have to survive 'from dusk till dawn' at the rendezvous point, which turns out to be a Hell of a strip joint. Genres: Horror, Action, Thriller, Crime. Rating: 6.503176130895092"

In [12]:
# Embeddings
import tiktoken
from openai import OpenAI

# embedding model parameters
embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"  # Tokenizer name of OpenAI Embedding API for text-embedding-ada-002
max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191

client = OpenAI()

encoding = tiktoken.get_encoding(embedding_encoding)


def get_embedding(text, engine=embedding_model):
    text = text.replace("\n", " ")
    return client.embeddings.create(input=[text], model=engine).data[0].embedding


# omit reviews that are too long to embed
md_final["n_tokens"] = md_final.combined_info.apply(lambda x: len(encoding.encode(x)))
md_final = md_final[md_final.n_tokens <= max_tokens]
len(md_final)

693

In [13]:
md_final.head()

Unnamed: 0,genres,title,overview,weighted_rate,combined_info,n_tokens
0,"[Adventure, Action, Thriller]",GoldenEye,James Bond must unmask the mysterious head of ...,6.173464,Title: GoldenEye. Overview: James Bond must un...,59
1,[Comedy],Friday,Craig and Smokey are two guys in Los Angeles h...,6.083421,Title: Friday. Overview: Craig and Smokey are ...,52
2,"[Horror, Action, Thriller, Crime]",From Dusk Till Dawn,Seth Gecko and his younger brother Richard are...,6.503176,Title: From Dusk Till Dawn. Overview: Seth Gec...,105
3,[Comedy],Blue in the Face,"Auggie runs a small tobacco shop in Brooklyn, ...",5.109091,Title: Blue in the Face. Overview: Auggie runs...,87
4,"[Action, Adventure, Science Fiction, Family, F...",Mighty Morphin Power Rangers: The Movie,Power up with six incredible teens who out-man...,5.052129,Title: Mighty Morphin Power Rangers: The Movie...,89


In [14]:
md_final["embedding"] = md_final.overview.apply(lambda x: get_embedding(x, engine=embedding_model))
md_final.head()

Unnamed: 0,genres,title,overview,weighted_rate,combined_info,n_tokens,embedding
0,"[Adventure, Action, Thriller]",GoldenEye,James Bond must unmask the mysterious head of ...,6.173464,Title: GoldenEye. Overview: James Bond must un...,59,"[-0.023320907726883888, -0.016039660200476646,..."
1,[Comedy],Friday,Craig and Smokey are two guys in Los Angeles h...,6.083421,Title: Friday. Overview: Craig and Smokey are ...,52,"[0.001543616526760161, -0.01077578030526638, -..."
2,"[Horror, Action, Thriller, Crime]",From Dusk Till Dawn,Seth Gecko and his younger brother Richard are...,6.503176,Title: From Dusk Till Dawn. Overview: Seth Gec...,105,"[-0.008703107945621014, -0.004671914037317038,..."
3,[Comedy],Blue in the Face,"Auggie runs a small tobacco shop in Brooklyn, ...",5.109091,Title: Blue in the Face. Overview: Auggie runs...,87,"[-0.020313598215579987, -0.012282016687095165,..."
4,"[Action, Adventure, Science Fiction, Family, F...",Mighty Morphin Power Rangers: The Movie,Power up with six incredible teens who out-man...,5.052129,Title: Mighty Morphin Power Rangers: The Movie...,89,"[-0.0038929283618927, -0.03924328088760376, -0..."


In [15]:
md_final.rename(columns={'embedding': 'vector'}, inplace=True)
md_final.rename(columns={'combined_info': 'text'}, inplace=True)
md_final.to_pickle('./movies.pkl')

In [16]:
# Start working with LLMs
md = pd.read_pickle('movies.pkl')
md.head(2)

Unnamed: 0,genres,title,overview,weighted_rate,text,n_tokens,vector
0,"[Adventure, Action, Thriller]",GoldenEye,James Bond must unmask the mysterious head of ...,6.173464,Title: GoldenEye. Overview: James Bond must un...,59,"[-0.023320907726883888, -0.016039660200476646,..."
1,[Comedy],Friday,Craig and Smokey are two guys in Los Angeles h...,6.083421,Title: Friday. Overview: Craig and Smokey are ...,52,"[0.001543616526760161, -0.01077578030526638, -..."


In [18]:
md['text'][1]

'Title: Friday. Overview: Craig and Smokey are two guys in Los Angeles hanging out on their porch on a Friday afternoon, smoking and drinking, looking for something to do. Genres: Comedy. Rating: 6.083421330517423'

In [28]:
md

Unnamed: 0,genres,title,overview,weighted_rate,text,n_tokens,vector,metadata
0,"[Adventure, Action, Thriller]",GoldenEye,James Bond must unmask the mysterious head of ...,6.173464,Title: GoldenEye. Overview: James Bond must un...,59,"[-0.023320907726883888, -0.016039660200476646,...","{'genres': ['Adventure', 'Action', 'Thriller']..."
1,[Comedy],Friday,Craig and Smokey are two guys in Los Angeles h...,6.083421,Title: Friday. Overview: Craig and Smokey are ...,52,"[0.001543616526760161, -0.01077578030526638, -...","{'genres': ['Comedy'], 'title': 'Friday', 'ove..."
2,"[Horror, Action, Thriller, Crime]",From Dusk Till Dawn,Seth Gecko and his younger brother Richard are...,6.503176,Title: From Dusk Till Dawn. Overview: Seth Gec...,105,"[-0.008703107945621014, -0.004671914037317038,...","{'genres': ['Horror', 'Action', 'Thriller', 'C..."
3,[Comedy],Blue in the Face,"Auggie runs a small tobacco shop in Brooklyn, ...",5.109091,Title: Blue in the Face. Overview: Auggie runs...,87,"[-0.020313598215579987, -0.012282016687095165,...","{'genres': ['Comedy'], 'title': 'Blue in the F..."
4,"[Action, Adventure, Science Fiction, Family, F...",Mighty Morphin Power Rangers: The Movie,Power up with six incredible teens who out-man...,5.052129,Title: Mighty Morphin Power Rangers: The Movie...,89,"[-0.0038929283618927, -0.03924328088760376, -0...","{'genres': ['Action', 'Adventure', 'Science Fi..."
...,...,...,...,...,...,...,...,...
688,"[Drama, Science Fiction, War]",War for the Planet of the Apes,Caesar and his apes are forced into a deadly c...,6.350166,Title: War for the Planet of the Apes. Overvie...,124,"[0.005834217183291912, -0.031243767589330673, ...","{'genres': ['Drama', 'Science Fiction', 'War']..."
689,[Comedy],Goon: Last of the Enforcers,"During a pro lockout, Doug ""The Thug"" Glatt is...",5.074627,Title: Goon: Last of the Enforcers. Overview: ...,75,"[-0.02065904252231121, -0.0281628780066967, -0...","{'genres': ['Comedy'], 'title': 'Goon: Last of..."
690,"[Adventure, Fantasy, Animation, Action, Family]",Pokémon: Spell of the Unknown,When Molly Hale's sadness of her father's disa...,5.249135,Title: Pokémon: Spell of the Unknown. Overview...,112,"[0.011081325821578503, -0.016071241348981857, ...","{'genres': ['Adventure', 'Fantasy', 'Animation..."
691,"[Action, Science Fiction, Thriller, Adventure]",Transformers: The Last Knight,"Autobots and Decepticons are at war, with huma...",5.922092,Title: Transformers: The Last Knight. Overview...,80,"[0.003477891441434622, -0.05650625005364418, -...","{'genres': ['Action', 'Science Fiction', 'Thri..."


In [29]:
# To avoid the error 'KeyError: 'Field "metadata" does not exist in schema', add a metadata field
md['metadata'] = md.apply(lambda row: {'genres': row['genres'], 'title': row['title'], 'overview': row['overview'],
                                       'weighted_rate': row['weighted_rate']}, axis=1)

In [44]:
md

Unnamed: 0,genres,title,overview,weighted_rate,text,n_tokens,vector,metadata
0,"[Adventure, Action, Thriller]",GoldenEye,James Bond must unmask the mysterious head of ...,6.173464,Title: GoldenEye. Overview: James Bond must un...,59,"[-0.023320907726883888, -0.016039660200476646,...","{'genres': ['Adventure', 'Action', 'Thriller']..."
1,[Comedy],Friday,Craig and Smokey are two guys in Los Angeles h...,6.083421,Title: Friday. Overview: Craig and Smokey are ...,52,"[0.001543616526760161, -0.01077578030526638, -...","{'genres': ['Comedy'], 'title': 'Friday', 'ove..."
2,"[Horror, Action, Thriller, Crime]",From Dusk Till Dawn,Seth Gecko and his younger brother Richard are...,6.503176,Title: From Dusk Till Dawn. Overview: Seth Gec...,105,"[-0.008703107945621014, -0.004671914037317038,...","{'genres': ['Horror', 'Action', 'Thriller', 'C..."
3,[Comedy],Blue in the Face,"Auggie runs a small tobacco shop in Brooklyn, ...",5.109091,Title: Blue in the Face. Overview: Auggie runs...,87,"[-0.020313598215579987, -0.012282016687095165,...","{'genres': ['Comedy'], 'title': 'Blue in the F..."
4,"[Action, Adventure, Science Fiction, Family, F...",Mighty Morphin Power Rangers: The Movie,Power up with six incredible teens who out-man...,5.052129,Title: Mighty Morphin Power Rangers: The Movie...,89,"[-0.0038929283618927, -0.03924328088760376, -0...","{'genres': ['Action', 'Adventure', 'Science Fi..."
...,...,...,...,...,...,...,...,...
688,"[Drama, Science Fiction, War]",War for the Planet of the Apes,Caesar and his apes are forced into a deadly c...,6.350166,Title: War for the Planet of the Apes. Overvie...,124,"[0.005834217183291912, -0.031243767589330673, ...","{'genres': ['Drama', 'Science Fiction', 'War']..."
689,[Comedy],Goon: Last of the Enforcers,"During a pro lockout, Doug ""The Thug"" Glatt is...",5.074627,Title: Goon: Last of the Enforcers. Overview: ...,75,"[-0.02065904252231121, -0.0281628780066967, -0...","{'genres': ['Comedy'], 'title': 'Goon: Last of..."
690,"[Adventure, Fantasy, Animation, Action, Family]",Pokémon: Spell of the Unknown,When Molly Hale's sadness of her father's disa...,5.249135,Title: Pokémon: Spell of the Unknown. Overview...,112,"[0.011081325821578503, -0.016071241348981857, ...","{'genres': ['Adventure', 'Fantasy', 'Animation..."
691,"[Action, Science Fiction, Thriller, Adventure]",Transformers: The Last Knight,"Autobots and Decepticons are at war, with huma...",5.922092,Title: Transformers: The Last Knight. Overview...,80,"[0.003477891441434622, -0.05650625005364418, -...","{'genres': ['Action', 'Science Fiction', 'Thri..."


In [30]:
import lancedb

uri = "data/sample-lancedb"
db = lancedb.connect(uri)
table = db.create_table("movies", md)

In [27]:
# db.drop_table('movies')

In [31]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import LanceDB
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

embeddings = OpenAIEmbeddings()
docsearch = LanceDB(connection=db, embedding=embeddings, table_name="movies")

In [32]:
query = "I'm looking for an animated action movie. What could you suggest to me?"
docs = docsearch.similarity_search(query)
docs

[Document(metadata={'genres': ['Action', 'Crime', 'Thriller'], 'overview': 'An assassin teams up with a woman to help her find her father and uncover the mysteries of her ancestry.', 'title': 'Hitman: Agent 47', 'weighted_rate': 5.365800865800866}, page_content='Title: Hitman: Agent 47. Overview: An assassin teams up with a woman to help her find her father and uncover the mysteries of her ancestry. Genres: Action, Crime, Thriller. Rating: 5.365800865800866'),
 Document(metadata={'genres': ['Action', 'Thriller'], 'overview': 'Iconoclastic, take-no-prisoners cop John McClane, finds himself for the first time on foreign soil after traveling to Moscow to help his wayward son Jack - unaware that Jack is really a highly-trained CIA operative out to stop a nuclear weapons heist. With the Russian underworld in pursuit, and battling a countdown to war, the two McClanes discover that their opposing methods make them unstoppable heroes.', 'title': 'A Good Day to Die Hard', 'weighted_rate': 5.178

In [40]:
docs[0].page_content

'Title: Hitman: Agent 47. Overview: An assassin teams up with a woman to help her find her father and uncover the mysteries of her ancestry. Genres: Action, Crime, Thriller. Rating: 5.365800865800866'

In [45]:
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True  # True = source chk is available 
)

query = "I'm looking for an animated action movie. What could you suggest to me?"
result = qa({"query": query})
result['result']

' Ice Age: Dawn of the Dinosaurs or Transformers could be good options for an animated action movie.'

In [48]:
result['source_documents']

[Document(metadata={'genres': ['Action', 'Crime', 'Thriller'], 'overview': 'An assassin teams up with a woman to help her find her father and uncover the mysteries of her ancestry.', 'title': 'Hitman: Agent 47', 'weighted_rate': 5.365800865800866}, page_content='Title: Hitman: Agent 47. Overview: An assassin teams up with a woman to help her find her father and uncover the mysteries of her ancestry. Genres: Action, Crime, Thriller. Rating: 5.365800865800866'),
 Document(metadata={'genres': ['Action', 'Thriller'], 'overview': 'Iconoclastic, take-no-prisoners cop John McClane, finds himself for the first time on foreign soil after traveling to Moscow to help his wayward son Jack - unaware that Jack is really a highly-trained CIA operative out to stop a nuclear weapons heist. With the Russian underworld in pursuit, and battling a countdown to war, the two McClanes discover that their opposing methods make them unstoppable heroes.', 'title': 'A Good Day to Die Hard', 'weighted_rate': 5.178

In [49]:
# NOTI: Filtering Method 1st Example
# NOTI: option : search_kwargs={'data': df_filtered}
df_filtered = md[md['genres'].apply(lambda x: 'Comedy' in x)]
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(
        search_kwargs={'data': df_filtered}
    ),
    return_source_documents=True)

query = "I'm looking for a movie with animals and an adventurous plot."
result = qa({"query": query})
result

{'query': "I'm looking for a movie with animals and an adventurous plot.",
 'result': ' Ice Age and The Curse of the Were-Rabbit both have animals and adventurous plots.',
 'source_documents': [Document(metadata={'genres': ['Comedy', 'Family'], 'overview': 'The ongoing war between the canine and feline species is put on hold when they join forces to thwart a rogue cat spy with her own sinister plans for conquest.', 'title': 'Cats & Dogs 2 : The Revenge of Kitty Galore', 'weighted_rate': 4.978057553956835}, page_content='Title: Cats & Dogs 2 : The Revenge of Kitty Galore. Overview: The ongoing war between the canine and feline species is put on hold when they join forces to thwart a rogue cat spy with her own sinister plans for conquest. Genres: Comedy, Family. Rating: 4.978057553956835'),
  Document(metadata={'genres': ['Adventure', 'Animation', 'Comedy', 'Family'], 'overview': "Cheese-loving eccentric Wallace and his cunning canine pal, Gromit, investigate a mystery in Nick Park's ani

In [50]:
result['result']

' Ice Age and The Curse of the Were-Rabbit both have animals and adventurous plots.'

In [51]:
# NOTI: Filtering Method 2nd Example
# NOTI: option : search_kwargs={'filter': "weighted_rate > 6.5"}
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(
        search_kwargs={'filter': "weighted_rate > 6.5"}
    ),
    return_source_documents=True
)

query = "I'm looking for a movie with animals and an adventurous plot."
result = qa({"query": query})
result

{'query': "I'm looking for a movie with animals and an adventurous plot.",
 'result': ' Ice Age.',
 'source_documents': [Document(metadata={'genres': ['Animation', 'Comedy', 'Family', 'Adventure'], 'overview': 'With the impending ice age almost upon them, a mismatched trio of prehistoric critters – Manny the woolly mammoth, Diego the saber-toothed tiger and Sid the giant sloth – find an orphaned infant and decide to return it to its human parents. Along the way, the unlikely allies become friends but, when enemies attack, their quest takes on far nobler aims.', 'title': 'Ice Age', 'weighted_rate': 6.892297174111213}, page_content='Title: Ice Age. Overview: With the impending ice age almost upon them, a mismatched trio of prehistoric critters – Manny the woolly mammoth, Diego the saber-toothed tiger and Sid the giant sloth – find an orphaned infant and decide to return it to its human parents. Along the way, the unlikely allies become friends but, when enemies attack, their quest takes 

In [52]:
result['result']

' Ice Age.'

In [53]:
# NOTI: Agent Method
from langchain.agents.agent_toolkits import create_retriever_tool, create_conversational_retrieval_agent
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0)
retriever = docsearch.as_retriever(return_source_documents=True)

tool = create_retriever_tool(
    retriever,
    "movies",
    "Searches and returns recommendations about movies."
)

tools = [tool]

agent_executor = create_conversational_retrieval_agent(llm, tools, verbose=True)

result = agent_executor({"input": "suggest me some action movies"})  # utilizing user input

  llm = ChatOpenAI(temperature=0)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `movies` with `{'query': 'action'}`


[0m[36;1m[1;3mTitle: [REC]². Overview: The action continues from [REC], with the medical officer and a SWAT team outfitted with video cameras are sent into the sealed off apartment to control the situation. Genres: Thriller, Horror. Rating: 5.800000000000001

Title: The Boondock Saints. Overview: With a God-inspired moral obligation to act against evil, twin brothers Conner and Murphy set out to rid Boston of criminals. However, rather than working within the system, these Irish Americans decide to take swift retribution into their own hands. Genres: Action, Thriller, Crime. Rating: 6.455226209048361

Title: The Gamers. Overview: Sent on a quest to rescue a princess, four clueless players must guide their players through dangerous forests, ancient ruins, and past the girl next door. Genres: Action, Comedy, Thriller, Foreign. Rating: 5.049440715883669

Title: Atlas Shrugged P

In [55]:
result = agent_executor({"input": "I liked a lot kung fu panda 1 and 2. Could you suggest me some similar movies?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `movies` with `{'query': 'kung fu panda'}`


[0m[36;1m[1;3mTitle: Kung Fu Panda. Overview: When the Valley of Peace is threatened, lazy Po the panda discovers his destiny as the "chosen one" and trains to become a kung fu hero, but transforming the unsleek slacker into a brave warrior won't be easy. It's up to Master Shifu and the Furious Five -- Tigress, Crane, Mantis, Viper and Monkey -- to give it a try. Genres: Adventure, Animation, Family, Comedy. Rating: 6.675006821282402

Title: Kung Fu Panda 2. Overview: Po is now living his dream as The Dragon Warrior, protecting the Valley of Peace alongside his friends and fellow kung fu masters, The Furious Five - Tigress, Crane, Mantis, Viper and Monkey. But Po’s new life of awesomeness is threatened by the emergence of a formidable villain, who plans to use a secret, unstoppable weapon to conquer China and destroy kung fu. It is up to Po and The Furious Five to jou

In [56]:
# Prompt engineering

# NOTI: to check current prompt
print(qa.combine_documents_chain.llm_chain.prompt.template)

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:


In [58]:
from langchain.prompts import PromptTemplate

template = """You are a movie recommender system that help users to find movies that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, suggest three movies, with a short description of the plot and the reason why the user migth like it.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Your response:"""

PROMPT = PromptTemplate(
    template=template, input_variables=["context", "question"]
)

chain_type_kwargs = {"prompt": PROMPT}
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

query = "I'm looking for a funny animated movie, any suggestion?"
result = qa({'query': query})
print(result['result'])


1. The Lego Movie - When an ordinary LEGO construction worker is mistakenly identified as the prophesied "special", he is recruited to join a quest to stop an evil tyrant from gluing the LEGO universe into eternal stasis. This movie is filled with clever jokes and humor that will appeal to both kids and adults.

2. Zootopia - In a city of anthropomorphic animals, a rookie bunny cop and a cynical con artist fox must work together to uncover a conspiracy that threatens the peace in their city. This movie has a great balance of witty humor and heartfelt messages that will leave you laughing and touched.

3. Finding Nemo - After his son is captured in the Great Barrier Reef and taken to Sydney, a timid clownfish sets out on a journey to bring him home. Along the way, he meets a forgetful blue tang fish and they embark on a hilarious adventure. This movie has a perfect blend of humor, heart, and stunning animation.


In [60]:
# NOTI: add more user info for more specific movie recommendation in prompt
from langchain.prompts import PromptTemplate

template_prefix = """You are a movie recommender system that help users to find movies that match their preferences. 
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}"""

user_info = """This is what we know about the user, and you can use this information to better tune your research:
Age: {age}
Gender: {gender}"""

template_suffix = """Question: {question}
Your response:"""

user_info = user_info.format(age=28, gender='female')

COMBINED_PROMPT = template_prefix + '\n' + user_info + '\n' + template_suffix
print(COMBINED_PROMPT)

You are a movie recommender system that help users to find movies that match their preferences. 
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}
This is what we know about the user, and you can use this information to better tune your research:
Age: 28
Gender: female
Question: {question}
Your response:


In [63]:
PROMPT = PromptTemplate(
    template=COMBINED_PROMPT, input_variables=["context", "question"]
)

chain_type_kwargs = {"prompt": PROMPT}
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

query = "Can you suggest me some horror movies?"
result = qa({'query': query})
result['result']

' Based on your age and gender, some horror movies that you might enjoy are "Halloween", "Fright Night", and "A Nightmare on Elm Street." These films have received good ratings and fall under the horror genre, which you have expressed an interest in. You may also enjoy "Boogeyman 2", which has a similar rating and falls under the horror and thriller genres.'

In [64]:
result

{'query': 'Can you suggest me some horror movies?',
 'result': ' Based on your age and gender, some horror movies that you might enjoy are "Halloween", "Fright Night", and "A Nightmare on Elm Street." These films have received good ratings and fall under the horror genre, which you have expressed an interest in. You may also enjoy "Boogeyman 2", which has a similar rating and falls under the horror and thriller genres.',
 'source_documents': [Document(metadata={'genres': ['Horror', 'Thriller'], 'overview': "In John Carpenter's horror classic, a psychotic murderer, institutionalized since childhood for the murder of his sister, escapes and stalks a bookish teenage girl and her friends while his doctor chases him through the streets.", 'title': 'Halloween', 'weighted_rate': 6.7056000000000004}, page_content="Title: Halloween. Overview: In John Carpenter's horror classic, a psychotic murderer, institutionalized since childhood for the murder of his sister, escapes and stalks a bookish tee

In [65]:
result['source_documents']

[Document(metadata={'genres': ['Horror', 'Thriller'], 'overview': "In John Carpenter's horror classic, a psychotic murderer, institutionalized since childhood for the murder of his sister, escapes and stalks a bookish teenage girl and her friends while his doctor chases him through the streets.", 'title': 'Halloween', 'weighted_rate': 6.7056000000000004}, page_content="Title: Halloween. Overview: In John Carpenter's horror classic, a psychotic murderer, institutionalized since childhood for the murder of his sister, escapes and stalks a bookish teenage girl and her friends while his doctor chases him through the streets. Genres: Horror, Thriller. Rating: 6.7056000000000004"),
 Document(metadata={'genres': ['Horror', 'Comedy'], 'overview': 'A teenager suspects his new neighbour is a vampire. Unable to convince anyone, he tries to enlist the help of a self-proclaimed vampire hunter and magician in this remake of the 1985 comedy-horror classic.', 'title': 'Fright Night', 'weighted_rate': 

In [66]:
# Content based Recommendation
import pandas as pd

data = {
    "username": ["Alice", "Bob"],
    "age": [25, 32],
    "gender": ["F", "M"],
    "movies": [
        [("Transformers: The Last Knight", 7), ("Pokémon: Spell of the Unknown", 5)],
        [("Bon Cop Bad Cop 2", 8), ("Goon: Last of the Enforcers", 9)]
    ]
}

# Convert the "movies" column into dictionaries
for i, row_movies in enumerate(data["movies"]):
    movie_dict = {}
    for movie, rating in row_movies:
        movie_dict[movie] = rating
    data["movies"][i] = movie_dict

# Create a pandas DataFrame
df = pd.DataFrame(data)

df.head()

Unnamed: 0,username,age,gender,movies
0,Alice,25,F,"{'Transformers: The Last Knight': 7, 'Pokémon:..."
1,Bob,32,M,"{'Bon Cop Bad Cop 2': 8, 'Goon: Last of the En..."


In [67]:
template_prefix = """You are a movie recommender system that help users to find movies that match their preferences. 
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}"""

user_info = """This is what we know about the user, and you can use this information to better tune your research:
Age: {age}
Gender: {gender}
Movies already seen alongside with rating: {movies}"""

template_suffix = """Question: {question}
Your response:"""

In [68]:
age = df.loc[df['username'] == 'Alice']['age'][0]
gender = df.loc[df['username'] == 'Alice']['gender'][0]

movies = ''
# Iterate over the dictionary and output movie name and rating
for movie, rating in df['movies'][0].items():
    output_string = f"Movie: {movie}, Rating: {rating}" + "\n"
    movies += output_string
    #print(output_string)

user_info = user_info.format(age=age, gender=gender, movies=movies)

COMBINED_PROMPT = template_prefix + '\n' + user_info + '\n' + template_suffix
print(COMBINED_PROMPT)

You are a movie recommender system that help users to find movies that match their preferences. 
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}
This is what we know about the user, and you can use this information to better tune your research:
Age: 25
Gender: F
Movies already seen alongside with rating: Movie: Transformers: The Last Knight, Rating: 7
Movie: Pokémon: Spell of the Unknown, Rating: 5

Question: {question}
Your response:


In [69]:
PROMPT = PromptTemplate(
    template=COMBINED_PROMPT, input_variables=["context", "question"]
)

chain_type_kwargs = {"prompt": PROMPT}
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

query = "Can you suggest me some action movie based on my background?"
result = qa({'query': query})
result['result']

' Based on your background, I would recommend the following action movies: \n1. Mad Max: Fury Road - This intense and visually stunning film follows a group of rebels trying to escape from a tyrannical ruler in a post-apocalyptic world. (Rating: 7.5)\n2. Wonder Woman - This empowering superhero film follows the journey of a powerful warrior princess as she fights for justice and peace during World War I. (Rating: 7.4)\n3. John Wick - This fast-paced and action-packed film follows a former hitman who seeks revenge against the people who killed his dog. (Rating: 7.4)\n4. Atomic Blonde - Set in the late 1980s, this spy thriller follows a top-level MI6 agent as she navigates a dangerous mission in Berlin. (Rating: 6.7)\n5. Red Sparrow - This espionage film follows a former ballerina who becomes a trained seductress for the Russian intelligence service. (Rating: 6.6)'

In [70]:
result['source_documents']

[Document(metadata={'genres': ['Action', 'Crime', 'Thriller'], 'overview': 'After fighting his way through an apartment building populated by an army of dangerous criminals and escaping with his life, SWAT team member Rama goes undercover, joining a powerful Indonesian crime syndicate to protect his family and uncover corrupt members of his own force.', 'title': 'The Raid 2', 'weighted_rate': 6.7086887835703}, page_content='Title: The Raid 2. Overview: After fighting his way through an apartment building populated by an army of dangerous criminals and escaping with his life, SWAT team member Rama goes undercover, joining a powerful Indonesian crime syndicate to protect his family and uncover corrupt members of his own force. Genres: Action, Crime, Thriller. Rating: 6.7086887835703'),
 Document(metadata={'genres': ['Adventure', 'Action', 'Thriller'], 'overview': 'Moments from death a young man is rescued by a renowned warrior. Realizing unsurpassed physical potential in the young boy he