In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
import pandas as pd

movies = pd.read_csv("../data/movies_with_posters.csv")

In [6]:
movies["tagged_overview"]

0       238 Spanning the years 1945 to 1955, a chronic...
1       278 Framed in the 1940s for the double murder ...
2       240 In the continuing saga of the Corleone cri...
3       424 The true story of how businessman Oskar Sc...
4       19404 Raj is a rich, carefree, happy-go-lucky ...
                              ...                        
9001    12142 Edward Carnby is a private investigator ...
9002    5491 In the year 3000, man is no match for the...
9003    11059 Set on an island off the coast, a techno...
9004    14164 On his 18th birthday, Goku receives a my...
9005    40016 A platoon of eagles and vultures attacks...
Name: tagged_overview, Length: 9006, dtype: object

In [7]:
movies["tagged_overview"].to_csv("../data/tagged_overview.txt",
                                 sep="\n",
                                 index=False,
                                 header=False)

In [8]:
raw_documents = TextLoader("../data/tagged_overview.txt").load()
text_splitter = CharacterTextSplitter(chunk_size= 0, chunk_overlap= 0, separator= "\n")
documnets = text_splitter.split_documents(raw_documents)

Created a chunk of size 311, which is longer than the specified 0
Created a chunk of size 392, which is longer than the specified 0
Created a chunk of size 226, which is longer than the specified 0
Created a chunk of size 167, which is longer than the specified 0
Created a chunk of size 391, which is longer than the specified 0
Created a chunk of size 336, which is longer than the specified 0
Created a chunk of size 205, which is longer than the specified 0
Created a chunk of size 171, which is longer than the specified 0
Created a chunk of size 400, which is longer than the specified 0
Created a chunk of size 307, which is longer than the specified 0
Created a chunk of size 286, which is longer than the specified 0
Created a chunk of size 243, which is longer than the specified 0
Created a chunk of size 257, which is longer than the specified 0
Created a chunk of size 241, which is longer than the specified 0
Created a chunk of size 229, which is longer than the specified 0
Created a 

In [9]:
#Hugging Face Embedding model
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [11]:
#Chroma vector_db
db_movies = Chroma.from_documents(
    documents=documnets,
    embedding=embedding_model
)

In [12]:
query = "A movie about world war"

docs = db_movies.similarity_search(query, k = 10)
docs

[Document(id='08366a1e-f092-4b7b-8395-fec9ac04ed6f', metadata={'source': '../data/tagged_overview.txt'}, page_content='"11202 ""Patton"" tells the tale of General George S. Patton, famous tank commander of World War II. The film begins with patton\'s career in North Africa and progresses through the invasion of Germany and the fall of the Third Reich. Side plots also speak of Patton\'s numerous faults such his temper and habit towards insubordination."'),
 Document(id='e9d3d524-d3b4-478f-885d-84c89a25e953', metadata={'source': '../data/tagged_overview.txt'}, page_content='"11202 ""Patton"" tells the tale of General George S. Patton, famous tank commander of World War II. The film begins with patton\'s career in North Africa and progresses through the invasion of Germany and the fall of the Third Reich. Side plots also speak of Patton\'s numerous faults such his temper and habit towards insubordination."'),
 Document(id='2bfb69f3-0242-48ba-b22c-d09aabcc0327', metadata={'source': '../dat

In [14]:
movies[movies["id"] == int(docs[0].page_content.split()[0].strip('"'))]

Unnamed: 0.1,Unnamed: 0,id,original_title,overview,genre_ids,genre_names,tagged_overview,poster_url
1167,1167,11202,Patton,"""Patton"" tells the tale of General George S. P...","[10752, 18, 36]","['War', 'Drama', 'History']","11202 ""Patton"" tells the tale of General Georg...",https://image.tmdb.org/t/p/w500/rLM7jIEPTjj4CF...


In [15]:
def retrieve_semantic_recommendations(
        query: str,
        top_k: int = 10
) -> pd.DataFrame:
    recs = db_movies.similarity_search(query, k = 50)

    movie_list = []

    for i in range(0, len(recs)):
        movie_list += [int(recs[i].page_content.strip('"').split()[0])]

    return movies[movies["id"].isin(movie_list)].head(top_k)

In [16]:
retrieve_semantic_recommendations("A superhero movie")

Unnamed: 0.1,Unnamed: 0,id,original_title,overview,genre_ids,genre_names,tagged_overview,poster_url
171,171,755812,"Miraculous World : New York, les héros unis",Teen Parisian superheroes Ladybug and Chat Noi...,"[16, 10770, 14, 28]","['Animation', 'TV Movie', 'Fantasy', 'Action']",755812 Teen Parisian superheroes Ladybug and C...,https://image.tmdb.org/t/p/w500/9YbyvcrHmY2SVb...
1421,1421,487670,The Death of Superman,When a hulking monster arrives on Earth and be...,"[878, 16, 28, 18]","['Science Fiction', 'Animation', 'Action', 'Dr...",487670 When a hulking monster arrives on Earth...,https://image.tmdb.org/t/p/w500/y0uxSHaSFmt6Xa...
1549,1549,76589,Justice League: Doom,"An adaptation of Mark Waid's ""Tower of Babel"" ...","[28, 16, 878]","['Action', 'Animation', 'Science Fiction']","76589 An adaptation of Mark Waid's ""Tower of B...",https://image.tmdb.org/t/p/w500/seCbcjdZYUl8SR...
1785,1785,315635,Spider-Man: Homecoming,Following the events of Captain America: Civil...,"[28, 12, 878, 18]","['Action', 'Adventure', 'Science Fiction', 'Dr...",315635 Following the events of Captain America...,https://image.tmdb.org/t/p/w500/c24sv2weTHPsmD...
1798,1798,474395,Teen Titans Go! To the Movies,All the major DC superheroes are starring in t...,"[16, 28, 35, 878]","['Animation', 'Action', 'Comedy', 'Science Fic...",474395 All the major DC superheroes are starri...,https://image.tmdb.org/t/p/w500/mFHihhE9hlvJEk...
1884,1884,539681,DC League of Super-Pets,When Superman and the rest of the Justice Leag...,"[16, 28, 10751, 35, 878]","['Animation', 'Action', 'Family', 'Comedy', 'S...",539681 When Superman and the rest of the Justi...,https://image.tmdb.org/t/p/w500/qpPMewlugFaejX...
1950,1950,408220,Justice League Dark,When innocent civilians begin committing unthi...,"[16, 28, 14]","['Animation', 'Action', 'Fantasy']",408220 When innocent civilians begin committin...,https://image.tmdb.org/t/p/w500/gWcTaDFXDrOAPf...
2224,2224,211387,Marvel One-Shot: Agent Carter,The film takes place one year after the events...,"[28, 12, 878, 14]","['Action', 'Adventure', 'Science Fiction', 'Fa...",211387 The film takes place one year after the...,https://image.tmdb.org/t/p/w500/4vFKKWPvCVDJTO...
2347,2347,45162,Superman/Batman: Apocalypse,Batman discovers a mysterious teen-aged girl w...,"[878, 28, 12, 16]","['Science Fiction', 'Action', 'Adventure', 'An...",45162 Batman discovers a mysterious teen-aged ...,https://image.tmdb.org/t/p/w500/zYTm9Zjrf4uLL1...
2764,2764,618353,Batman: Death in the Family,Tragedy strikes the Batman's life again when R...,"[16, 28]","['Animation', 'Action']",618353 Tragedy strikes the Batman's life again...,https://image.tmdb.org/t/p/w500/k8Q9ulyRE8fkvZ...
