In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

### TextLoader  - Convert the raw text data of movies to a format langchain can work with.
### CharacterTextSplitter - Converting the whole document of movies's overview to meanningful chunks of data.
### HuggingFaceEmbeddings - Convert the chunks of data into document embeddings and do api call to HuggingFaceEmbedding model.
### Chroma - To store the embedded document into vector database.

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [None]:
import pandas as pd

movies = pd.read_csv("data/movies_cleaned.csv")

In [4]:
movies

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Overview,Director,Star1,Star2,Star3,Star4,No_of_Votes,Unique_id,tagged_description,Tagged_description
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,1,1 Two imprisoned men bond over a number of yea...,1 Two imprisoned men bond over a number of yea...
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,2,2 An organized crime dynasty's aging patriarch...,2 An organized crime dynasty's aging patriarch...
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,3,3 When the menace known as the Joker wreaks ha...,3 When the menace known as the Joker wreaks ha...
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,4,4 The early life and career of Vito Corleone i...,4 The early life and career of Vito Corleone i...
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,5,5 A jury holdout attempts to prevent a miscarr...,5 A jury holdout attempts to prevent a miscarr...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,https://m.media-amazon.com/images/M/MV5BNGEwMT...,Breakfast at Tiffany's,1961,115 min,"Comedy, Drama, Romance",7.6,A young New York socialite becomes interested ...,Blake Edwards,Audrey Hepburn,George Peppard,Patricia Neal,Buddy Ebsen,166544,996,996 A young New York socialite becomes interes...,996 A young New York socialite becomes interes...
996,https://m.media-amazon.com/images/M/MV5BODk3Yj...,Giant,1956,201 min,"Drama, Western",7.6,Sprawling epic covering the life of a Texas ca...,George Stevens,Elizabeth Taylor,Rock Hudson,James Dean,Carroll Baker,34075,997,997 Sprawling epic covering the life of a Texa...,997 Sprawling epic covering the life of a Texa...
997,https://m.media-amazon.com/images/M/MV5BM2U3Yz...,From Here to Eternity,1953,118 min,"Drama, Romance, War",7.6,"In Hawaii in 1941, a private is cruelly punish...",Fred Zinnemann,Burt Lancaster,Montgomery Clift,Deborah Kerr,Donna Reed,43374,998,"998 In Hawaii in 1941, a private is cruelly pu...","998 In Hawaii in 1941, a private is cruelly pu..."
998,https://m.media-amazon.com/images/M/MV5BZTBmMj...,Lifeboat,1944,97 min,"Drama, War",7.6,Several survivors of a torpedoed merchant ship...,Alfred Hitchcock,Tallulah Bankhead,John Hodiak,Walter Slezak,William Bendix,26471,999,999 Several survivors of a torpedoed merchant ...,999 Several survivors of a torpedoed merchant ...


In [5]:
# Seperate tagged_description into textfile
# Create document file with raw chunks of document data

movies['tagged_description'].to_csv('tagged_description.txt',
                                    sep='\n',
                                    index=False,
                                    header=False)

In [6]:
raw_documents = TextLoader('tagged_description.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 120, which is longer than the specified 0
Created a chunk of size 112, which is longer than the specified 0
Created a chunk of size 191, which is longer than the specified 0
Created a chunk of size 166, which is longer than the specified 0
Created a chunk of size 115, which is longer than the specified 0
Created a chunk of size 148, which is longer than the specified 0
Created a chunk of size 145, which is longer than the specified 0
Created a chunk of size 180, which is longer than the specified 0
Created a chunk of size 154, which is longer than the specified 0
Created a chunk of size 136, which is longer than the specified 0
Created a chunk of size 155, which is longer than the specified 0
Created a chunk of size 238, which is longer than the specified 0
Created a chunk of size 141, which is longer than the specified 0
Created a chunk of size 181, which is longer than the specified 0
Created a chunk of size 191, which is longer than the specified 0
Created a 

In [7]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='1 Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.')

In [8]:
# Build Vector Search

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
db_movies = Chroma.from_documents(documents, embedding=embedding_model)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [9]:
query = "A crime drama movie"
docs = db_movies.similarity_search(query, k=10)
docs

[Document(id='a3c7f30f-1282-4b8b-9ac0-c26a1743773b', metadata={'source': 'tagged_description.txt'}, page_content='815 This Hong Kong-set crime drama follows the lives of a hitman, hoping to get out of the business, and his elusive female partner.'),
 Document(id='efccaaab-0ca8-405c-97af-ba286b44a3db', metadata={'source': 'tagged_description.txt'}, page_content='521 In Detroit, a lonely pop culture geek marries a call girl, steals cocaine from her pimp, and tries to sell it in Hollywood. Meanwhile, the owners of the cocaine, the Mob, track them down in an attempt to reclaim it.'),
 Document(id='dbe706c3-7031-42e6-8446-311bc7e26572', metadata={'source': 'tagged_description.txt'}, page_content='603 A secret agent exacts revenge on a serial killer through a series of captures and releases.'),
 Document(id='cc4c887d-4ed2-41e5-a9ef-60e61f77a396', metadata={'source': 'tagged_description.txt'}, page_content='92 A story of love between a mentally-ill father who was wrongly accused of murder and

In [10]:
movies[movies["Unique_id"] == int(docs[0].page_content.split()[0].strip())]

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Overview,Director,Star1,Star2,Star3,Star4,No_of_Votes,Unique_id,tagged_description,Tagged_description
814,https://m.media-amazon.com/images/M/MV5BZjM4NW...,Do lok tin si,1995,99 min,"Comedy, Crime, Drama",7.7,This Hong Kong-set crime drama follows the liv...,Kar-Wai Wong,Leon Lai,Michelle Reis,Takeshi Kaneshiro,Charlie Yeung,26429,815,815 This Hong Kong-set crime drama follows the...,815 This Hong Kong-set crime drama follows the...


In [11]:
def retrieve_sementic_recommendations(
    query: str,
    top_k: int = 10,
) -> pd.DataFrame:
    recs = db_movies.similarity_search(query, k=50)
    
    movies_list = []
    
    for i in range(0, len(recs)):
        movies_list += [int(recs[i].page_content.strip('"').split()[0])]
        
    return movies[movies["Unique_id"].isin(movies_list)].head(top_k)

In [12]:
retrieve_sementic_recommendations("A movie about crime drama")

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Overview,Director,Star1,Star2,Star3,Star4,No_of_Votes,Unique_id,tagged_description,Tagged_description
17,https://m.media-amazon.com/images/M/MV5BZjA0OW...,One Flew Over the Cuckoo's Nest,1975,133 min,Drama,8.7,A criminal pleads insanity and is admitted to ...,Milos Forman,Jack Nicholson,Louise Fletcher,Michael Berryman,Peter Brocco,918088,18,18 A criminal pleads insanity and is admitted ...,18 A criminal pleads insanity and is admitted ...
73,https://m.media-amazon.com/images/M/MV5BZWFlYm...,The Shining,1980,146 min,"Drama, Horror",8.4,A family heads to an isolated hotel for the wi...,Stanley Kubrick,Jack Nicholson,Shelley Duvall,Danny Lloyd,Scatman Crothers,898237,74,74 A family heads to an isolated hotel for the...,74 A family heads to an isolated hotel for the...
87,https://m.media-amazon.com/images/M/MV5BYmY3Mz...,Drishyam,2013,160 min,"Crime, Drama, Thriller",8.3,A man goes to extreme lengths to save his fami...,Jeethu Joseph,Mohanlal,Meena,Asha Sharath,Ansiba,30722,88,88 A man goes to extreme lengths to save his f...,88 A man goes to extreme lengths to save his f...
91,https://m.media-amazon.com/images/M/MV5BOGE3N2...,Miracle in cell NO.7,2019,132 min,Drama,8.3,A story of love between a mentally-ill father ...,Mehmet Ada Öztekin,Aras Bulut Iynemli,Nisa Sofiya Aksongur,Deniz Baysal,Celile Toyon Uysal,33935,92,92 A story of love between a mentally-ill fath...,92 A story of love between a mentally-ill fath...
119,https://m.media-amazon.com/images/M/MV5BYTE4OD...,Vertigo,1958,128 min,"Mystery, Romance, Thriller",8.3,A former police detective juggles wrestling wi...,Alfred Hitchcock,James Stewart,Kim Novak,Barbara Bel Geddes,Tom Helmore,364368,120,120 A former police detective juggles wrestlin...,120 A former police detective juggles wrestlin...
136,https://m.media-amazon.com/images/M/MV5BYmJhZm...,Drishyam,2015,163 min,"Crime, Drama, Mystery",8.2,Desperate measures are taken by a man who trie...,Nishikant Kamat,Ajay Devgn,Shriya Saran,Tabu,Rajat Kapoor,70367,137,137 Desperate measures are taken by a man who ...,137 Desperate measures are taken by a man who ...
162,https://m.media-amazon.com/images/M/MV5BMDQ2Yz...,L.A. Confidential,1997,138 min,"Crime, Drama, Mystery",8.2,"As corruption grows in 1950s Los Angeles, thre...",Curtis Hanson,Kevin Spacey,Russell Crowe,Guy Pearce,Kim Basinger,531967,163,"163 As corruption grows in 1950s Los Angeles, ...","163 As corruption grows in 1950s Los Angeles, ..."
177,https://m.media-amazon.com/images/M/MV5BOGZiM2...,Sholay,1975,204 min,"Action, Adventure, Comedy",8.2,After his family is murdered by a notorious an...,Ramesh Sippy,Sanjeev Kumar,Dharmendra,Amitabh Bachchan,Amjad Khan,51284,178,178 After his family is murdered by a notoriou...,178 After his family is murdered by a notoriou...
261,https://m.media-amazon.com/images/M/MV5BMGQ5Mz...,Chung Hing sam lam,1994,102 min,"Comedy, Crime, Drama",8.1,Two melancholy Hong Kong policemen fall in lov...,Kar-Wai Wong,Brigitte Lin,Takeshi Kaneshiro,Tony Chiu-Wai Leung,Faye Wong,63122,262,262 Two melancholy Hong Kong policemen fall in...,262 Two melancholy Hong Kong policemen fall in...
283,https://m.media-amazon.com/images/M/MV5BOGMwYm...,Chinatown,1974,130 min,"Drama, Mystery, Thriller",8.1,A private detective hired to expose an adulter...,Roman Polanski,Jack Nicholson,Faye Dunaway,John Huston,Perry Lopez,294230,284,284 A private detective hired to expose an adu...,284 A private detective hired to expose an adu...
