In [1]:
import os
import pandas as pd
from typing import List, Dict
from warnings import simplefilter
from langchain import hub
from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma, LanceDB
from langchain_text_splitters import (
    RecursiveCharacterTextSplitter,
)
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.chains import RetrievalQA
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from dotenv import load_dotenv
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from operator import itemgetter
from tqdm import tqdm

In [4]:
# Read the pre-filtered movie table from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='Input_data/filtered_data.csv')
df.head(n=5)

Unnamed: 0,Release Year,Title,Origin/Ethnicity,Director,Cast,Genre,Wiki Page,Plot
0,2010,127 Hours,American,Danny Boyle,"James Franco, Amber Tamblyn, Kate Mara, Clémen...","biography, drama",https://en.wikipedia.org/wiki/127_Hours,Mountaineer Aron Ralston goes hiking at Utah's...
1,2010,The A-Team,American,Joe Carnahan,"Liam Neeson, Bradley Cooper, Quinton ""Rampage""...","action, adventure",https://en.wikipedia.org/wiki/The_A-Team_(film),"John ""Hannibal"" Smith is held captive in Mexic..."
2,2010,A Little Help,American,Michael J. Weithorn,"Jenna Fischer, Chris O'Donnell, Rob Benedict, ...",comedy,https://en.wikipedia.org/wiki/A_Little_Help,Laura Pehlke (Jenna Fischer) is married to Bob...
3,2010,Adventures of Power,American,Ari Gold,"Ari Gold, Michael McKean, Jane Lynch, Shoshann...",comedy,https://en.wikipedia.org/wiki/Adventures_of_Power,The story takes place in the fictional small t...
4,2010,Alice in Wonderland,American,Tim Burton,"Johnny Depp, Anne Hathaway, Helena Bonham Cart...","family, fantasy",https://en.wikipedia.org/wiki/Alice_in_Wonderl...,Troubled by a strange recurring dream and mour...


In [6]:
# Keep only the rows whose "Release Year" is 2017; the remaining row count is
# the cell's displayed value.
df = df.loc[df["Release Year"] == 2017]
len(df)

213

In [7]:
# Write the 2017 subset to disk without the DataFrame index column.
df.to_csv(
    path_or_buf='./Input_data/test_data.csv',
    index=False,
)

In [8]:
from dotenv import load_dotenv

# Pull key=value pairs from a local .env file into the process environment.
load_dotenv()

# os.environ only accepts str values, so assigning os.getenv(...) directly
# raised an opaque TypeError whenever HF_TOKEN was missing. Check first and
# fail with an actionable message instead.
_hf_token = os.getenv('HF_TOKEN')
if not _hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set; define it in the environment or in a .env file."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# The original unconditional self-assignment of OPENAI_API_KEY was a no-op when
# the key existed and a TypeError when it did not. Keep it guarded so a missing
# key no longer crashes this cell.
_openai_key = os.getenv('OPENAI_API_KEY')
if _openai_key:
    os.environ["OPENAI_API_KEY"] = _openai_key

In [1]:
from langchain_community.document_loaders import CSVLoader

# test_data.csv is written by pandas' to_csv, which encodes as UTF-8 by default,
# and the table contains non-ASCII names. Without an explicit encoding the
# loader falls back to the platform default (not UTF-8 on Windows), which can
# garble or fail on those characters, so pin UTF-8 here.
loader = CSVLoader(file_path='./Input_data/test_data.csv', encoding='utf-8')

# One Document per CSV row.
data = loader.load()

In [2]:
# Spot-check: show the text of the loaded document at index 28.
print(data[28].page_content)

Release Year: 2017
Title: Rock Dog
Origin/Ethnicity: American
Director: Ash Brannon
Cast: Ash Brannon (director/screenplay); Kurt Voelker (screenplay); Luke Wilson, Eddie Izzard, J. K. Simmons, Lewis Black, Kenan Thompson, Mae Whitman, Jorge Garcia, Matt Dillon, Sam Elliott
Genre: animation, comedy
Wiki Page: https://en.wikipedia.org/wiki/Rock_Dog
Plot: Bodi (Luke Wilson) is a young Tibetan Mastiff who is expected to be the next guard of the village of Snow Mountain, succeeding his father Khampa (J. K. Simmons) after the latter has driven out a pack of gangster grey wolves led by the villainous Linnux (Lewis Black) years ago, but is sure they will return. Khampa has some of the local sheep dressed up as Mastiffs to give the illusion the village is being guarded by multiple Mastiffs to keep the wolves at bay, but Bodi has trouble perfecting his father's signature move the Iron Paw which projects a powerful blast that as Khampa states can only happen if Bodi "finds the fire." Khampa has 

In [12]:
# Retrieval settings used by the cells below:
# - chunk_size / chunk_overlap are handed to the text splitter,
# - top_k is assigned to the BM25 retriever's `k`,
# - embed_fn is the embedding function given to the Chroma store.
chunk_size, chunk_overlap = 1000, 100
top_k = 2
embed_fn = SentenceTransformerEmbeddings(
    model_name="all-minilm-l6-v2",
)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [13]:
# Directory passed to Chroma as `persist_directory` when the store is built
# and when it is re-opened.
persist_directory = "db"

In [16]:
# Cut every loaded row into overlapping character chunks.
# NOTE(review): in the sample answer shown later in this notebook the retrieved
# chunk starts at "Genre:", i.e. without the row's Title / Release Year lines,
# and the model reports 2016 for a 2017 row. Splitting appears to separate a
# plot from its title and year; confirm whether chunks should carry them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
docs = text_splitter.split_documents(data)

# Keyword (BM25) retriever over the chunks; its `k` is set to top_k.
bm25_retriever = BM25Retriever.from_documents(docs)
bm25_retriever.k = top_k

# Embedding-based retriever backed by a Chroma store created with
# persist_directory; it is asked for k=3 results.
vectorstore = Chroma.from_documents(
    docs,
    embed_fn,
    persist_directory=persist_directory,
)
chroma_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Combine both retrievers, weighting BM25 at 0.75 and Chroma at 0.25.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, chroma_retriever],
    weights=[0.75, 0.25],
)

In [21]:
# Hugging Face repository id that create_qa_chain passes to HuggingFaceEndpoint.
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

In [18]:
# Prompt text for the QA chain. It has two placeholders, {context} and
# {question}, and asks for titles formatted as "Movie_Title (Release Year)".
# NOTE(review): the sample response in this notebook is "Rock_Dog (2016)" —
# the underscore may come from the "Movie_Title" wording; confirm the intended
# output format.
template = """Based on the provided context, list the movies that match the query. If no relevant movies are found, respond with 'No Movies Found'. Provide the titles in the format: Movie_Title (Release Year). Do not provide any extra details regarding context or answer. Do not add any explanation for your answer.
  ### CONTEXT
  {context}
  ### QUESTION
  Question: {question}
  ### ANSWER
  Answer:
  """

# Turn the raw template string into a chat prompt object used by create_qa_chain.
prompt = ChatPromptTemplate.from_template(template)

In [19]:
def create_qa_chain(retriever):
    """Assemble the question-answering pipeline around ``retriever``.

    The returned runnable takes a mapping with a ``"question"`` key. The
    question is piped into ``retriever`` to produce the context, the
    module-level ``prompt`` is piped into a ``HuggingFaceEndpoint`` built for
    the module-level ``repo_id``, and the final mapping exposes the model
    output under ``"response"`` and the retrieved context under ``"context"``.

    Args:
        retriever: Runnable retriever that receives the question.

    Returns:
        The composed runnable chain.
    """
    # NOTE(review): the notebook output warns that max_length "was transferred
    # to model_kwargs" — confirm it is the generation limit that was intended.
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        max_length=2048,
        temperature=0.5,
        huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    )

    pick_question = itemgetter("question")
    pick_context = itemgetter("context")

    # Stage 1: fetch context for the question while keeping the question itself.
    retrieve = {
        "context": pick_question | retriever,
        "question": pick_question,
    }
    # Stage 2: pass the mapping through, re-assigning "context" from itself.
    carry_context = RunnablePassthrough.assign(context=pick_context)
    # Stage 3: run prompt -> LLM and return the answer next to its context.
    respond = {
        "response": prompt | llm,
        "context": pick_context,
    }

    return retrieve | carry_context | respond

In [22]:
# QA chain driven by the freshly built BM25 + Chroma ensemble retriever.
ensemble_chain = create_qa_chain(retriever=ensemble_retriever)

  warn_deprecated(
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Dhruv\.cache\huggingface\token
Login successful


In [27]:
# Smoke test: ask about a plot taken from one of the indexed rows.
ensemble_chain.invoke(
    {
        "question": "Which movies involves a young Tibetan Mastiff who is expected to be the next guard of the village of Snow Mountain?",
    }
)

{'response': '1. Rock_Dog (2016)',
 'context': [Document(metadata={'source': './Input_data/test_data.csv', 'row': 28}, page_content='Genre: animation, comedy\nWiki Page: https://en.wikipedia.org/wiki/Rock_Dog\nPlot: Bodi (Luke Wilson) is a young Tibetan Mastiff who is expected to be the next guard of the village of Snow Mountain, succeeding his father Khampa (J. K. Simmons) after the latter has driven out a pack of gangster grey wolves led by the villainous Linnux (Lewis Black) years ago, but is sure they will return. Khampa has some of the local sheep dressed up as Mastiffs to give the illusion the village is being guarded by multiple Mastiffs to keep the wolves at bay, but Bodi has trouble perfecting his father\'s signature move the Iron Paw which projects a powerful blast that as Khampa states can only happen if Bodi "finds the fire." Khampa has also forbidden music in the village since Bodi got distracted from his duties when he was younger.'),
  Document(metadata={'source': './Inp

In [28]:
# Re-open the Chroma store from persist_directory instead of rebuilding it.
persisted_vectordb = Chroma(persist_directory=persist_directory, embedding_function=embed_fn)
persisted_chroma_retriever = persisted_vectordb.as_retriever(search_kwargs={"k": 3})

# Fix: the ensemble previously wrapped the in-memory `chroma_retriever`, so the
# re-opened store was never queried and this cell could not validate
# persistence. Use the retriever built from the persisted store.
persisted_ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, persisted_chroma_retriever],
    weights=[0.75, 0.25],
)

  warn_deprecated(


In [29]:
# QA chain built on the ensemble retriever prepared for the persisted store.
persisted_ensemble_chain = create_qa_chain(retriever=persisted_ensemble_retriever)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Dhruv\.cache\huggingface\token
Login successful


In [30]:
# Same sample question, this time through the persisted-store chain.
persisted_ensemble_chain.invoke(
    {
        "question": "Which movies involves a young Tibetan Mastiff who is expected to be the next guard of the village of Snow Mountain?",
    }
)

{'response': '1. Rock_Dog (2016)',
 'context': [Document(metadata={'source': './Input_data/test_data.csv', 'row': 28}, page_content='Genre: animation, comedy\nWiki Page: https://en.wikipedia.org/wiki/Rock_Dog\nPlot: Bodi (Luke Wilson) is a young Tibetan Mastiff who is expected to be the next guard of the village of Snow Mountain, succeeding his father Khampa (J. K. Simmons) after the latter has driven out a pack of gangster grey wolves led by the villainous Linnux (Lewis Black) years ago, but is sure they will return. Khampa has some of the local sheep dressed up as Mastiffs to give the illusion the village is being guarded by multiple Mastiffs to keep the wolves at bay, but Bodi has trouble perfecting his father\'s signature move the Iron Paw which projects a powerful blast that as Khampa states can only happen if Bodi "finds the fire." Khampa has also forbidden music in the village since Bodi got distracted from his duties when he was younger.'),
  Document(metadata={'source': './Inp

In [31]:
# File the pickled BM25 retriever is written to.
bm25_file_path = "bm25_index.pkl"

In [32]:
import pickle 
def save_bm25_index(bm25_retriever, file_path):
    """Pickle ``bm25_retriever`` into the file at ``file_path``.

    The file is opened in binary write mode, so existing content is replaced.

    Args:
        bm25_retriever: Object to serialize.
        file_path: Destination path of the pickle file.
    """
    with open(file_path, mode='wb') as sink:
        pickle.Pickler(sink).dump(bm25_retriever)

def load_bm25_index(file_path):
    """Return the object unpickled from the file at ``file_path``.

    Unpickling can execute arbitrary code, so only call this on files that
    come from a trusted source (e.g. written by ``save_bm25_index``).

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The deserialized object.
    """
    with open(file_path, mode='rb') as source:
        restored = pickle.Unpickler(source).load()
    return restored

In [33]:
# Persist the in-memory BM25 retriever so it can be reloaded without the corpus.
save_bm25_index(
    bm25_retriever=bm25_retriever,
    file_path=bm25_file_path,
)

In [2]:
import streamlit as st
from functions import *
from constants import *
from langchain.retrievers import EnsembleRetriever

# Load the model and create the ensemble chain
def load_model(chroma_k=3, weights=(0.75, 0.25)):
    """Rebuild the ensemble QA chain from artifacts already saved on disk.

    Loads the embedding function via ``load_local_embedding_model``, re-opens
    the Chroma store at ``persist_directory``, loads the BM25 retriever from
    ``bm25_file_path``, combines both in an ``EnsembleRetriever`` and wraps
    the result with ``create_qa_chain``.

    Args:
        chroma_k: Value passed as ``k`` in the Chroma retriever's
            ``search_kwargs``. Defaults to 3, the previously hard-coded value.
        weights: Two weights for the ensemble, in the order
            (BM25, Chroma). Defaults to the previously hard-coded
            ``(0.75, 0.25)``.

    Returns:
        The chain produced by ``create_qa_chain`` for the ensemble retriever.
    """
    embed_fn = load_local_embedding_model(model_directory)
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embed_fn)
    chroma_retriever = vectordb.as_retriever(search_kwargs={"k": chroma_k})
    # NOTE(review): load_bm25_index presumably unpickles the file, as the
    # notebook's own definition does; only point bm25_file_path at a trusted
    # artifact.
    bm25_retriever = load_bm25_index(bm25_file_path)
    ensemble_retriever = EnsembleRetriever(
        retrievers=[bm25_retriever, chroma_retriever],
        # A tuple default avoids a shared mutable default; hand over a fresh list.
        weights=list(weights),
    )
    return create_qa_chain(ensemble_retriever)

  from tqdm.autonotebook import tqdm, trange


In [3]:
# Build the QA chain from the saved artifacts via load_model().
chain_retreiver = load_model()

  warn_deprecated(
  warn_deprecated(
  warn_deprecated(
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


In [5]:
# Same sample question, answered by the chain rebuilt from disk.
chain_retreiver.invoke(
    {
        "question": "Which movies involves a young Tibetan Mastiff who is expected to be the next guard of the village of Snow Mountain?",
    }
)

{'response': '1. Rock Dog (2016)',
 'context': [Document(metadata={'source': 'c:\\Users\\Dhruv\\OneDrive\\Desktop\\Git Repos\\Movie_Recommandation_App_using_RAG\\Input_data\\test_data.csv', 'row': 28}, page_content='Genre: animation, comedy\nWiki Page: https://en.wikipedia.org/wiki/Rock_Dog\nPlot: Bodi (Luke Wilson) is a young Tibetan Mastiff who is expected to be the next guard of the village of Snow Mountain, succeeding his father Khampa (J. K. Simmons) after the latter has driven out a pack of gangster grey wolves led by the villainous Linnux (Lewis Black) years ago, but is sure they will return. Khampa has some of the local sheep dressed up as Mastiffs to give the illusion the village is being guarded by multiple Mastiffs to keep the wolves at bay, but Bodi has trouble perfecting his father\'s signature move the Iron Paw which projects a powerful blast that as Khampa states can only happen if Bodi "finds the fire." Khampa has also forbidden music in the village since Bodi got distr