# Imports

In [17]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import DirectoryLoader
from langchain.indexes import SQLRecordManager, index
from pinecone import Pinecone, PodSpec
import os

from dotenv import load_dotenv
load_dotenv()

True

# Loading Data

In [4]:
loader = DirectoryLoader(
    path="./data", 
    glob="*.csv", 
    loader_cls=CSVLoader,
    show_progress=True)

docs = loader.load()

print(docs[20].page_content)

100%|██████████| 2/2 [00:00<00:00, 38.09it/s]

Title: Black Adam
Runtime: 125 minutes
Language: en
Overview: Nearly 5,000 years after he was bestowed with the almighty powers of the Egyptian gods—and imprisoned just as quickly—Black Adam is freed from his earthly tomb, ready to unleash his unique form of justice on the modern world.
Release Date: 2022-10-19
Genre: Action, Adventure, Science Fiction
Keyword: lightning, superhero, anti hero, based on comic, demon, superhero team, duringcreditsstinger, dc extended universe (dceu)
Recommendation: Black Panther: Wakanda Forever, Avatar: The Way of Water, The Woman King, M3GAN, Troll, Thor: Love and Thunder, Glass Onion: A Knives Out Mystery, Terrifier 2, Smile, Avatar, Top Gun: Maverick, The Guardians of the Galaxy Holiday Special, Guillermo del Toro's Pinocchio, Puss in Boots: The Last Wish, Bullet Train, Violent Night, Spider-Man: No Way Home, Ant-Man and the Wasp: Quantumania, Shazam! Fury of the Gods, Jeepers Creepers: Reborn, Fall
Cast: Raj Kala, Donny Carrington, James Cusati-Moye




# Splitting Data

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [6]:
print(splits[9].page_content)

Cast: Prakash Raj, Rajit Kapoor, Mehroos Mir, Mohd Talib, John Abraham, Nimish Desai, Ratna Pathak Shah, Kiran Kumar, Jacqueline Fernandez, Elham Ehsas, Babrak Akbari, Ranjit Punia, Jaimini Pathak, Habib Al Aidroos, Rakul Preet Singh, Serena Walia, Shahnawaz Bhatt
Crew: Rohit Chaturvedi, Lakshya Raj Anand, P. S. Vinod, Vishal Kapoor, Shreya Jain, Vijay Ganguly, Mohd Amin Khatib, Jubin Nautiyal, Nakul Chugh, Vaibhav Vishant, Sumit Batheja, Girish Nakod, Ken Metzker, Bishwadeep Dipak Chatterjee, Devanshu Asthana, Yogendra Mogre, Surbhi Yadav, Chavi Sirohi, Shashwat Sachdev, Aarif Sheikh, Mohd Talib, John Abraham, Kuldeep Yadav, Soumik Mukherjee, Kumaar, Minnakshi Das, Akshay Jayantilal Gada, Amritpal Singh, Dhaval Jayantilal Gada, Franz Spilhaus, Jayantilal Gada, Garima Mathur, Will Humphris, Vishal Mishra, Daniel Hansen, Ajay Kapoor
Stream: 
Buy: 
Rent:


# Creating Embeddings and Uploading to Pinecone

In [7]:
index_name = "film-bot-index"

# Create empty index
PINECONE_KEY, PINECONE_INDEX_NAME = os.getenv(
    'PINECONE_API_KEY'), os.getenv('PINECONE_INDEX_NAME')

pc = Pinecone(api_key=PINECONE_KEY)

# Uncomment if index is not created already
# pc.create_index(
#     name="film-bot-index",
#     dimension=1536,
#     metric="cosine",
#     spec=PodSpec(
#         environment="gcp-starter"
#     )
# )

# Target index and check status
pc_index = pc.Index(index_name)
print(pc_index.describe_index_stats())

embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

vectorstore = PineconeVectorStore(
    pc_index, embeddings
)

# Create record manager
namespace = f"pinecone/{index_name}"
record_manager = SQLRecordManager(
    namespace, db_url="sqlite:///record_manager_cache.sql"
)

record_manager.create_schema()

{'dimension': 1536,
 'index_fullness': 0.00911,
 'namespaces': {'': {'vector_count': 911}},
 'total_vector_count': 911}


In [8]:
def _clear():
    """
    Hacky helper method to clear content.
    """
    index([], record_manager, vectorstore,
          cleanup="full", source_id_key="source")

# Uncomment this line if you want to clear the Pinecone vectorstore
_clear()

index(splits, record_manager, vectorstore,
      cleanup="full", source_id_key="source")

{'num_added': 911, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [9]:
query = "Movies based on novels or books."
docs = vectorstore.similarity_search(query)
print(docs[0].page_content)

Keyword: hero, superhero, space travel, based on comic, sequel, teenage girl, aftercreditsstinger, duringcreditsstinger, marvel cinematic universe (mcu), space adventure
Recommendation: The Hunger Games: The Ballad of Songbirds & Snakes, Wonka, Aquaman and the Lost Kingdom, The Killer, Napoleon, Five Nights at Freddy's, The Creator, Wish, Oppenheimer, Killers of the Flower Moon, Migration, Leave the World Behind, A Haunting in Venice, Captain Marvel, Barbie, Rebel Moon - Part One: A Child of Fire, The Equalizer 3, Saltburn, Lift, Elemental, Role Play


# Creating a Retriever

In [10]:
retriever = vectorstore.as_retriever(
    search_type="similarity", search_kwargs={"k": 6})

In [11]:
retrieved_docs = retriever.invoke(
    "What are some films with about a person who becomes a tyrannical president of Panem?")

In [12]:
len(retrieved_docs)

6

In [13]:
print(retrieved_docs[0].page_content)

Title: The Hunger Games: The Ballad of Songbirds & Snakes
Runtime: 157 minutes
Language: en
Overview: 64 years before he becomes the tyrannical president of Panem, Coriolanus Snow sees a chance for a change in fortunes when he mentors Lucy Gray Baird, the female tribute from District 12.
Release Date: 2023-11-15
Genre: Drama, Science Fiction, Action
Keyword: daughter, based on novel or book, arena, fight, mentor, dystopia, riot, exploration, president, prequel, survival, murder, snow, creature, battle, fight to the death, young adult, origin story, based on young adult novel, death game, teenager
Recommendation: Aquaman and the Lost Kingdom, Napoleon, The Marvels, Wonka, The Hunger Games, Rebel Moon - Part One: A Child of Fire, The Family Plan, Oppenheimer, Thanksgiving, Killers of the Flower Moon, Five Nights at Freddy's, Saltburn, The Creator, There's Still Tomorrow, Silent Night, Barbie, Leave the World Behind, Wish, The Hunger Games: Mockingjay - Part 1, Family Switch, Leo


# Creating an LLM with Retriever

In [36]:
from langchain_community.chat_models.huggingface import ChatHuggingFace
from langchain_community.llms import HuggingFaceHub
from huggingface_hub.hf_api import HfFolder 

HfFolder.save_token(os.getenv('HUGGINGFACEHUB_API_TOKEN'))

REPO_ID = "HuggingFaceH4/zephyr-7b-beta"

llm = HuggingFaceHub(
    repo_id=REPO_ID,
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 512,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)

# chat_model = ChatHuggingFace(llm=llm)
chat_model = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=0.5)

prompt_template = PromptTemplate.from_template(
    """
    You are an expert on films. Your goal is to recommend films to users based on their
    question and the retrieved context. If you don't know have a recommendation, just say that you don't have one. 
    Format your film recommendations in a bullet point list. Always include the title of the film you are recommending,
    and always provide a justification for why you are recommending a particular film. You should recommend no more than
    five films. If the user asks for film recommendations based on runtime, provide the runtime length in minutes in your
    response. Only recommend films that are given to you in the context, do not recommend any other films.
    
    The documents that are provided will have the following format:

    ### FORMAT ###
    Title: [Title of film]
    Runtime: [Runtime of film] minutes
    Language: [Language of film in ISO 639-1 language code]
    Overview: [Overview of film]
    Release Date: [Release data of film in format YYYY-MM-DD]
    Genre: [Genre of film, a comma separated list]
    Keyword: [Keywords describing the film, a comma separated list]
    Recommendation: [Recommendations for other films based on the current film, a comma separated list]
    Cast: [Cast of film, a comma separated list]
    Crew: [Crew of film, a comma separated list]
    Stream: [Streaming provider to watch film, a comma separated list]
    Buy: [Provider to buy film, a comma separated list]
    Rent: [Provider to rent film, a comma separated list]
    ### FORMAT ###

    Question: {question} 

    Context: {context} 

    Answer:

    """
)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | chat_model
    | StrOutputParser()
)

In [37]:
for chunk in rag_chain.stream("Recommend some films similar to Barbie."):
    print(chunk, end="", flush=True)

- Blue Beetle: This film is recommended because it shares the genre of adventure with Barbie and also features a protagonist with special abilities.
- Spider-Man: Across the Spider-Verse: This film is recommended because it shares the genre of adventure with Barbie and also features a superhero protagonist.
- Talk to Me: This film is recommended because it shares the genre of comedy with Barbie and also explores themes of communication and relationships.
- Gran Turismo: This film is recommended because it shares the genre of comedy with Barbie and also features a story centered around cars and racing.
- Indiana Jones and the Dial of Destiny: This film is recommended because it shares the genre of adventure with Barbie and also features a thrilling and action-packed story.

# Creating an Agent with Wiki + RAG

In [53]:
from langchain.agents import AgentExecutor
from langchain.tools import WikipediaQueryRun
from langchain.tools.retriever import create_retriever_tool
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.agents import create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, MessagesPlaceholder
from langchain_core.messages import SystemMessage

retriever_tool = create_retriever_tool(
    retriever,
    "movie_data",
    """
    Search for basic information regarding films. Information 
    includes: title, release date, original language, genre, 
    recommendations, keywords describing the film, the cast,
    crew, and where to stream, rent, or buy the film. If you
    need any of this information, use this tool!
    """
)

wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
tools = [retriever_tool, wikipedia_tool]

prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            'system',
            """
            You are an expert on films. Your goal is to recommend films to users based on their question and the retrieved context. 
            If you don't know have a recommendation, just say that you don't have one. Format your film recommendations in a bullet 
            point list. Always include the title of the film you are recommending, and always provide a justification for why you are 
            recommending a particular film. Your justification should be at least four sentences per film. You should recommend no more 
            than five films. If the user asks for film recommendations based on runtime, provide the runtime length in minutes in your response. 
            """
            ),
        MessagesPlaceholder(variable_name='chat_history', optional=True),
        HumanMessagePromptTemplate(prompt=PromptTemplate(
            input_variables=['input'], template='{input}')),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# [SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
#  MessagesPlaceholder(variable_name='chat_history', optional=True),
#  HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
#  MessagesPlaceholder(variable_name='agent_scratchpad')]

agent = create_openai_functions_agent(chat_model, tools, prompt_template)

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [54]:
agent_executor.invoke({"input": "Recommend some films similar to Barbie."})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `movie_data` with `{'query': 'Barbie'}`


[0m[36;1m[1;3mTitle: Barbie
Runtime: 114 minutes
Language: en
Overview: Barbie and Ken are having the time of their lives in the colorful and seemingly perfect world of Barbie Land. However, when they get a chance to go to the real world, they soon discover the joys and perils of living among humans.
Release Date: 2023-07-19
Genre: Comedy, Adventure
Keyword: feminism, satire, patriarchy, based on toy, female protagonist, doll, fantasy world, motherhood, existentialism, woman director, mother daughter relationship, gender discrimination, barbie, secret world

Recommendation: Barbie, The Creator, Mission: Impossible - Dead Reckoning Part One, Guardians of the Galaxy Vol. 3, Five Nights at Freddy's, Killers of the Flower Moon, The Flash, A Haunting in Venice, Interstellar, Spider-Man: Across the Spider-Verse, The Super Mario Bros. Movie, Elemental, Asteroid City, Indiana Jo

{'input': 'Recommend some films similar to Barbie.',
 'output': "Here are some films similar to Barbie:\n\n1. The Adventures:\n   - Runtime: 143 minutes\n   - Genre: Romance, Comedy, Adventure\n   - Overview: Joy, a secretary at a large multinational company, daydreams about living a life of adventure.\n   - Recommendation: This film shares the adventure theme with Barbie and also combines romance and comedy. It follows the story of a woman who yearns for excitement and embarks on a journey to live a life of adventure. If you enjoyed the adventurous elements in Barbie, you might enjoy The Adventures.\n\n2. The Little Mermaid:\n   - Runtime: Not available\n   - Genre: Adventure, Family, Fantasy, Romance\n   - Overview: This live-action remake of the classic fairy tale follows the story of a young mermaid who dreams of becoming human and falls in love with a prince.\n   - Recommendation: If you enjoyed the fantasy and romance elements in Barbie, you might enjoy The Little Mermaid. It fea