In [7]:
!pip install pinecone-client==2.2.2 openai==0.28.0 tiktoken==0.5.1 langchain==0.0.291



In [8]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [13]:
import pandas as pd

# Load the raw IMDB export and give the columns used downstream clearer names.
imdb_raw = pd.read_csv("IMDB.csv").rename(
    columns={
        "primaryTitle": "movie_title",
        "Description": "movie_description",
    }
)

# Build a link to each title's IMDB page from its `tconst` identifier.
imdb_raw["source"] = "https://www.imdb.com/title/" + imdb_raw["tconst"]

# Keep only rows typed as "movie" and the columns needed to build documents.
# `.copy()` makes `movies` an independent frame, so adding columns to it in a
# later cell cannot raise SettingWithCopyWarning or touch `imdb_raw`.
is_movie = imdb_raw["titleType"] == "movie"
movies = imdb_raw.loc[
    is_movie,
    ["movie_title", "movie_description", "source", "genres"],
].copy()

# Preview the first rows only (the last expression of a cell is what gets displayed).
movies.head()

Unnamed: 0,movie_title,movie_description,source,genres
0,The Silence of the Lambs,"Jodie Foster stars as Clarice Starling, a top ...",https://www.imdb.com/title/tt0102926,"Crime,Drama,Thriller"
1,Terminator 2: Judgment Day,"In this sequel set eleven years after ""The Ter...",https://www.imdb.com/title/tt0103064,"Action,Sci-Fi"
2,The Lion King,This Disney animated feature follows the adven...,https://www.imdb.com/title/tt0110357,"Adventure,Animation,Drama"
3,Pulp Fiction,Vincent Vega (John Travolta) and Jules Winnfie...,https://www.imdb.com/title/tt0110912,"Crime,Drama"
4,The Shawshank Redemption,Andy Dufresne (Tim Robbins) is sentenced to tw...,https://www.imdb.com/title/tt0111161,Drama


In [14]:
from langchain.document_loaders import DataFrameLoader 

# Replace missing text fields with empty strings first: string concatenation
# with NaN yields NaN, which would blank out the whole page_content of a row.
text_columns = movies[["movie_title", "genres", "movie_description"]].fillna("")

# One text blob per movie: title, genre list and description on separate lines.
page_content = (
    "Title: " + text_columns["movie_title"] + "\n"
    + "Genre: " + text_columns["genres"] + "\n"
    + "Description: " + text_columns["movie_description"]
)

# `.assign` returns a new frame instead of setting a column on a filtered
# frame, which avoids a possible SettingWithCopyWarning. Only the text and
# its IMDB link are kept; `source` is the one remaining non-content column.
movies = movies.assign(page_content=page_content)[["page_content", "source"]]

# Turn every row into a LangChain Document whose text is `page_content`.
docs = DataFrameLoader(movies, page_content_column="page_content").load()

# Preview the first three documents.
docs[:3]

[Document(page_content="Title: The Silence of the Lambs\nGenre: Crime,Drama,Thriller\nDescription: Jodie Foster stars as Clarice Starling, a top student at the FBI's training academy. Jack Crawford (Scott Glenn) wants Clarice to interview Dr. Hannibal Lecter (Anthony Hopkins), a brilliant psychiatrist who is also a violent psychopath, serving life behind bars for various acts of murder and cannibalism. Crawford believes that Lecter may have insight into a case and that Starling, as an attractive young woman, may be just the bait to draw him out.", metadata={'source': 'https://www.imdb.com/title/tt0102926'}),
 Document(page_content='Title: Terminator 2: Judgment Day\nGenre: Action,Sci-Fi\nDescription: In this sequel set eleven years after "The Terminator," young John Connor (Edward Furlong), the key to civilization\'s victory over a future robot uprising, is the target of the shape-shifting T-1000 (Robert Patrick), a Terminator sent from the future to kill him. Another Terminator, the r

In [15]:
import tiktoken

# Count the tokens of every document using the cl100k_base encoding.
encoder = tiktoken.get_encoding("cl100k_base")
tokens_per_docs = [len(encoder.encode(doc.page_content)) for doc in docs]

# Price per 1,000 tokens; presumably USD for the embedding model used
# later — TODO confirm against current pricing.
cost_per_1000_tokens = 0.0001

# Estimated cost of processing all documents once.
total_tokens = sum(tokens_per_docs)
cost = (total_tokens/1000) * cost_per_1000_tokens
cost

0.0374556

In [16]:
import os
import pinecone

# Connect to Pinecone; the API key is read from the PINECONE_API_KEY
# environment variable so it never appears in the notebook.
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment="gcp-starter",
)

# Fetch the list of indexes once and reuse it for both the printout and the
# existence check, instead of issuing two separate remote calls.
existing_indexes = pinecone.list_indexes()
print(existing_indexes)

index_name = "imdb-movies"

# Create the 'imdb-movies' index only if it does not exist yet.
if index_name not in existing_indexes:
    pinecone.create_index(
        name=index_name,
        metric="cosine",
        # Presumably the vector size of the embeddings stored later — TODO confirm.
        dimension=1536,
    )

['imdb-movies']


In [19]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from pinecone.index import Index

# Embedding model wrapper and a handle on the Pinecone index created earlier.
embeddings = OpenAIEmbeddings()
index = Index(index_name)

# Decide whether the index already holds vectors.
index_stats = index.describe_index_stats()
index_has_vectors = index_stats["total_vector_count"] > 0

if not index_has_vectors:
    # Empty index: embed the documents and upload them to the vector store.
    docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)
else:
    # Already populated: attach to the existing vectors without re-embedding.
    docsearch = Pinecone.from_existing_index(index_name, embeddings)

question = "What's a good movie about an epic viking?"

# Use the vector database as a retriever and get the relevant documents for the question.
retriever = docsearch.as_retriever()
retriever.get_relevant_documents(question)

[Document(page_content="Title: Thor: Ragnarok\nGenre: Action,Adventure,Comedy\nDescription: Imprisoned on the other side of the universe, the mighty Thor finds himself in a deadly gladiatorial contest that pits him against the Hulk, his former ally and fellow Avenger. Thor's quest for survival leads him in a race against time to prevent the all-powerful Hela from destroying his home world and the Asgardian civilization.", metadata={'source': 'https://www.imdb.com/title/tt3501632'}),
 Document(page_content='Title: Troy\nGenre: Drama\nDescription: Based on Homer\'s "Iliad," this epic portrays the battle between the ancient kingdoms of Troy and Sparta. While visiting Spartan King Menelaus (Brendan Gleeson), Trojan prince Paris (Orlando Bloom) falls for Menelaus\' wife, Helen (Diane Kruger), and takes her back to Troy. Menelaus\' brother, King Agamemnon (Brian Cox), having already defeated every army in Greece, uses his brother\'s fury as a pretext to declare war against Troy, the last kin

In [22]:
# Import PromptTemplate
from langchain.prompts import PromptTemplate

# Prompt texts for the QA bot; read/adapt them at will.
# DOCUMENT_PROMPT is the per-document layout: the document text
# ({page_content}) followed by its IMDB link ({source}) and a separator line.
DOCUMENT_PROMPT = """{page_content}
IMDB link: {source}
========="""

# QUESTION_PROMPT holds the instructions, one worked example (question, three
# formatted documents, final answer with SOURCE) and the slots for the real
# request: {question} and {summaries}.
# {summaries} is presumably filled by the chain with the retrieved documents,
# each rendered through DOCUMENT_PROMPT — TODO confirm.
QUESTION_PROMPT = """Given the following extracted parts of a movie database and a question, create a final answer with the IMDB link as source ("SOURCE").
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCE" part in your answer.

QUESTION: What's a good movie about a robot to watch with my kid?
=========
Title: A.I. Artificial Intelligence
Genre: Drama,Sci-Fi
Description: A robotic boy, the first programmed to love, David (Haley Joel Osment) is adopted as a test case by a Cybertronics employee (Sam Robards) and his wife (Frances O'Connor). Though he gradually becomes their child, a series of unexpected circumstances make this life impossible for David. Without final acceptance by humans or machines, David embarks on a journey to discover where he truly belongs, uncovering a world in which the line between robot and machine is both vast and profoundly thin.
IMDB link: https://www.imdb.com/title/tt0212720
=========
Title: I, Robot
Genre: Action,Mystery,Sci-Fi
Description: In 2035, highly intelligent robots fill public service positions throughout the world, operating under three rules to keep humans safe. Despite his dark history with robotics, Detective Del Spooner (Will Smith) investigates the alleged suicide of U.S. Robotics founder Alfred Lanning (James Cromwell) and believes that a human-like robot (Alan Tudyk) murdered him. With the help of a robot expert (Bridget Moynahan), Spooner discovers a conspiracy that may enslave the human race.
IMDB link: https://www.imdb.com/title/tt0343818
=========
Title: The Iron Giant
Genre: Action,Adventure,Animation
Description: In this animated adaptation of Ted Hughes' Cold War fable, a giant alien robot (Vin Diesel) crash-lands near the small town of Rockwell, Maine, in 1957. Exploring the area, a local 9-year-old boy, Hogarth, discovers the robot, and soon forms an unlikely friendship with him. When a paranoid government agent, Kent Mansley, becomes determined to destroy the robot, Hogarth and beatnik Dean McCoppin (Harry Connick Jr.) must do what they can to save the misunderstood machine.
IMDB link: https://www.imdb.com/title/tt0129167
=========
FINAL ANSWER: 'The Iron Giant' is an animated movie about a friendship between a robot and a kid. It would be a good movie to watch with a kid.
SOURCE: https://www.imdb.com/title/tt0129167

QUESTION: {question}
=========
{summaries}
FINAL ANSWER:"""

# Create prompt template objects from the two template strings above;
# these are the objects handed to the QA chain.
document_prompt = PromptTemplate.from_template(DOCUMENT_PROMPT)
question_prompt = PromptTemplate.from_template(QUESTION_PROMPT)

In [23]:
# Import RetrievalQAWithSourcesChain and ChatOpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI

# Chat model that writes the final answer; temperature 0 is set for this run.
qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Custom prompts for the "stuff" chain: one for the overall question, one for
# formatting each retrieved document.
stuff_chain_kwargs = {
    "prompt": question_prompt,
    "document_prompt": document_prompt,
}

# Assemble the retrieval QA chain on top of the Pinecone vector store.
qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=qa_llm,
    chain_type="stuff",
    chain_type_kwargs=stuff_chain_kwargs,
    retriever=docsearch.as_retriever(),
)

# Put the movie question to the chain and display its answer with sources.
qa_with_sources(question)

{'question': "What's a good movie about an epic viking?",
 'answer': "'Troy' is a good movie about an epic viking. It portrays the battle between the ancient kingdoms of Troy and Sparta. \n",
 'sources': 'https://www.imdb.com/title/tt0332452'}

In [24]:
# Import langchain
import langchain


# Switch on LangChain's global debug flag so every chain and LLM step is logged.
langchain.debug = True

# Re-run the same question, passing the chain input as an explicit dict,
# to inspect the intermediate steps in the debug trace.
qa_with_sources({"question": question})

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What's a good movie about an epic viking?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What's a good movie about an epic viking?",
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.92s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "'Troy' is a good movie about an epic v

{'question': "What's a good movie about an epic viking?",
 'answer': "'Troy' is a good movie about an epic viking. It portrays the battle between the ancient kingdoms of Troy and Sparta. \n",
 'sources': 'https://www.imdb.com/title/tt0332452'}