# Install Langchain

LangChain is a framework that makes working with LLMs easier. It provides APIs for integrating with documents, vector stores, and embedding models.

In [1]:
#!pip install openai==1.7.1

In [2]:
#!pip install langchain==0.1.0

In [3]:
#!pip install langchain_openai

In [4]:
import os
import openai

# Read the OpenAI API key from the environment so it never appears in the notebook.
# os.getenv returns None when the variable is unset; warn right here so the
# cause is visible before any later cell tries to make a request.
_openai_api_key = os.getenv("open_ai_secret_key")
if not _openai_api_key:
    import warnings

    warnings.warn(
        "Environment variable 'open_ai_secret_key' is not set; "
        "OpenAI requests will fail unless a key is provided some other way."
    )
openai.api_key = _openai_api_key

# Basic LLM prompting technique

In [5]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage

# Chat model client; the API key is read from the environment.
chat = ChatOpenAI(openai_api_key=os.getenv("open_ai_secret_key"))

# Baseline question, sent to the model without any retrieved context.
query = "who is ramakant jadhav in sacred games and which actor portrays ramakant jadhav in sacred games"

# A system message that sets the assistant role, followed by the user's question.
system_message = SystemMessage(content="You're a helpful assistant")
human_message = HumanMessage(content=query)
messages = [system_message, human_message]

chat.invoke(messages)

AIMessage(content='In the TV series "Sacred Games," Ramakant Jadhav is a police constable and a close associate of Inspector Sartaj Singh. He is portrayed by the actor Rajshri Deshpande. Ramakant Jadhav plays a significant role in assisting Sartaj in his investigation throughout the series.')

# Loading knowledge base documents

In [6]:
from langchain_community.document_loaders import TextLoader

# Load the knowledge-base text file as LangChain Document objects.
# The encoding is passed explicitly because the file contains non-ASCII text
# (Korean characters, the won sign); without it the loader relies on the
# platform default encoding, which is not UTF-8 on every system.
text_loader = TextLoader("sacred_games_script.txt", encoding="utf-8")
# NOTE: the variable name (sic) is kept as-is because later cells reference it.
knowedge_base_documents = text_loader.load()
knowedge_base_documents


[Document(page_content='Squid Game (Korean: 오징어 게임) is a South Korean television series created by Hwang Dong-hyuk for Netflix. Its cast includes Lee Jung-jae, Park Hae-soo, Wi Ha-joon, HoYeon Jung, O Yeong-su, Heo Sung-tae, Anupam Tripathi and Kim Joo-ryoung.\n\nThe series revolves around a secret contest where 456 players, all of whom are in deep financial hardship, risk their lives to play a series of deadly children\'s games for the chance to win a ₩45.6 billion prize. The series\' title draws from a similarly named Korean children\'s game. Hwang conceived the idea based on his own economic struggles, as well as the class disparity in South Korea and capitalism.[4][5] Though he wrote the story in 2009, Hwang could not find a production company to fund the idea until Netflix took an interest around 2019 as part of a drive to expand their foreign programming offerings.\n\nSquid Game was released worldwide on September 17, 2021, to critical acclaim and international attention. It beca

In [7]:

from langchain.text_splitter import CharacterTextSplitter


# Upper bound requested for each chunk when splitting the knowledge base.
chunk_size_limit = 1100
text_splitter_definition = CharacterTextSplitter(chunk_size=chunk_size_limit)

# Break the loaded documents into smaller Document chunks for embedding.
documents_after_splitting = text_splitter_definition.split_documents(
    knowedge_base_documents
)
documents_after_splitting

[Document(page_content="Squid Game (Korean: 오징어 게임) is a South Korean television series created by Hwang Dong-hyuk for Netflix. Its cast includes Lee Jung-jae, Park Hae-soo, Wi Ha-joon, HoYeon Jung, O Yeong-su, Heo Sung-tae, Anupam Tripathi and Kim Joo-ryoung.\n\nThe series revolves around a secret contest where 456 players, all of whom are in deep financial hardship, risk their lives to play a series of deadly children's games for the chance to win a ₩45.6 billion prize. The series' title draws from a similarly named Korean children's game. Hwang conceived the idea based on his own economic struggles, as well as the class disparity in South Korea and capitalism.[4][5] Though he wrote the story in 2009, Hwang could not find a production company to fund the idea until Netflix took an interest around 2019 as part of a drive to expand their foreign programming offerings.", metadata={'source': 'sacred_games_script.txt'}),
 Document(page_content="Squid Game was released worldwide on Septemb

In [17]:
# Peek at the first chunk to sanity-check the output of the split.
documents_after_splitting[0]

Document(page_content="Squid Game (Korean: 오징어 게임) is a South Korean television series created by Hwang Dong-hyuk for Netflix. Its cast includes Lee Jung-jae, Park Hae-soo, Wi Ha-joon, HoYeon Jung, O Yeong-su, Heo Sung-tae, Anupam Tripathi and Kim Joo-ryoung.\n\nThe series revolves around a secret contest where 456 players, all of whom are in deep financial hardship, risk their lives to play a series of deadly children's games for the chance to win a ₩45.6 billion prize. The series' title draws from a similarly named Korean children's game. Hwang conceived the idea based on his own economic struggles, as well as the class disparity in South Korea and capitalism.[4][5] Though he wrote the story in 2009, Hwang could not find a production company to fund the idea until Netflix took an interest around 2019 as part of a drive to expand their foreign programming offerings.", metadata={'source': 'sacred_games_script.txt'})

In [24]:
# Length of the second chunk's text, for comparison with the chunk_size
# that was passed to the splitter.
len(documents_after_splitting[1].page_content)

1092

# Embed Documents and store in Vector Store

In [8]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Embedding model client; the API key is read from the environment.
embeddings_api_key = os.getenv("open_ai_secret_key")
openai_embeddings = OpenAIEmbeddings(openai_api_key=embeddings_api_key)

# Embed every chunk and build the FAISS vector store from the results.
vector_db = FAISS.from_documents(
    documents=documents_after_splitting,
    embedding=openai_embeddings,
)


# Embed user query

In [9]:
# Embed the question with the same embedding model that was used to build
# the vector store, so the query vector is comparable with the stored ones.
query_embedding_vector = openai_embeddings.embed_query(query)

# Retrieve similar content from the vector DB

In [10]:

# Look up the stored chunks closest to the pre-computed query vector.
relevant_documents = vector_db.similarity_search_by_vector(
    embedding=query_embedding_vector
)

# Keep the text of the first hit; later cells use it as the reference context.
top_hit = relevant_documents[0]
relevant_document_text = top_hit.page_content
relevant_document_text

'There are 4 main actors in sacred games.\n\nThere is no ramakant jadhav in sacred games. It is a fake person mentioned in the show. There is no actor who playes ramakant jadhav in the show. Ramakant jadhav is not a cop. There is no actor who acted as him in the show. There is misinformation that someone might have played the role of Ramakant jadhav , but its not true. Sacred Games is a popular Indian TV series that aired on Netflix, but it does not include a actor who plays the character Ramakant Jadhav. \n\nsacred games was a tv show aired on netflix.'

# Look at the most relevant documents by index

In [11]:
# Show the first two hits returned by the search.
relevant_documents[:2]

[Document(page_content='There are 4 main actors in sacred games.\n\nThere is no ramakant jadhav in sacred games. It is a fake person mentioned in the show. There is no actor who playes ramakant jadhav in the show. Ramakant jadhav is not a cop. There is no actor who acted as him in the show. There is misinformation that someone might have played the role of Ramakant jadhav , but its not true. Sacred Games is a popular Indian TV series that aired on Netflix, but it does not include a actor who plays the character Ramakant Jadhav. \n\nsacred games was a tv show aired on netflix.', metadata={'source': 'sacred_games_script.txt'}),
 Document(page_content="Sacred Games is an Indian neo-noir crime thriller streaming television series based on Vikram Chandra's 2006 novel of the same name. India's first Netflix original series, it was produced and directed by Vikramaditya Motwane and Anurag Kashyap as Phantom Films. The novel was adapted by Varun Grover, Smita Singh, and Vasant Nath. Kelly Luege

# Look at the least relevant of the returned documents by index

In [12]:
# Last entry of the returned hits. Assuming the search returns hits ordered
# most-similar first, this is the lowest-ranked of the returned documents,
# not necessarily the least similar document in the whole store.
relevant_documents[-1]

Document(page_content="Squid Game (Korean: 오징어 게임) is a South Korean television series created by Hwang Dong-hyuk for Netflix. Its cast includes Lee Jung-jae, Park Hae-soo, Wi Ha-joon, HoYeon Jung, O Yeong-su, Heo Sung-tae, Anupam Tripathi and Kim Joo-ryoung.\n\nThe series revolves around a secret contest where 456 players, all of whom are in deep financial hardship, risk their lives to play a series of deadly children's games for the chance to win a ₩45.6 billion prize. The series' title draws from a similarly named Korean children's game. Hwang conceived the idea based on his own economic struggles, as well as the class disparity in South Korea and capitalism.[4][5] Though he wrote the story in 2009, Hwang could not find a production company to fund the idea until Netflix took an interest around 2019 as part of a drive to expand their foreign programming offerings.", metadata={'source': 'sacred_games_script.txt'})

# Query Augmentation 

In [13]:
# Trial 1: naive augmentation. The retrieved text is appended directly to the
# question, with no separator and no instruction on how to use it.
augmented_query = "".join([query, relevant_document_text])
augmented_query  # not rendered: only the final expression of a cell is displayed

system_message = SystemMessage(content="You're a helpful assistant. ")
human_message = HumanMessage(content=augmented_query)
messages = [system_message, human_message]

chat.invoke(messages)


AIMessage(content='I apologize for the confusion. In the TV show Sacred Games, Ramakant Jadhav is a fictional character who is a police officer. He is portrayed by actor Jitendra Joshi.')

In [14]:
# Better augmentation: the question is followed by an explicit instruction
# telling the model to use the retrieved text as a reference.
augmented_query = f"{query}\nalso use below text as reference before answering\n{relevant_document_text}"
augmented_query  # not rendered: only the final expression of a cell is displayed

system_message = SystemMessage(content="You're a helpful assistant. ")
human_message = HumanMessage(content=augmented_query)
messages = [system_message, human_message]

chat.invoke(messages)

AIMessage(content="I'm sorry, but I couldn't find any information on a character named Ramakant Jadhav in the TV show Sacred Games. It seems that the information you have might be incorrect. Sacred Games is a popular Indian TV series that aired on Netflix, but it does not include a character named Ramakant Jadhav. The show features 4 main actors, but none of them portray a character with that name.")

In [15]:
# Reverse sanity check: ask about information that is not directly in the knowledge base

In [16]:
# Reverse sanity check: ask a different question and confirm the augmented
# prompt still produces a sensible answer.
new_query = "who plays the character of Khanna Guruji"

# Retrieve context for this question rather than reusing relevant_document_text,
# which holds the passage that was retrieved for the earlier query.
new_query_hits = vector_db.similarity_search(new_query, k=1)
# Fall back to an empty context if the store returns nothing, instead of
# failing with an IndexError.
new_query_context = new_query_hits[0].page_content if new_query_hits else ""

augmented_query = (
    new_query
    + "\nalso use below text as reference before answering\n"
    + new_query_context
)
messages = [
    SystemMessage(content="You're a helpful assistant. "),
    HumanMessage(content=augmented_query),
]
chat.invoke(messages)

AIMessage(content='In the TV show Sacred Games, the character of Khanna Guruji is played by actor Pankaj Tripathi.')