In [None]:
from dotenv import load_dotenv

# Read the project-level .env file (two directories above this notebook).
load_dotenv(dotenv_path='../../.env')

# Write to a Local File

In [3]:
from langchain.document_loaders import TextLoader

# Sample article used as the source document for the rest of the notebook.
# taken from https://www.theverge.com/2023/3/14/23639313/google-ai-language-model-palm-api-challenge-openai
text = """Google opens up its AI language model PaLM to challenge OpenAI and GPT-3
Google is offering developers access to one of its most advanced AI language models: PaLM.
The search giant is launching an API for PaLM alongside a number of AI enterprise tools
it says will help businesses “generate text, images, code, videos, audio, and more from
simple natural language prompts.”

PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or
Meta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs,
PaLM is a flexible system that can potentially carry out all sorts of text generation and
editing tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for
example, or you could use it for tasks like summarizing text or even writing code.
(It’s similar to features Google also announced today for its Workspace apps like Google
Docs and Gmail.)
"""

# The article contains non-ASCII characters (curly quotes and apostrophes).
# Pin UTF-8 for both the write and the read so the round trip does not depend
# on the platform's default encoding.
with open("my_file.txt", "w", encoding="utf-8") as file:
    file.write(text)

# Load the file back as a list of LangChain documents.
loader = TextLoader("my_file.txt", encoding="utf-8")
docs_from_file = loader.load()

print(docs_from_file)

[Document(page_content='Google opens up its AI language model PaLM to challenge OpenAI and GPT-3\nGoogle is offering developers access to one of its most advanced AI language models: PaLM.\nThe search giant is launching an API for PaLM alongside a number of AI enterprise tools\nit says will help businesses “generate text, images, code, videos, audio, and more from\nsimple natural language prompts.”\n\nPaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or\nMeta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs,\nPaLM is a flexible system that can potentially carry out all sorts of text generation and\nediting tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for\nexample, or you could use it for tasks like summarizing text or even writing code.\n(It’s similar to features Google also announced today for its Workspace apps like Google\nDocs and Gmail.)\n', metadata={'source': 'my_file.txt'})]


# Split the Documents into Chunks with CharacterTextSplitter

In [5]:
from langchain.text_splitter import CharacterTextSplitter

# "\n\n" is the splitter's default separator, written out here because it
# explains the warning this cell emits: the text is only cut at blank lines,
# so a paragraph longer than chunk_size is kept as a single oversized chunk.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=200,
    chunk_overlap=20,
)

# Split the loaded document into chunk documents.
docs = text_splitter.split_documents(docs_from_file)

# For this article the result is 2 chunks (one per paragraph).
print(docs)

Created a chunk of size 373, which is longer than the specified 200


[Document(page_content='Google opens up its AI language model PaLM to challenge OpenAI and GPT-3\nGoogle is offering developers access to one of its most advanced AI language models: PaLM.\nThe search giant is launching an API for PaLM alongside a number of AI enterprise tools\nit says will help businesses “generate text, images, code, videos, audio, and more from\nsimple natural language prompts.”', metadata={'source': 'my_file.txt'}), Document(page_content='PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or\nMeta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs,\nPaLM is a flexible system that can potentially carry out all sorts of text generation and\nediting tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for\nexample, or you could use it for tasks like summarizing text or even writing code.\n(It’s similar to features Google also announced today for its Workspace apps like Google\nDocs 

# Set Embeddings

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-transformers MiniLM model, configured to run on the CPU.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

# Create an Instance of the DeepLake Dataset

In [7]:
from langchain.vectorstores import DeepLake

# Before executing the following code, make sure to have your
# Activeloop key saved in the “ACTIVELOOP_TOKEN” environment variable.

# Build the hub:// path of the Deep Lake dataset.
# TODO: use your organization id here. (by default, org id is your username)
my_activeloop_org_id = "thapabibek1129"
my_activeloop_dataset_name = "langchain_course_indexers_retrievers"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"

# Pass the embedding model via `embedding`; `embedding_function` is only kept
# by the DeepLake vector store for backwards compatibility.
db = DeepLake(dataset_path=dataset_path, embedding=embeddings)

# Embed the chunks and store them in the dataset; the returned ids are shown
# as the cell output.
# NOTE(review): the dataset lives on the hub, so re-running this cell
# presumably appends the same chunks again -- confirm before a full re-run.
db.add_documents(docs)

Your Deep Lake dataset has been successfully created!


Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:27<00:00, 27.89s/it]

Dataset(path='hub://thapabibek1129/langchain_course_indexers_retrievers', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype     shape     dtype  compression
  -------    -------   -------   -------  ------- 
   text       text      (2, 1)     str     None   
 metadata     json      (2, 1)     str     None   
 embedding  embedding  (2, 384)  float32   None   
    id        text      (2, 1)     str     None   





['d4f2837b-c949-11ee-92d6-a434d9523559',
 'd4f2837c-c949-11ee-af06-a434d9523559']

In [8]:
# create retriever from db
# No arguments are passed, so the vector store's default search settings apply.
retriever = db.as_retriever()

# QA Model

In [9]:
from langchain import HuggingFaceHub, LLMChain

# Two Hugging Face Hub models are set up so their answers can be compared:
# a small seq2seq model (flan-t5) and a larger instruction-tuned model (Mistral).
llm_t5 = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs=dict(temperature=0, max_length=64, max_new_tokens=128),
)

llm_mistral = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    model_kwargs=dict(temperature=0.5, max_length=64, max_new_tokens=512),
)



In [10]:
from langchain.chains import RetrievalQA

# Build one "stuff" retrieval-QA chain per LLM, both backed by the same retriever.
# The chains are created in the same order as the tuple: flan-t5 first, then Mistral.
qa_t5, qa_mistral = (
    RetrievalQA.from_chain_type(
        llm=model,
        chain_type="stuff",
        retriever=retriever,
    )
    for model in (llm_t5, llm_mistral)
)

# Query

In [11]:
# Ask the flan-t5 retrieval chain the question and print its answer.
query = "How Google plans to challenge OpenAI?"
response = qa_t5.run(query)
print(response)

Google is offering developers access to one of its most advanced AI language models: PaLM


In [12]:
# Ask the Mistral retrieval chain the same question so the two answers can be compared.
query = "How Google plans to challenge OpenAI?"
response = qa_mistral.run(query)
print(response)

 Google is challenging OpenAI by offering developers access to its advanced AI language model, PaLM, through an API and enterprise tools. These tools aim to help businesses generate various types of content from simple natural language prompts. PaLM is a large language model, similar to OpenAI's GPT series and Meta's LLaMA family of models. It can carry out various text generation and editing tasks, such as being a conversational chatbot or summarizing text. Google first announced PaLM in April 2022.


# More Effective Retrieval with Contextual Compression

In [13]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor


# Compression retriever whose extractor is driven by flan-t5.
compressor = LLMChainExtractor.from_llm(llm_t5)
compression_retriever_t5 = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)

# Same setup with Mistral as the extractor LLM. `compressor` is rebound here,
# so after this cell it refers to the Mistral extractor.
compressor = LLMChainExtractor.from_llm(llm_mistral)
compression_retriever_mistral = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)

In [14]:
# Fetch the compressed documents through the flan-t5 compression retriever
# and show the text of the first result.
retrieved_docs = compression_retriever_t5.get_relevant_documents(
    "How Google plans to challenge OpenAI?"
)
print(retrieved_docs[0].page_content)



Google is offering developers access to one of its most advanced AI language models: PaLM. The search giant is launching an API for PaLM alongside a number of AI enterprise tools it says will help businesses “generate text, images, code, videos, audio, and more from simple natural language prompts.”


In [15]:
# Fetch the compressed documents through the Mistral compression retriever
# and show the text of the first result.
retrieved_docs = compression_retriever_mistral.get_relevant_documents(
    "How Google plans to challenge OpenAI?"
)
print(retrieved_docs[0].page_content)

Google is offering developers access to its most advanced AI language model: PaLM. Google is launching an API for PaLM alongside a number of AI enterprise tools. The tools will help businesses generate text, images, code, videos, audio, and more from simple natural language prompts.
