In [2]:
from langchain.document_loaders import TextLoader

In [3]:
# Sample article text used as the demo corpus; a later cell writes it to a
# local file and loads it back as a document.
# Source: https://www.theverge.com/2023/3/14/23639313/google-ai-language-model-palm-api-challenge-openai
# Note: the text contains non-ASCII punctuation (curly quotes/apostrophes).
text = """Google opens up its AI language model PaLM to challenge OpenAI and GPT-3
Google is offering developers access to one of its most advanced AI language models: PaLM.
The search giant is launching an API for PaLM alongside a number of AI enterprise tools
it says will help businesses “generate text, images, code, videos, audio, and more from
simple natural language prompts.”

PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or
Meta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs,
PaLM is a flexible system that can potentially carry out all sorts of text generation and
editing tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for
example, or you could use it for tasks like summarizing text or even writing code.
(It’s similar to features Google also announced today for its Workspace apps like Google
Docs and Gmail.)
"""

In [4]:
# Write the sample text to a local file. The text contains non-ASCII
# punctuation (curly quotes), so the encoding is pinned to UTF-8 rather than
# left to the platform's default locale encoding, which differs across
# operating systems.
with open("my_file.txt", "w", encoding="utf-8") as file:
    file.write(text)

# Load the file back with TextLoader, decoding with the same encoding it was
# written with so the round trip is lossless on every platform.
loader = TextLoader("my_file.txt", encoding="utf-8")
docs_from_file = loader.load()

# The whole file is loaded as a single document.
print(len(docs_from_file))
# expected output: 1

1


In [5]:
from langchain.text_splitter import CharacterTextSplitter

# Create a character-based text splitter configured for 200-character chunks
# with a 20-character overlap between consecutive chunks.
# NOTE(review): the recorded output warns that a 373-character chunk was
# created despite chunk_size=200. Presumably the splitter only breaks on its
# separator (blank lines by default — confirm against the installed LangChain
# version), so chunk_size acts as a target rather than a hard cap for this text.
text_splitter = CharacterTextSplitter(chunk_size = 200, chunk_overlap = 20)

# Split the loaded documents into chunks and report how many were produced.
docs = text_splitter.split_documents(docs_from_file)
print(len(docs))

Created a chunk of size 373, which is longer than the specified 200


2


In [12]:
import os
import sys

# Directory that holds the local `credentials` module imported below.
# Written as a raw string so the Windows backslashes are taken literally
# instead of being parsed as (invalid) escape sequences such as "\G" and "\D",
# which emit a DeprecationWarning/SyntaxWarning on current Python versions.
CREDENTIALS_DIR = r'D:\Github\DeepLake-Langchain'

# Only insert once so re-running this cell does not keep growing sys.path.
if CREDENTIALS_DIR not in sys.path:
    sys.path.insert(1, CREDENTIALS_DIR)

import credentials

# Export the API keys through environment variables so they are not
# hardcoded in the notebook itself.
os.environ["OPENAI_API_KEY"] = credentials.openai
os.environ["ACTIVELOOP_TOKEN"] = credentials.active_loop

In [13]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding model wrapper; it is passed to the Deep Lake vector store in a
# later cell as its embedding function.
embeddings = OpenAIEmbeddings(model = "text-embedding-ada-002")

In [14]:
from langchain.vectorstores import DeepLake

# Build the Deep Lake dataset path: hub://<organization id>/<dataset name>.
# The organization id comes from the local credentials module.
my_activeloop_org_id = credentials.active_loop_org_id
my_activeloop_dataset_name = "langchain_course_indexers_retrievers"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"

# Open the vector store at that path and add the chunked documents to it.
# add_documents is the cell's last expression, so the ids it returns are
# displayed as the cell output.
# NOTE(review): re-running this cell presumably appends the same documents to
# the existing dataset again — confirm, and clear the dataset if duplicates matter.
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)
db.add_documents(docs)

Your Deep Lake dataset has been successfully created!
The dataset is private so make sure you are logged in!


 

Dataset(path='hub://megatron17/langchain_course_indexers_retrievers', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
 embedding  embedding  (2, 1536)  float32   None   
    id        text      (2, 1)      str     None   
 metadata     json      (2, 1)      str     None   
   text       text      (2, 1)      str     None   


['e31f6e75-198d-11ee-b60b-00d861dd19c7',
 'e31f6e76-198d-11ee-98a7-00d861dd19c7']

In [15]:
# Create a retriever from the Deep Lake vector store; the QA chain and the
# compression retriever in later cells both use it as their document source.
retriever = db.as_retriever()

In [16]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

In [17]:
# Question-answering chain: documents fetched by the retriever are passed to
# the completion model using the "stuff" chain type.
# NOTE(review): "text-davinci-003" may no longer be served by OpenAI — confirm
# availability before re-running.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=OpenAI(model="text-davinci-003"),
)

In [18]:
# Run the retrieval QA chain on a sample question and print its answer.
query = "How Google plans to challenge OpenAI?"
response = qa_chain.run(query)
print(response)

 Google is offering developers access to its AI language model PaLM, which is similar to the GPT series created by OpenAI. PaLM can be trained for tasks like summarizing text or writing code, which could potentially challenge OpenAI.


## Extract only relevant documents using a document compressor

In [19]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [20]:
# Completion model handed to the document compressor in the next cell.
# Unlike the model used for the QA chain, temperature is set explicitly to 0.
llm = OpenAI(model = "text-davinci-003", temperature=0)

In [21]:
# Build an LLM-backed extractor and wrap the existing retriever with it, so
# retrieved documents are passed through the compressor before being returned.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever, base_compressor=compressor
)


In [22]:
# Retrieve documents for the question through the compression retriever, which
# passes each retrieved document through the LLM-based compressor.
retrieved_docs = compression_retriever.get_relevant_documents(
    "How Google plans to challenge OpenAI?"
)

# The compressor may discard every document, leaving an empty list, so guard
# the index access instead of letting the cell fail with an IndexError.
if retrieved_docs:
    print(retrieved_docs[0].page_content)
else:
    print("No relevant passages were extracted for this query.")



Google is offering developers access to one of its most advanced AI language models: PaLM. The search giant is launching an API for PaLM alongside a number of AI enterprise tools it says will help businesses “generate text, images, code, videos, audio, and more from simple natural language prompts.”
