In [1]:
!pip install langchain==0.0.350 openai faiss-gpu tiktoken
!pip install wandb

Collecting langchain==0.0.350
  Downloading langchain-0.0.350-py3-none-any.whl (809 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m809.1/809.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloading openai-1.6.1-py3-none-any.whl (225 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.4/225.4 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m44.2 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain==0.0.350)
  Downloading dataclasses_json-0.6.3-

In [3]:
import os
from google.colab import userdata
# Copy the OpenAI key from Colab's secret store into the process environment;
# the key itself never appears in the notebook source.
os.environ.update(OPENAI_API_KEY=userdata.get("OPENAI_API_KEY"))

In [4]:
# Set the LANGCHAIN_WANDB_TRACING flag; the first chain run further down
# streams LangChain activity to Weights & Biases.
os.environ.update({"LANGCHAIN_WANDB_TRACING": "true"})

In [5]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-3.17.4-py3-none-any.whl (278 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-3.17.4


In [6]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the glossary PDF, then chunk it for embedding.
loader = PyPDFLoader(file_path="CommonInsuranceTerms.pdf")
documents = loader.load()

# Split the loaded documents using chunk_size=500 and chunk_overlap=50.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
texts = text_splitter.split_documents(documents=documents)

In [7]:
# Sanity check: display the first chunk produced by the splitter
# (its page_content and source/page metadata).
texts[0]

Document(page_content='Glossary of Common Insurance Terms \nNOTICE:  This document is for informational purposes only and is not in tended to alter or replace the \ninsurance policy. Additionally, this informational sheet is not  intended to fully set out your rights and \nobligations or the rights and obligations of the insurance comp any. If you have questions about your insurance, \nyou should consult your insurance agent, the insurance company,  or the language of the insurance policy. \nA', metadata={'source': 'CommonInsuranceTerms.pdf', 'page': 0})

In [11]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS

# Build a FAISS vector store from the chunks, embedding each one with
# OpenAIEmbeddings.
docsearch = FAISS.from_documents(
    documents=texts,
    embedding=OpenAIEmbeddings(),
)

In [13]:
# Persist the FAISS store to disk under /content.
docsearch.save_local(folder_path="/content")

In [14]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Assemble a retrieval QA chain: the FAISS store supplies the retriever and
# an OpenAI LLM (temperature 0.3) is used with the "stuff" chain type.
qa_chain = RetrievalQA.from_chain_type(
    retriever=docsearch.as_retriever(),
    chain_type="stuff",
    llm=OpenAI(temperature=0.3),
)

In [15]:
# Run a glossary question through the QA chain and print the returned answer.
# NOTE(review): with LANGCHAIN_WANDB_TRACING enabled, this first run appears to
# trigger the interactive W&B login prompt — confirm before running unattended.
mortality_answer = qa_chain.run("What is Mortality Charge?")
print(mortality_answer)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Streaming LangChain activity to W&B at https://wandb.ai/aianytime07/uncategorized/runs/u5cu06k9
[34m[1mwandb[0m: `WandbTracer` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `langchain`.


 The cost of the insurance protection element of a universal life policy, based on the insured's risk classification, age, and amount at risk.


In [16]:
# Second glossary query: run it through the QA chain and print the answer.
ppo_answer = qa_chain.run("Tell me about Preferred provider organization (PPO).")
print(ppo_answer)

 A PPO is a type of health insurance plan where providers agree to discounted rates for an insurance company. These providers are part of the PPO's network, and if you use them, you will receive a higher percentage of reimbursement. If you go to providers outside of the network, you will have to pay more for your care.
