## Install All the Required Packages

In [None]:
!pip install langchain
!pip install pinecone-client
!pip install pypdf

In [None]:
!pip install openai
!pip install tiktoken

## Import All the Required Libraries

In [None]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os

## Load the PDF Files

In [None]:
# Create the input folder if it is missing. Unlike `!mkdir pdfs`, this is
# safe to re-run and does not depend on the shell.
# Copy the PDF files you want to query into `pdfs/` before continuing.
os.makedirs("pdfs", exist_ok=True)

## Extract the Text from the PDFs

In [None]:
# Load every PDF found under the `pdfs` directory.
loader = PyPDFDirectoryLoader("pdfs")
data = loader.load()

# Fail fast with a clear message: an empty folder would otherwise only
# surface later as an IndexError or an empty vector index.
assert data, "No documents were loaded - add at least one PDF to the 'pdfs' folder."

In [None]:
# Preview only the first few loaded documents; displaying the whole list
# floods the notebook output with the full text of every PDF.
data[:3]

## Split the Extracted Data into Text Chunks

In [None]:
# Chunking parameters handed to the splitter; tune them here.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 20

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

In [None]:
# Split the loaded documents into chunks using the splitter configured above.
text_chunks = text_splitter.split_documents(data)

In [None]:
# Preview only the first few chunks; displaying the whole list floods the
# notebook output.
text_chunks[:3]

In [None]:
# Number of chunks produced by the split.
len(text_chunks)

In [None]:
# Inspect the second chunk (index 1); this needs at least two chunks to exist.
text_chunks[1]

## Download the Embeddings
### Alternatively, you can use a Hugging Face embedding model such as "mpnet v2"

In [None]:
import os
from getpass import getpass

# Never store the key in the notebook. Use OPENAI_API_KEY from the
# environment when it is already set; otherwise prompt for it without
# echoing, so it does not end up in the saved file or its outputs.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass("OpenAI API key: ")

In [None]:
# Create the OpenAI embeddings client with its default settings.
embeddings = OpenAIEmbeddings()

In [None]:
# Smoke test: embed a short sample sentence to confirm the client works.
result = embeddings.embed_query("Are you doing good")

In [None]:
# Length of the value returned for the sample query
# (presumably the embedding's dimensionality - confirm against the model used).
len(result)

## Initializing Pinecone

In [None]:
from getpass import getpass

# The API key is a secret: read it from the environment, or prompt for it
# without echoing. No key literal is kept in the notebook as a fallback.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY') or getpass("Pinecone API key: ")
# The environment name is not a secret, so a default is kept for convenience.
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'gcp-starter')

In [None]:
import pinecone

# Name of the Pinecone index used by the rest of the notebook;
# replace it with the name of your own index.
index_name = "test"

# Initialize the Pinecone client. The API key is listed at app.pinecone.io,
# and the environment is shown next to it in the console.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)


## Create Embeddings for Each of the Text Chunks

In [None]:
# Collect the raw text of every chunk, then embed the texts and store them
# in the Pinecone index named by `index_name`.
chunk_texts = [chunk.page_content for chunk in text_chunks]
docsearch = Pinecone.from_texts(chunk_texts, embeddings, index_name=index_name)

## If you already have an index, you can load it like this

In [None]:
# Attach to an index that already exists instead of re-uploading the chunks.
# Note: this rebinds `docsearch`, replacing the object created by from_texts.
docsearch = Pinecone.from_existing_index(index_name, embeddings)
docsearch

## Similarity Search

In [None]:
# Search text for the similarity lookup below
# (looks like a placeholder - replace it with a question about your PDFs).
query = "query what to search"

In [None]:
# Run a similarity search for `query` against the vector store, requesting k=3 results.
docs = docsearch.similarity_search(query, k=3)

In [None]:
# Display the search results.
docs

## Creating an LLM Wrapper

In [None]:
# Instantiate the OpenAI LLM wrapper with its default settings.
llm = OpenAI()

In [None]:
# Wire the LLM and the vector-store retriever into a RetrievalQA chain
# using the "stuff" chain type.
retriever = docsearch.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)


## Q/A

In [None]:
# Question for the QA chain
# (looks like a placeholder - replace it with a real question).
query = "query what to search2"

In [None]:
# Run the QA chain on the current `query` and display what it returns.
qa.run(query)

In [None]:
# Another question for the QA chain
# (looks like a placeholder - replace it with a real question).
query = "query what to search3"

In [None]:
# Run the QA chain on the updated `query` and display what it returns.
qa.run(query)

In [None]:
import sys

In [None]:
# Interactive question/answer loop over the RetrievalQA chain.
# Type 'exit' to stop; blank or whitespace-only input is ignored.
while True:
  try:
    user_input = input("Input Prompt: ").strip()
  except (EOFError, KeyboardInterrupt):
    # Kernel interrupt or closed stdin: leave the loop without a traceback.
    print('Exiting')
    break
  if user_input.lower() == 'exit':
    print('Exiting')
    # `break` ends the cell cleanly; sys.exit() would raise SystemExit
    # inside the notebook kernel instead of just stopping the loop.
    break
  if not user_input:
    continue
  result = qa({'query': user_input})
  print(f"Answer: {result['result']}")