# Langchain + Pinecone
You will learn the following things:

* Create embeddings from a document
* Save these embeddings to Pinecone indexes
* Query these Pinecone indexes

Install all the dependencies listed in the `requirements.txt` file into a new virtual environment.

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Pinecone
from PyPDF2 import PdfReader
import pinecone

from config import config

* Reading a PDF file from the path
* Saving the content of the PDF file into an `output.txt` file

In [None]:
# Location of the PDF that the next cell reads.
file_path = 'data/inputs/sample.pdf'

In [None]:
# Extract the text of every page of the PDF and concatenate it in page order.
reader = PdfReader(file_path)
# str.join assembles the result in one pass instead of re-copying the
# accumulated string on every `+=`.
text = ''.join(page.extract_text() for page in reader.pages)

# Persist the extracted text as a .txt file under data/outputs/.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches whatever reads this file back before pinning one here.
with open('data/outputs/output.txt', 'w') as file:
    file.write(text)

* Loading the folder with `.txt` files

In [None]:
# Directory loader over data/outputs/ that hands each file matching the
# `**/*.txt` glob to TextLoader.
loader = DirectoryLoader('data/outputs/', glob='**/*.txt', loader_cls=TextLoader)

In [None]:
# Run the loader; `documents` holds whatever it returns for the matched files.
documents = loader.load()

* Split the documents into chunks of text

In [None]:
# Splitter configured for chunks of size 1024 with no overlap between them.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=1024)

In [None]:
# Break the loaded documents into chunks with the configured splitter.
texts = text_splitter.split_documents(documents)

In [None]:
# OpenAI embeddings client, authenticated with the key from the project config.
embeddings = OpenAIEmbeddings(openai_api_key=config.OPENAI_API_KEY)

* Initialize `Pinecone`

In [None]:
# Configure the Pinecone client with the API key and environment from the
# project config.
pinecone.init(environment=config.PINECONE_ENVIRONMENT, api_key=config.PINECONE_API_KEY)

In [None]:
# Name of the Pinecone index used by the cells below.
index_name = 'test'

* Embed the chunks and store them in the Pinecone index

In [None]:
# Embed the chunks and store them in the Pinecone index named by `index_name`.
# NOTE(review): presumably the index has to exist already — confirm.
vectordb = Pinecone.from_documents(index_name=index_name, embedding=embeddings, documents=texts)

* Load the existing index

In [None]:
# Vector store handle over the already-populated index, reusing the same
# embeddings object that was used when storing the chunks.
db = Pinecone.from_existing_index(embedding=embeddings, index_name=index_name)

* QnA chain without `memory`

In [None]:
# Retrieval QA chain without conversational memory, backed by the Pinecone
# retriever.
qa = RetrievalQA.from_chain_type(
    # Pass the key explicitly, as is done for OpenAIEmbeddings, rather than
    # relying on an OPENAI_API_KEY environment variable being set.
    llm=ChatOpenAI(
        temperature=0.0,
        openai_api_key=config.OPENAI_API_KEY,
    ),
    # NOTE(review): "stuff" presumably places all retrieved chunks into a
    # single prompt — confirm against the LangChain docs.
    chain_type="stuff",
    retriever=db.as_retriever(),
)

In [None]:
# Ask a single question through the memory-less QA chain.
result = qa.run('what are the different spiritual laws?')

In [None]:
# Show the value returned by the QA chain.
print(result)

* QnA with memory

In [None]:
# Conversational retrieval chain: besides the question, it is called with the
# chat history so follow-up questions can refer to earlier turns.
cqa = ConversationalRetrievalChain.from_llm(
    # Pass the key explicitly, as is done for OpenAIEmbeddings, rather than
    # relying on an OPENAI_API_KEY environment variable being set.
    llm=ChatOpenAI(
        temperature=0.0,
        openai_api_key=config.OPENAI_API_KEY,
    ),
    retriever=db.as_retriever(),
)

In [None]:
# First turn of the conversation: nothing has been asked yet, so the history
# passed to the chain is empty.
query = 'what are the different spiritual laws?'
chat_history = []
result = cqa(dict(question=query, chat_history=chat_history))

In [None]:
# Echo the full chain output as the cell's result.
result

In [None]:
# Record the (question, answer) pair so it is part of the history passed to
# the chain on the next call.
chat_history.append((query, result['answer']))

In [None]:
# Follow-up turn: the question is sent together with the history recorded so far.
query = 'can you explain the first one?'
result = cqa(dict(question=query, chat_history=chat_history))

In [None]:
# Echo the full chain output for the follow-up question as the cell's result.
result