In [1]:
import pypdf

In [2]:
from langchain.document_loaders import PyPDFDirectoryLoader

loader = PyPDFDirectoryLoader("pdfs")
data = loader.load()

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)

texts = text_splitter.split_documents(data)

In [5]:
print(len(texts))

1250


### Embeddings

In [6]:
from langchain.embeddings import HuggingFaceEmbeddings

In [7]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [9]:
query_result = embeddings.embed_query("What is the weather today?")

In [11]:
len(query_result)

384

### Initialize Pinecone

In [14]:
from dotenv import load_dotenv
load_dotenv() # Take Enviroment variables from .env
import os

PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")
PINECONE_API_ENV=os.getenv("PINECONE_API_ENV")


In [17]:
import pinecone
#Initialize Pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_API_ENV

)

index_name = "genai-ineuron"



In [23]:
from langchain.vectorstores import Pinecone

In [21]:
docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index_name) #Create embedding using the embedding model for the PDF or Text Chunks created from PDF

### Load index from existing Index in Pinecone

In [25]:
docsearch = Pinecone.from_existing_index(index_name, embeddings)

In [33]:
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x2a18af0a0>

In [26]:
query = "What is probability?"
docs = docsearch.similarity_search(query, k=3)

In [27]:
docs

[Document(page_content='14   Chapter 2What i s a Probability?\nThe idea of probability is deeply ingrained in our everyday language. \nWhenever you say something such as “That seems unlikely!” or “I would be surprised if that’s not the case” or “I’m not sure about that,” you’re making a claim about probability. Probability is a measurement of how strongly we believe things about the world.\nIn the previous chapter we used abstract, qualitative terms to describe'),
 Document(page_content='Wrapping u p\nIn this chapter we explored two different types of probabilities: those of events and those of beliefs. We define probability as the ratio of the outcome(s) we care about to the number of all possible outcomes. \nWhile this is the most common definition of probability, it is difficult \nto apply to beliefs because most practical, everyday probability prob -\nlems do not have clear-cut outcomes and so aren’t intuitively assigned discrete numbers.'),
 Document(page_content='accurately quant

In [34]:
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [36]:
from langchain_openai import OpenAI

In [39]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm = OpenAI(),
    chain_type = "stuff",
    retriever = docsearch.as_retriever(), #Object that we are getting from the database
    return_source_documents = True
    )


In [43]:
print(qa_chain(query)['result'])

 Probability is a measurement of how strongly we believe things about the world, and it can be expressed as the ratio of the outcome(s) we care about to the number of all possible outcomes. It is a fundamental concept used to understand the likelihood of events occurring and to make predictions and decisions based on our beliefs.
