# Giving more data power to LLMs

In [None]:
## Installation of libraries
#pip install langchain
#pip install langchain-openai
#pip install openai
#pip install pypdf2
#pip install faiss-cpu
#pip install tiktoken

In [None]:
#Imports

from PyPDF2 import PdfReader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAI

In [None]:
import os

# Read the OpenAI API key from the environment. A missing OPENAI_API_KEY
# variable raises KeyError here, before any API call is attempted.
OPENAI_KEY = os.environ["OPENAI_API_KEY"]
# Do not print the key itself: notebook output is stored with the file and is
# easily shared or committed. Only confirm that a key was found.
print("OPENAI_API_KEY loaded from environment")

## Simple LLM call with generic knowledge

In [None]:
# Create the completion LLM used for the plain (no-PDF) query below, pinned to
# the "gpt-3.5-turbo-instruct" model.
llm = OpenAI(model_name="gpt-3.5-turbo-instruct")

In [None]:
# Baseline question sent straight to the model, without any document context.
# "{your name}" is a literal placeholder in a plain string (not an f-string):
# replace it by hand with the name you want to ask about.
our_query = "what is the birth year of {your name}?"
print(llm.invoke(our_query))

## LLM call with our PDF as data reference

### Step 1: Reading pdf file

In [None]:
# Open the source PDF with PyPDF2's PdfReader.
# NOTE(review): the path is an absolute, machine-specific Windows path; point
# it at your own file before running.
data = PdfReader(r'C:\Users\dhruv\OneDrive\Documents\data.pdf')
# The data file contains text information about you.

In [None]:
# Concatenate the extracted text of every page into a single string.
# Pages whose extract_text() result is empty/falsy are skipped. The texts are
# joined with no separator, so page contents run directly into one another.
page_texts = (page.extract_text() for page in data.pages)
combined_text = "".join(text for text in page_texts if text)

In [None]:
# Bare expression: in a notebook cell this displays the extracted text so it
# can be inspected before splitting.
combined_text

### Step 2: Breaking down the PDF data into smaller chunks

In [None]:
# Splitter configuration: break the text on newlines, target chunks of 200
# units with a 20-unit overlap between neighbours, where size is measured
# with len() (i.e. characters for str input).
text_splitter = CharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
    separator="\n",
    length_function=len,
)

In [None]:
# Split the full PDF text into chunks using the splitter configured above.
finalData = text_splitter.split_text(combined_text)

In [None]:
# Bare expression: displays how many chunks the splitter produced.
len(finalData)

### Step 3: Generating Text Embeddings & Storing them in Vector Store

In [None]:
# Embedding model with default settings; used to vectorise the text chunks.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment — confirm.
embeddings = OpenAIEmbeddings()

In [None]:
# Facebook AI Similarity Search (FAISS) is a library for searching for
# embeddings that are similar to one another.
# Build a FAISS vector store from the text chunks using the embeddings object.
documentsearch = FAISS.from_texts(finalData, embeddings)

### Step 4: Fetching answers for the user's questions.

In [None]:
# Build a question-answering chain of type "stuff" around a default OpenAI LLM.
# NOTE(review): presumably "stuff" places all supplied documents into a single
# prompt — confirm against the LangChain documentation.
chain = load_qa_chain(OpenAI(),chain_type="stuff")

In [None]:
our_query = "Who is Dhruv Savaliya?"
# Fetch the chunks most similar to the question from the FAISS store.
docs = documentsearch.similarity_search(our_query)
# Chain.run() is deprecated in LangChain; invoke() takes the inputs as a dict
# and returns a dict whose "output_text" entry holds the generated answer.
answer = chain.invoke({"input_documents": docs, "question": our_query})
print(answer["output_text"])

In [None]:
our_query = "When did Dhruv born?"
# Fetch the chunks most similar to the question from the FAISS store.
docs = documentsearch.similarity_search(our_query)
# Chain.run() is deprecated in LangChain; invoke() takes the inputs as a dict
# and returns a dict whose "output_text" entry holds the generated answer.
answer = chain.invoke({"input_documents": docs, "question": our_query})
print(answer["output_text"])

In [None]:
our_query = "If today is 2024, How older Dhruv is?"
# Fetch the chunks most similar to the question from the FAISS store.
docs = documentsearch.similarity_search(our_query)
# Chain.run() is deprecated in LangChain; invoke() takes the inputs as a dict
# and returns a dict whose "output_text" entry holds the generated answer.
answer = chain.invoke({"input_documents": docs, "question": our_query})
print(answer["output_text"])

# -----------------------------------------------------------------------------------------