In [3]:
from dotenv import load_dotenv
import os
import openai
from PyPDF2 import PdfReader
import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain import FAISS
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings


# Load environment variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key used by ChatOpenAI
# further down comes from — confirm the .env file defines it.
load_dotenv()

True

### Reading a Large PDF

- Page count: 100+

In [4]:
import PyPDF2

# Report to ingest, resolved relative to the notebook's working directory.
pdf_path = 'bcg-2022-annual-sustainability-report-apr-2023.pdf'

# The context manager guarantees the file handle is closed even if text
# extraction raises part-way through the document.
with open(pdf_path, 'rb') as pdf_file_obj:
    pdf_reader = PyPDF2.PdfReader(pdf_file_obj)
    num_pages = len(pdf_reader.pages)
    # Build the full text in a single join instead of repeated `+=`.
    # Each page is followed by a blank line, exactly as before.
    # Defensive: a falsy extraction result is treated as an empty string so the
    # concatenation cannot fail on a page that yields no text.
    detected_text = ''.join(
        (page_obj.extract_text() or '') + '\n\n'
        for page_obj in pdf_reader.pages
    )

In [5]:
# detected_text  # uncomment to inspect the raw extracted text (long output)

### LangChain - Recursive Character Text Splitter

- We will perform chunking and split the text using LangChain text splitters

In [6]:
# Split the extracted report text into chunks of up to 1000 characters, with
# 200 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
# `create_documents` takes a list of source strings; here there is only one.
source_texts = [detected_text]
texts = text_splitter.create_documents(source_texts)

In [7]:
# Number of chunks produced by the text splitter.
len(texts)

344

- We create a vector database from the chunks and save it to disk for future use
- We will use **FAISS (Facebook AI Similarity Search)** as the vector store

## Generating Embeddings and Storing Them in the Vector DB

In [8]:
# Folder the FAISS index is persisted to.
directory = 'index_store'

# Hugging Face embedding model ("all-MiniLM-L6-v2") used to embed each chunk.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk, build the FAISS index in memory, then write it to disk.
vector_index = FAISS.from_documents(documents=texts, embedding=embeddings)
vector_index.save_local(folder_path=directory)

- We now load the database. Using the database, we configure a retriever and then create a chat object. This chat object (qa_interface) will be used to chat with the PDF.

In [13]:
# Reload the persisted index from disk using the same embedding model.
# NOTE(review): newer langchain-community releases reportedly require
# `allow_dangerous_deserialization=True` here — confirm against the installed
# version before adding it.
vector_index = FAISS.load_local(
    folder_path='index_store',
    embeddings=embeddings,
)

In [21]:
# Question put to the report. The text is sent verbatim at runtime, so it is
# kept exactly as written (adjacent literals concatenate to the same string).
query = (
    "List measures taken to address diseases occuring in developing industries."
    "List in a pointwise manner."
)
# Retrieve the stored chunks most similar to the question.
docs = vector_index.similarity_search(query)

In [22]:
# Show the first two retrieved chunks, separated by a 30-underscore rule.
divider = "___" * 10
print(docs[0], divider, docs[1], sep="\n")

page_content='plans to further expand its Accord for a Healthier World to \nextend access to the full portfolio of medicines and vaccines \nto all eligible individuals. \n45\n1.2 billionlower-income countries \nwith increased access to \ninnovative medicines \nwith the potential to ac -\ncess treatment for deadly \ninfectious diseases'
______________________________
page_content='BOSTON CONSULTING GROUP  19\nIMPACT: Bringing Pfizer’s Innovative Portfolio of Medicines to the African \nUnion\nSocietal Problem  \nHalf of the world’s population suffers from a health equity \ngap, living without access to high-quality, safe, effective \nmedicines. In this context, Pfizer announced “An Accord \nfor a Healthier World” to provide access to innovative \nmedicines for people living in 45 lower-income countries. \nThe program has the potential to improve the health of up \nto 1.2 billion people affected by deadly infectious diseases, \nas well as certain cancers and inflammatory diseases.\nPartne

In [23]:
# Initialise the chat model used to answer questions (temperature fixed at 0).
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo-0125",
)

In [24]:
# Build the question-answering chain on top of the chat model, using the
# "stuff" chain type.
qa_interface = load_qa_chain(
    chain_type="stuff",
    llm=llm,
)

In [25]:
 with get_openai_callback() as cost:
    response = qa_interface.run(input_documents=docs, question=query)
    print(cost)

Tokens Used: 926
	Prompt Tokens: 795
	Completion Tokens: 131
Successful Requests: 1
Total Cost (USD): $0.000594


In [27]:
print(response)

1. Pfizer's "Accord for a Healthier World" program aims to provide access to innovative medicines for people in 45 lower-income countries.
2. The program targets improving the health of up to 1.2 billion individuals affected by deadly infectious diseases, cancers, and inflammatory conditions.
3. Pfizer sought assistance from the Boston Consulting Group to develop a partnership model for distributing their innovative portfolio in developing countries.
4. The partnership model focuses on mitigating risks, meeting regulatory requirements, and ensuring high distribution security.
5. The Boston Consulting Group developed scenarios in consultation with local leadership and industry experts to prepare for all contingencies in 2022.
