In [None]:
!pip install langchain
!pip install pinecone-client
!pip install pypdf
!pip install openai
!pip install tiktoken

In [1]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os

In [2]:
!mkdir pdfs

In [3]:
loader=PyPDFDirectoryLoader("pdfs")

In [4]:
data=loader.load()

In [5]:
data[0]

Document(page_content='Deep Residual Learning for Image Recognition\nKaiming He Xiangyu Zhang Shaoqing Ren Jian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun }@microsoft.com\nAbstract\nDeeper neural networks are more difﬁcult to train. We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously. We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions. We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8 ×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNet testset. This result won the 1st place on the\nILSVRC 2015 classiﬁcat

In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)

In [7]:
text_chunks = text_splitter.split_documents(data)

In [8]:
text_chunks

[Document(page_content='Deep Residual Learning for Image Recognition\nKaiming He Xiangyu Zhang Shaoqing Ren Jian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun }@microsoft.com\nAbstract\nDeeper neural networks are more difﬁcult to train. We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously. We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-', metadata={'source': 'pdfs\\Deep Residual Learning for Image Recognition(ResNet).pdf', 'page': 0}),
 Document(page_content='stead of learning unreferenced functions. We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8 ×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of th

In [14]:
len(text_chunks)

132

In [10]:
print(text_chunks[0].page_content)

Deep Residual Learning for Image Recognition
Kaiming He Xiangyu Zhang Shaoqing Ren Jian Sun
Microsoft Research
{kahe, v-xiangz, v-shren, jiansun }@microsoft.com
Abstract
Deeper neural networks are more difﬁcult to train. We
present a residual learning framework to ease the training
of networks that are substantially deeper than those used
previously. We explicitly reformulate the layers as learn-
ing residual functions with reference to the layer inputs, in-


In [11]:
print(text_chunks[1].page_content)

stead of learning unreferenced functions. We provide com-
prehensive empirical evidence showing that these residual
networks are easier to optimize, and can gain accuracy from
considerably increased depth. On the ImageNet dataset we
evaluate residual nets with a depth of up to 152 layers—8 ×
deeper than VGG nets [41] but still having lower complex-
ity. An ensemble of these residual nets achieves 3.57% error
on the ImageNet testset. This result won the 1st place on the


In [12]:
print(text_chunks[2].page_content)

ILSVRC 2015 classiﬁcation task. We also present analysis
on CIFAR-10 with 100 and 1000 layers.
The depth of representations is of central importance
for many visual recognition tasks. Solely due to our ex-
tremely deep representations, we obtain a 28% relative im-
provement on the COCO object detection dataset. Deep
residual nets are foundations of our submissions to ILSVRC
& COCO 2015 competitions1, where we also won the 1st
places on the tasks of ImageNet detection, ImageNet local-


In [13]:
print(text_chunks[3].page_content)

ization, COCO detection, and COCO segmentation.
1. Introduction
Deep convolutional neural networks [22, 21] have led
to a series of breakthroughs for image classiﬁcation [21,
50, 40]. Deep networks naturally integrate low/mid/high-
level features [50] and classiﬁers in an end-to-end multi-
layer fashion, and the “levels” of features can be enriched
by the number of stacked layers (depth). Recent evidence
[41, 44] reveals that network depth is of crucial importance,


In [None]:
from dotenv import load_dotenv
load_dotenv()

In [15]:
import os
#os.environ["OPENAI_API_KEY"]="-----------------"

In [25]:
from langchain.embeddings.openai import OpenAIEmbeddings
import tiktoken

In [40]:
embedding=OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])

In [41]:
len(embedding.embed_query("How are you"))

1536

In [42]:
len(embedding.embed_query("hi i am fine"))

1536

In [43]:
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '----------------------------')
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'gcp-starter')

In [44]:
import pinecone
pinecone.init( api_key=PINECONE_API_KEY,  # find at app.pinecone.io
     environment=PINECONE_API_ENV  # next to api key in console
)

In [48]:
index_name = "test" # put in the name of your pinecone index here

In [49]:
index = pinecone.Index('test')

## Create Embeddings for each of the Text Chunk

In [50]:
docsearch = Pinecone.from_texts([t.page_content for t in text_chunks], embedding, index_name=index_name)

In [51]:
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x19d6c3d9a10>

In [52]:
docsearch.as_retriever()

VectorStoreRetriever(tags=['Pinecone', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.pinecone.Pinecone object at 0x0000019D6C3D9A10>)

In [69]:
query= "ILSVRC 2015 classiﬁcation task"

In [70]:
docs = docsearch.similarity_search(query)
docs

[Document(page_content='ILSVRC 2015 classiﬁcation task. We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe depth of representations is of central importance\nfor many visual recognition tasks. Solely due to our ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement on the COCO object detection dataset. Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions1, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-'),
 Document(page_content='validation set. Using an ensemble of networks for both clas-\nsiﬁcation and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number signiﬁcantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error. This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12'),
 Document(page_content='trained on the 1000-class ImageNet classiﬁcation set, and\nare ﬁne-t

In [71]:
llm = OpenAI()

In [72]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

In [73]:
query = "ILSVRC 2015 classiﬁcation task"

In [74]:
qa.run(query)

' ILSVRC 2015 was an image classification task. It used the 1000-class ImageNet classiﬁcation set, and results were evaluated on the COCO object detection dataset. The ImageNet localization task was won by a team that achieved a top-5 localization error of 9.0% on the test set.'

In [76]:
import sys
while True:
  #user_input = input(f"Input Prompt: ")
  user_input = "ILSVRC 2015 classiﬁcation task"
  if user_input == 'exit':
    print('Exiting')
    sys.exit()
  if user_input == '':
    continue
  result = qa({'query': user_input})
  print(f"Answer: {result['result']}")

Answer:  The ILSVRC 2015 classiﬁcation task was won by an ensemble of networks for both classification and localization, achieving a top-5 localization error of 9.0%.
Answer:  ILSVRC stands for ImageNet Large Scale Visual Recognition Challenge. It is an annual competition that aims to evaluate algorithms for object detection and image classification.
Answer:  ILSVRC stands for ImageNet Large Scale Visual Recognition Challenge. It is an annual competition that evaluates computer vision algorithms on the ImageNet dataset.


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for text-embedding-ada-002 in organization org-rGeJAvOgfIVeJmMiPZ3ZUOjM on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}