In [1]:
import getpass
import html
import os

import matplotlib.pyplot as plt
import openai
import pandas as pd
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import GPT2TokenizerFast

In [2]:
# Credentials must never be stored in the notebook: it gets shared and committed
# together with its source and outputs.
# NOTE(review): a secret key was previously hardcoded in this cell; treat it as
# leaked and revoke/rotate it in the OpenAI dashboard.
if not os.environ.get('OPENAI_API_KEY'):
    # Prompt without echoing so the key does not end up in the cell output.
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')
openai.api_key = os.environ['OPENAI_API_KEY']

## 1) Load the PDF in chunks with LangChain

### Split by pages

In [3]:
# Point a PDF loader at the paper; the path is relative to the notebook's working directory.
loader = PyPDFLoader('Attention_is_all_you_need.pdf')

In [4]:
# Read the PDF and split it into a list of LangChain Document objects.
pages = loader.load_and_split()

In [5]:
# Sanity check: eyeball the text extracted for the first chunk.
print(pages[0])

page_content='Attention Is All You Need\nAshish Vaswani\x03\nGoogle Brain\navaswani@google.comNoam Shazeer\x03\nGoogle Brain\nnoam@google.comNiki Parmar\x03\nGoogle Research\nnikip@google.comJakob Uszkoreit\x03\nGoogle Research\nusz@google.com\nLlion Jones\x03\nGoogle Research\nllion@google.comAidan N. Gomez\x03y\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser\x03\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin\x03z\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to\nbe superior in quality while being more parallelizable

In [6]:
# Alias only: no additional splitting happens here, so `chunks` and `pages`
# refer to the same list object.
chunks = pages

In [7]:
# Inspect the container type holding the chunks.
type(chunks)

list

In [8]:
# Inspect the type of an individual chunk.
type(chunks[0])

langchain.schema.Document

In [9]:
# Number of chunks that will be embedded and indexed below.
len(chunks)

16

## 2) Embed the text and store the embeddings

In [10]:
# Embedding model client created with default settings.
# NOTE(review): presumably authenticates via the OPENAI_API_KEY environment variable set earlier — confirm.
embeddings = OpenAIEmbeddings() # get embedding model

In [11]:
# Build a FAISS vector store from every chunk using the embedding model above.
# `db` is reused later for similarity search and as the chatbot's retriever.
db = FAISS.from_documents(chunks, embeddings) # Create vector database

## 3) Set up the retrieval function

In [12]:
# Retrieve the chunks most similar to the question from the vector store.
# No result count is passed, so the store's default applies.
query = "Who created transformers?"
docs = db.similarity_search(query)

In [13]:
# How many chunks the similarity search returned.
len(docs)

4

In [14]:
# Show the first retrieved chunk.
print(docs[0])

page_content='Attention Is All You Need\nAshish Vaswani\x03\nGoogle Brain\navaswani@google.comNoam Shazeer\x03\nGoogle Brain\nnoam@google.comNiki Parmar\x03\nGoogle Research\nnikip@google.comJakob Uszkoreit\x03\nGoogle Research\nusz@google.com\nLlion Jones\x03\nGoogle Research\nllion@google.comAidan N. Gomez\x03y\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser\x03\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin\x03z\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to\nbe superior in quality while being more parallelizable

In [15]:
# Display every retrieved Document (the full search result list).
docs

[Document(page_content='Attention Is All You Need\nAshish Vaswani\x03\nGoogle Brain\navaswani@google.comNoam Shazeer\x03\nGoogle Brain\nnoam@google.comNiki Parmar\x03\nGoogle Research\nnikip@google.comJakob Uszkoreit\x03\nGoogle Research\nusz@google.com\nLlion Jones\x03\nGoogle Research\nllion@google.comAidan N. Gomez\x03y\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser\x03\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin\x03z\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to\nbe superior in quality while being more para

### Create a Q&A chain that combines similarity search with user queries (answering questions from the indexed documents, e.g. Knowledge Base Articles (KBAs))

In [16]:
# Question-answering chain backed by an OpenAI LLM with temperature 0.
# NOTE(review): chain_type='stuff' presumably places all supplied documents into a single prompt — confirm against the LangChain docs.
chain = load_qa_chain(OpenAI(temperature=0), chain_type='stuff')

In [17]:
# Repeat the retrieval for the same question, then let the chain answer it
# from the retrieved chunks. The raw answer is shown as the cell output.
query = "Who created transformers?"
docs = db.similarity_search(query)

chain.run(input_documents=docs, question = query)

' Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, and Łukasz Kaiser created transformers.'

In [18]:
# Run the chain again (a separate LLM request) and strip surrounding whitespace;
# the raw output of the previous cell starts with a space.
answer = str(chain.run(input_documents=docs, question = query)).strip()
answer

'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, and Łukasz Kaiser created transformers.'

In [19]:
# End-to-end pipeline in one cell: ask -> load PDF -> chunk -> embed -> index -> retrieve -> answer.
query = input("Enter question: ")

# Build the searchable index from the PDF.
loader = PyPDFLoader('Attention_is_all_you_need.pdf')
pages = loader.load_and_split()
chunks = pages
embeddings = OpenAIEmbeddings() # get embedding model
db = FAISS.from_documents(chunks, embeddings) # Create vector database

# Retrieve the chunks most similar to the question.
docs = db.similarity_search(query)
chain = load_qa_chain(OpenAI(temperature=0), chain_type='stuff') # setting up the chain instance

# Execute the chain exactly once and keep its result: every run is a separate
# LLM request, so a discarded extra run only adds cost and latency.
answer = str(chain.run(input_documents=docs, question = query)).strip()
print(answer)

Enter question:  who built transformers


Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, and Illia Polosukhin built the Transformer.


## Create chatbot with chat memory

In [20]:
from IPython.display import display 
import ipywidgets as widgets

### Create a conversation chain that uses our vector DB as the retriever; this also allows for chat history management

In [21]:
# Conversational chain: an OpenAI LLM (temperature 0) combined with the FAISS store exposed as a retriever.
# It is called below with a "question" and the running "chat_history".
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), db.as_retriever())

In [22]:
# (question, answer) pairs accumulated over the session; handed back to the
# chain on every call so follow-up questions have context.
chat_history = []

def on_submit(_):
    """Handle one submitted question from the text box.

    Reads and clears the input box, asks the conversational chain, appends the
    turn to ``chat_history`` and renders both sides of the exchange.

    Args:
        _: Argument supplied by the widget event (unused).
    """
    query = input_box.value.strip()
    input_box.value = ""

    # Skip blank submissions instead of spending an LLM request on them.
    if not query:
        return

    if query.lower() == 'exit':
        print('Thanks!!')
        return

    result = qa({"question":query, "chat_history":chat_history})
    chat_history.append((query, result['answer']))

    # Both strings are untrusted free text being placed inside HTML markup:
    # escape them so characters like '<' or '&' are shown literally instead of
    # being interpreted as tags. The bold tag around "User:" is closed properly.
    display(widgets.HTML(f'<b>User: </b> {html.escape(query)}'))
    display(widgets.HTML(f"<b><font color='blue'> Chatbot: </font></b> {html.escape(result['answer'])}"))

print("Welcome!!")

input_box = widgets.Text(placeholder = 'Please enter your question: ')
# NOTE(review): the saved output shows a warning pointing at this call. Newer
# ipywidgets releases deprecate `on_submit` in favour of `continuous_update=False`
# plus `observe(..., 'value')`; that variant also fires when the box loses focus,
# so it is kept as-is here — migrate once that behaviour change is acceptable.
input_box.on_submit(on_submit)

display(input_box)

Welcome!!


  input_box.on_submit(on_submit)


Text(value='', placeholder='Please enter your question: ')

In [6]:
# Each chunk is a LangChain Document.
# NOTE(review): an earlier comment here described ~500-token chunks from a recursive
# splitter, but this notebook assigns `chunks = pages` straight from
# `loader.load_and_split()` and never configures a token-based splitter — confirm the
# actual chunk sizing. This cell also carries an out-of-order execution count.
type(chunks[0]) 

langchain.schema.Document