## Documents Q&A

In [2]:
# Populate the process environment from the .env file located by find_dotenv();
# the Pinecone helpers below read PINECONE_API_KEY from os.environ.
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

#### Load your documents

In [3]:
def load_document(file):
    """Load a PDF or DOCX document with the matching LangChain loader.

    Args:
        file: Path of the document to load. Using a URL for this parameter
            can load data from the web.

    Returns:
        The result of the selected loader's ``load()`` call, or ``None`` when
        the file extension is neither ``.pdf`` nor ``.docx``.
    """
    # Only the extension matters here; compare it case-insensitively so that
    # 'REPORT.PDF' is treated like 'report.pdf'.
    extension = os.path.splitext(file)[1].lower()

    if extension == '.pdf':
        from langchain.document_loaders import PyPDFLoader
        loader_cls = PyPDFLoader
    elif extension == '.docx':
        from langchain.document_loaders import Docx2txtLoader
        loader_cls = Docx2txtLoader
    else:
        # Stop here: falling through would call load() on an unbound loader
        # and raise UnboundLocalError instead of reporting the real problem.
        print('File type not supported!')
        return None

    print(f'Loading {file}')
    return loader_cls(file).load()

def load_from_wiki(query, lang='en', load_max_docs=1):
    """Load Wikipedia content for ``query`` through LangChain's WikipediaLoader.

    Args:
        query: Search text passed to the loader.
        lang: Language code passed to the loader (defaults to ``'en'``).
        load_max_docs: Maximum number of documents requested from the loader.

    Returns:
        The result of the loader's ``load()`` call.
    """
    from langchain.document_loaders import WikipediaLoader
    loader = WikipediaLoader(query=query, lang=lang, load_max_docs=load_max_docs)
    # load must be *called*; returning the bare attribute hands the caller a
    # bound method instead of the loaded documents.
    return loader.load()

#### Create text chunks

In [4]:
def chunk_data(data, chunk_size, chunk_overlap=0):
    """Split loaded documents into chunks with RecursiveCharacterTextSplitter.

    Args:
        data: Documents to split, as accepted by ``split_documents``.
        chunk_size: ``chunk_size`` setting handed to the splitter.
        chunk_overlap: ``chunk_overlap`` setting handed to the splitter.
            Defaults to 0, which was previously the fixed value.

    Returns:
        The chunks produced by ``split_documents``.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)

#### Embedding and Uploading to Pinecone

In [5]:
def insert_or_fetch_embeddings(index_name, chunks=None):
    """Return a vector store for ``index_name``, creating the index if needed.

    If a Pinecone index with this name is listed, it is wrapped with
    ``Pinecone.from_existing_index``. Otherwise a serverless index
    (dimension 1536, cosine metric, AWS ``us-west-2``) is created and filled
    from ``chunks`` via ``Pinecone.from_documents``.

    Args:
        index_name: Name of the Pinecone index to reuse or create.
        chunks: Documents to embed and upload. Required only when the index
            does not exist yet.

    Returns:
        The LangChain Pinecone vector store bound to ``index_name``.

    Raises:
        ValueError: If the index does not exist and ``chunks`` is ``None``.
    """
    import pinecone
    from pinecone import ServerlessSpec
    from langchain.vectorstores import Pinecone
    from langchain.embeddings.openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings()
    pc = pinecone.Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

    if any(index['name'] == index_name for index in pc.list_indexes()):
        return Pinecone.from_existing_index(index_name, embeddings)

    # Validate before touching Pinecone: otherwise an empty index is created
    # first and the upload then fails, leaving that empty index behind to be
    # silently "fetched" on the next call.
    if chunks is None:
        raise ValueError(
            f"Index '{index_name}' does not exist; pass chunks to create and populate it."
        )

    # NOTE(review): 1536 presumably matches the OpenAIEmbeddings output size - confirm
    # if the embedding model is ever changed.
    pc.create_index(
        index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(cloud="aws", region="us-west-2"),
    )
    return Pinecone.from_documents(chunks, embeddings, index_name=index_name)

In [6]:
def delete_pinecone_index(index_name='all'):
    """Delete a single Pinecone index, or every listed index.

    Args:
        index_name: Name of the index to delete. The default ``'all'``
            deletes each index returned by ``list_indexes()``.
    """
    import pinecone
    client = pinecone.Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

    # Single-index case first, so the bulk path reads as the fallthrough.
    if index_name != 'all':
        client.delete_index(index_name)
        return

    for entry in client.list_indexes():
        client.delete_index(entry['name'])

#### Asking and Answering

In [7]:
def get_answer(vector_store, question):
    """Answer ``question`` with a RetrievalQA chain over ``vector_store``.

    The chain uses ChatOpenAI ('gpt-3.5-turbo', temperature 1), the 'stuff'
    chain type and a similarity retriever configured with ``k=3``.

    Args:
        vector_store: Object exposing ``as_retriever``.
        question: Question text passed to the chain's ``run``.

    Returns:
        Whatever the chain's ``run(question)`` returns.
    """
    from langchain.chains import RetrievalQA
    from langchain.chat_models import ChatOpenAI

    # Keyword arguments are evaluated left to right, so the LLM is still
    # constructed before the retriever.
    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=1),
        chain_type='stuff',
        retriever=vector_store.as_retriever(
            search_type='similarity',
            search_kwargs={'k': 3},
        ),
    )
    return qa_chain.run(question)

In [18]:
def ask_with_memory(vector_store, q, chat_history=None):
    """Ask ``q`` through a ConversationalRetrievalChain, tracking chat history.

    Args:
        vector_store: Object exposing ``as_retriever``.
        q: Question text.
        chat_history: List of ``(question, answer)`` tuples from earlier
            turns. A list passed in is appended to in place. When omitted, a
            fresh list is created for this call.

    Returns:
        A ``(result, chat_history)`` tuple: the chain's result (its
        ``'answer'`` entry is what gets recorded) and the history list
        including the new turn.
    """
    from langchain.chains import ConversationalRetrievalChain
    from langchain.chat_models import ChatOpenAI

    # A literal [] default is created once and shared by every call that
    # omits the argument, leaking history between unrelated conversations.
    # Test for None (not falsiness) so a caller's own empty list is kept.
    if chat_history is None:
        chat_history = []

    llm = ChatOpenAI(temperature=1)
    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})

    crc = ConversationalRetrievalChain.from_llm(llm, retriever)
    result = crc({'question': q, 'chat_history': chat_history})
    chat_history.append((q, result['answer']))

    return result, chat_history

## Running Q&A

In [8]:
# Load the source PDF straight from its URL; load_document selects the PDF
# loader from the '.pdf' extension.
data = load_document('https://arxiv.org/pdf/1706.03762.pdf')

Loading https://arxiv.org/pdf/1706.03762.pdf


In [9]:
# Uncomment to check the content info
# print(f'There is a total of {len(data)} pages.')
# print(data[0].page_content)
# print(data[0].metadata)

In [10]:
# Split the loaded documents into chunks using chunk_size=256.
chunks = chunk_data(data, 256)

In [11]:
#chunks[0].page_content

In [12]:
# Delete all the indexes
# delete_pinecone_index()

In [13]:
# Reuse the 'attention' Pinecone index when it already exists; otherwise the
# helper creates it and uploads the embedded chunks.
index_name = 'attention'
vector_store = insert_or_fetch_embeddings(index_name=index_name, chunks=chunks)

  from tqdm.autonotebook import tqdm
  warn_deprecated(


In [14]:
# One-off question answered through get_answer (no chat history involved).
# NOTE(review): 'impelmented' is misspelled in the query text sent to the chain.
question = 'How is the Transformer algorithm impelmented?'
answer = get_answer(vector_store, question)
print(answer)

  warn_deprecated(
  warn_deprecated(


The Transformer algorithm is implemented using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder. The architecture of the Transformer model is shown in Figure 1, with the encoder on the left and the decoder on the right. The model relies entirely on self-attention to compute representations of its input and output without using sequence-based recurrence or convolution. Additionally, the model allows for parallelization and can achieve state-of-the-art translation quality after being trained for only twelve hours on eight P100 GPUs.


In [17]:
import time

# Interactive Q&A loop: each question is answered independently through
# get_answer until the user types 'exit' (case-insensitive).
i = 1
print('Type Exit to quit chat.')
while True:
    # Strip surrounding whitespace so input such as ' exit ' still quits.
    q = input('Please enter your question: ').strip()

    if q.lower() == 'exit':
        print('Quitting...\nByebye!')
        time.sleep(2)
        break

    # Skip blank input rather than spending an API call on an empty question.
    if not q:
        continue

    print(f'Question {i}: {q}\n')
    answer = get_answer(vector_store, q)
    print(f'\nAnswer: {answer}')
    print(f'\n{"-"* 50}\n')

    i += 1

Type Exit to quit chat.
Question 1: exir


Answer: I'm sorry, but I'm not sure what you mean by "exir." Can you please provide more context or clarify your question?

--------------------------------------------------

Quitting...
Byebye!


#### Q&A with history

In [19]:
# Start a conversation with an empty history; ask_with_memory appends the
# (question, answer) pair and returns the list so it can be passed to the
# next question.
chat_history = []
question = 'What makes a transformer different from other neural network?'
result, chat_history = ask_with_memory(vector_store=vector_store, q=question, chat_history=chat_history)
print(result['answer'])
print(chat_history)

  warn_deprecated(


A Transformer is different from other neural networks in that it does not rely on recurrence (such as in recurrent neural networks) or convolution (such as in convolutional neural networks) for capturing dependencies between input and output. Instead, it uses an attention mechanism that allows it to draw global dependencies between input and output. The use of self-attention enables the Transformer to compute representations of its input and output without using sequential information, making it a powerful model architecture for tasks such as machine translation.
[('What makes a transformer different from other neural network?', 'A Transformer is different from other neural networks in that it does not rely on recurrence (such as in recurrent neural networks) or convolution (such as in convolutional neural networks) for capturing dependencies between input and output. Instead, it uses an attention mechanism that allows it to draw global dependencies between input and output. The use of