## Documents Q&A

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its entries into the process environment so the
# API keys read later via os.environ are available. override=True lets values
# from the file replace variables that are already set in the environment.
load_dotenv(find_dotenv(), override=True)

True

#### Load your documents

In [2]:
def load_document(file):
    """Load a PDF or DOCX document with the matching LangChain loader.

    The loader is chosen from the file extension, compared case-insensitively
    ('.pdf' -> PyPDFLoader, '.docx' -> Docx2txtLoader).

    Args:
        file: Path of the document to load. Per the original author's note, a
            URL may be passed instead to load data from the web.

    Returns:
        Whatever the selected loader's ``load()`` returns, or ``None`` when the
        extension is not supported (a message is printed in that case).
    """
    _, extension = os.path.splitext(file)
    extension = extension.lower()

    # Loaders are imported lazily so only the dependency that is actually
    # needed for this file type has to be installed.
    if extension == '.pdf':
        from langchain.document_loaders import PyPDFLoader
        loader_cls = PyPDFLoader
    elif extension == '.docx':
        from langchain.document_loaders import Docx2txtLoader
        loader_cls = Docx2txtLoader
    else:
        print('File type not supported!')
        # Bail out here: there is no loader to call for this file type.
        return None

    print(f'Loading {file}')
    return loader_cls(file).load()

def load_from_wiki(query, lang='en', load_max_docs=1):
    """Load Wikipedia content matching ``query`` through LangChain's WikipediaLoader.

    Args:
        query: Search text handed to WikipediaLoader.
        lang: Language code forwarded to the loader (defaults to 'en').
        load_max_docs: Forwarded to the loader as its ``load_max_docs`` setting.

    Returns:
        The result of the loader's ``load()`` call.
    """
    from langchain.document_loaders import WikipediaLoader
    loader = WikipediaLoader(query=query, lang=lang, load_max_docs=load_max_docs)
    # load must be *called*; returning the bare attribute would hand the
    # caller a bound method instead of the loaded documents.
    data = loader.load()
    return data

#### Create text chunks

In [3]:
def chunk_data(data, chunk_size, chunk_overlap=0):
    """Split loaded documents into chunks with RecursiveCharacterTextSplitter.

    Args:
        data: Documents to split, passed to the splitter's ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``. Defaults
            to 0, which keeps the previous no-overlap behaviour.

    Returns:
        The chunks produced by ``split_documents``.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)

#### Embedding and Uploading to Pinecone

In [4]:
def insert_or_fetch_embeddings(index_name, chunks=None):
    """Return a LangChain Pinecone vector store for ``index_name``.

    If an index with that name already exists it is wrapped and returned as-is
    (``chunks`` is ignored). Otherwise a new serverless index is created and
    ``chunks`` are embedded and uploaded into it.

    Args:
        index_name: Name of the Pinecone index to fetch or create.
        chunks: Documents to embed and upload when the index has to be created.
            Required in that case.

    Returns:
        The vector store built by ``Pinecone.from_existing_index`` or
        ``Pinecone.from_documents``.

    Raises:
        ValueError: If the index does not exist and ``chunks`` is None.
    """
    import pinecone
    from pinecone import ServerlessSpec
    from langchain.vectorstores import Pinecone
    from langchain.embeddings.openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings()
    pc = pinecone.Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

    # any() stops at the first match instead of scanning every index.
    index_exists = any(index['name'] == index_name for index in pc.list_indexes())
    if index_exists:
        return Pinecone.from_existing_index(index_name, embeddings)

    # Validate BEFORE creating anything: otherwise a failed upload would leave
    # an empty index behind, and the next call would silently "fetch" it.
    if chunks is None:
        raise ValueError(
            f"Index '{index_name}' does not exist and no chunks were provided to create it."
        )

    # NOTE(review): dimension=1536 presumably matches the default
    # OpenAIEmbeddings model's vector size -- confirm if the model changes.
    pc.create_index(
        index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(cloud="aws", region="us-west-2"),
    )
    return Pinecone.from_documents(chunks, embeddings, index_name=index_name)

In [5]:
def delete_pinecone_index(index_name='all'):
    """Delete one Pinecone index, or every index when ``index_name`` is 'all'.

    Args:
        index_name: Name of the index to delete. The default value 'all' is a
            sentinel meaning "delete every index returned by list_indexes()".
    """
    import pinecone

    api_key = os.environ.get('PINECONE_API_KEY')
    client = pinecone.Pinecone(api_key=api_key)

    # Single-index case first, so the bulk path below reads without nesting.
    if index_name != 'all':
        client.delete_index(index_name)
        return

    for entry in client.list_indexes():
        client.delete_index(entry['name'])

#### Asking and Answering

In [6]:
def get_answer(vector_store, question, k=3, temperature=1):
    """Answer ``question`` using documents retrieved from ``vector_store``.

    Builds a RetrievalQA chain of type 'stuff' over a similarity retriever and
    runs the question through it with the 'gpt-3.5-turbo' chat model.

    Args:
        vector_store: Object exposing ``as_retriever``; used as the document source.
        question: The question text passed to the chain.
        k: Value passed to the retriever as ``search_kwargs={'k': k}``.
            Defaults to 3, the previously hard-coded value.
        temperature: Value passed to ChatOpenAI as ``temperature``. Defaults
            to 1, the previously hard-coded value.

    Returns:
        The value returned by ``chain.run(question)``.
    """
    from langchain.chains import RetrievalQA
    from langchain.chat_models import ChatOpenAI

    llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=temperature)

    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': k})
    chain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

    return chain.run(question)

## Running Q&A

In [7]:
# Load the source PDF via load_document; a URL is passed here in place of a
# local path, and the '.pdf' extension selects the PDF loader.
data = load_document('https://arxiv.org/pdf/1706.03762.pdf')

Loading https://arxiv.org/pdf/1706.03762.pdf


In [8]:
# Uncomment to check the content info
# print(f'There is a total of {len(data)} pages.')
# print(data[0].page_content)
# print(data[0].metadata)

In [9]:
# Split the loaded documents into chunks using a chunk_size of 256.
chunks = chunk_data(data, 256)

In [10]:
#chunks[0].page_content

In [11]:
# Delete all the indexes
# delete_pinecone_index()

  from tqdm.autonotebook import tqdm


In [12]:
# Reuse the Pinecone index named 'attention' if it already exists; otherwise
# it is created and the chunks are embedded and uploaded into it.
index_name = 'attention'
vector_store = insert_or_fetch_embeddings(index_name=index_name, chunks=chunks)

  warn_deprecated(


In [13]:
# Ask a single question against the indexed document and show the answer.
question = 'How is the Transformer algorithm implemented?'
answer = get_answer(vector_store, question)
print(answer)

  warn_deprecated(
  warn_deprecated(


The Transformer algorithm is implemented using deep learning frameworks such as TensorFlow or PyTorch. It involves creating a neural network architecture consisting of an encoder and a decoder. The encoder processes the input sequence, while the decoder generates the output sequence. The core components of the Transformer are multi-head self-attention and position-wise fully connected feed-forward networks. These components are stacked together to form multiple layers of the Transformer model. Training the Transformer involves optimizing the model's parameters using techniques like backpropagation and gradient descent.


In [17]:
import time

# Interactive Q&A loop: keeps prompting until the user types "exit"
# (any letter case, surrounding whitespace ignored).
i = 1
print('Type Exit to quit chat.')
while True:
    # Strip so that inputs such as "exit " or " Exit" are still recognised.
    q = input(f'Question {i}: ').strip()

    if q.lower() == 'exit':
        print('Quitting...')
        time.sleep(2)
        break

    if not q:
        # Nothing was typed: prompt again rather than sending an empty
        # question through the retrieval chain.
        continue

    # Only count questions that are actually asked.
    i += 1

    answer = get_answer(vector_store, q)
    print(f'\nAnswer: {answer}')
    print(f'\n{"-"* 50}\n')

Type Exit to quit chat.

Answer: The Transformer is different from previous models because it relies entirely on self-attention, instead of recurrence, to compute representations of its input and output. It uses stacked self-attention and point-wise, fully connected layers for both the encoder and decoder. This allows the model to draw global dependencies between input and output, resulting in improved performance in tasks such as machine translation.

--------------------------------------------------


Answer: There are several ways in which the Transformer model can be improved:

1. Model Architecture: Different variations of the Transformer architecture can be explored to improve performance. This can include modifications to the attention mechanism, layer normalization, or the use of additional sublayers.

2. Pre-training: Pre-training the Transformer on large amounts of unlabeled data can help improve its performance. This can be done using methods like unsupervised or semi-super