In [1]:
import numpy as np
import torch
from transformers import pipeline
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
#Split documents in smaller chunks
#Chunks of roughly 700 characters with an overlap of 50 characters
loader = PyPDFLoader("C:\\Users\\kamalam.s\\Desktop\\kamalam's\\nlp dev\\resources\\NIPS-2017-attention-is-all-you-need-Paper.pdf")

docs_before_split = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 700,
    chunk_overlap  = 50,
)
docs_after_split = text_splitter.split_documents(docs_before_split)

docs_after_split[0]

Document(page_content='Attention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗†\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention', metadata={'source': "C:\\Users\\kamalam.s\\Desktop\\kamalam's\\nlp dev\\resources\\NIPS-2017-attention-is-all-you-need-Paper.pdf", 'page': 0})

In [10]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFacePipeline

huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l6-v2",  # alternatively use "sentence-transformers/all-MiniLM-l6-v2" for a light and faster experience.
    model_kwargs={'device':'cpu'}, 
    encode_kwargs={'normalize_embeddings': True}
)

In [11]:
from langchain_community.vectorstores import FAISS

vectordb = FAISS.from_documents(docs_after_split, huggingface_embeddings)

In [12]:
import re
def retrieve_information(query, knowledge_base):
    relevant_info = knowledge_base.similarity_search(query,k=3)
    
    context = []

    for i in range(len((query, knowledge_base.similarity_search(query, search_kwargs={"k": 3}))[1])):
        d = (query, knowledge_base.similarity_search(query, search_kwargs={"k": 3}))[1][i].dict()
        context.append(d['page_content'])
    
    parsed_context = []
    
    for ele in context:
        temp = re.sub("\n", '', ele)
        temp = re.sub(r'www.\S+', '', temp)
        parsed_context.append(temp)
    
    final_context = ""
    for ele in parsed_context:
        final_context += ele
    
    return final_context

In [14]:
retrieve_information("attention", vectordb)

'sentence representations used by state-of-the-art models in machine translations, such as word-piece[31] and byte-pair [ 25] representations. To improve computational performance for tasks involvingvery long sequences, self-attention could be restricted to considering only a neighborhood of size rin6arXiv:1703.10722 , 2017.[19] Zhouhan Lin, Minwei Feng, Cicero Nogueira dos Santos, Mo Yu, Bing Xiang, BowenZhou, and Yoshua Bengio. A structured self-attentive sentence embedding. arXiv preprintarXiv:1703.03130 , 2017.[20] Samy Bengio Łukasz Kaiser. Can active memory replace attention? In Advances in NeuralInformation Processing Systems, (NIPS) , 2016.10textual entailment and learning task-independent sentence representations [4, 22, 23, 19].End-to-end memory networks are based on a recurrent attention mechanism instead of sequence-aligned recurrence and have been shown to perform well on simple-language question answering andlanguage modeling tasks [28].To the best of our knowledge, howev

In [22]:
def generate_response(query, knowledge_base):
    # Retrieve relevant information based on the query
    relevant_info = retrieve_information(query, knowledge_base)
    # Model for text-generation, employing Q&A through text-generation, since debugging Q&A using llms and context info. (a work around basically)
    text2text_generator = pipeline(
        "text2text-generation",
        model="google-t5/t5-base",
    )
    return text2text_generator("As a research information assistant, based on the paper provided, answer the question: "+query+" context: "+relevant_info+ "If you are not sure about the answer, say 'Unknown' and nothing else.")

In [23]:
generate_response("Explain positional encoding", vectordb)

[{'generated_text': 'each position in the encoder can attend to all positions in the input sequence'}]

In [24]:
generate_response("what is multi-headed attention?", vectordb)

[{'generated_text': 'jointly attend to information from different representationsubspaces at different positions'}]

In [25]:
generate_response("What is it about?", vectordb)

[{'generated_text': 'Transformer'}]

In [26]:
generate_response("What is RoBERTa?", vectordb)

[{'generated_text': 'Unknown'}]