In [1]:
import os
import os
import json
from llama_index.core import SimpleDirectoryReader
import ollama
from groq import Groq
from langchain import PromptTemplate
# from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS
from pathlib import Path
import json
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import pickle
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!pip install faiss-cpu
import faiss



In [3]:
#Importiong api keys
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API")

In [4]:
Parent_directory = Path('Papers')
File_paths = Path.glob(Parent_directory, '*.pdf')

In [5]:
File_paths

<generator object Path.glob at 0x7d2a67369ad0>

In [6]:
File_paths = [str(i) for i in File_paths]

In [7]:
File_paths

['Papers/Attention_is_all_you_need.pdf',
 'Papers/DCGAN-notes.pdf',
 'Papers/DeepSORT.pdf',
 'Papers/Fast-RCNN.pdf',
 'Papers/GANs_Paper.pdf',
 'Papers/Image_Augmentation_IllusionCraft.pdf',
 'Papers/Mamba.pdf',
 'Papers/Mask RCNN.pdf',
 'Papers/Mismatching_images___Keeping_a_check_on_the_generator (1).pdf',
 'Papers/SORT.pdf',
 'Papers/StackGAN.pdf',
 'Papers/StackGAN_original_paper.pdf',
 'Papers/The Power of Linear Recurrent Neural Networks.pdf',
 'Papers/Variational Auto encoders.pdf',
 'Papers/Word2Vec Paper.pdf']

#### Using LangSimth for monitoring

In [8]:
# Using langsmith to monitor the progress of the summarization
#! If you want to use langsmith, please set the environment variable LANGCHAIN_API to your langsmith api key
if os.environ.get("LANGCHAIN_API")!="":
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"]=os.environ.get("LANGCHAIN_API")
    os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
    os.environ["LANGCHAIN_PROJECT"]="LearnLang"

### Summarization

In [9]:
# Functionizing till split_chunks
def input_llm_pdf(File_path): #!Pass the file path of the pdf, this function will return the split_chunks
    # Reading the pdf file
    pdfreader = PdfReader(File_path)
    text = ''
    for i, page in enumerate(pdfreader.pages):
        content = page.extract_text()
        if content:
            text += content
    # Converting the text of the pdf of Document object
    docs = [Document(page_content=text)]
    docs
    ## Splittting the text
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300000, chunk_overlap=200) # 300k characters per chunk, or nearly 75,000 tokens
    chunks = text_splitter.create_documents([text])
    split_chunks = []
    for x in range(0,len(chunks),12):
        split_chunks.append(chunks[x:x+12])
    return split_chunks

#### Prompts for summarizing

In [10]:
chunks_prompt="""
Sumarise the bellow document, it will be used to later match the document with a user query, so capture the most important informations.
Document:`{text}'.Also in a line return the keywords in the document, these keywords must include the vast majority of the important points in the document.Use the maximum number of characters to express the summary under 20,000 characters.
Summary:
"""
map_prompt_template=PromptTemplate(input_variables=['text'],
                                    template=chunks_prompt)

In [11]:
final_combine_prompt='''
Provide a final summary of the entire document with these important points, this will be used to match the document with a user query, so capture the most important informations.Also in a line return the keywords in the document, these keywords must include the vast majority of the important points in the document.Use the maximum number of characters to express the summary under 20,000 characters.
Document: `{text}`
'''
final_combine_prompt_template=PromptTemplate(input_variables=['text'],
                                             template=final_combine_prompt)

In [12]:
from langchain_google_genai import ChatGoogleGenerativeAI
GEMINI_API_KEY=os.environ.get("GEMINI_API")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",api_key=GEMINI_API_KEY)

In [13]:
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    map_prompt=map_prompt_template,
    combine_prompt=final_combine_prompt_template,
    verbose=True
)


In [14]:
## Splittting the text
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300000, chunk_overlap=200) # 300k characters per chunk, or nearly 75,000 tokens

In [15]:
start_time = time.time()
endtime = None
output = ''
requests_in_this_minute = 0
summarized_documents = []
start_time = time.time()
index = 0
while index < len(File_paths):
    try:
        
        file_path = str(File_paths[index])
        print(f"\n\n\n\n\n\nProcessing file: {file_path}")
        print(f"Index {index} of {len(File_paths)} completed\n\n\n\n\n\n")  
        split_chunks = input_llm_pdf(file_path)
        for x in range(0,len(split_chunks)):
            chunks = split_chunks[x]
            requests_in_this_minute += len(chunks)
            if (requests_in_this_minute>14) :
                time.sleep(60 - (time.time() - start_time))
                start_time = time.time()
                requests_in_this_minute = len(chunks)
            output += summary_chain.run(chunks)
            endtime = time.time()
            if (endtime-start_time)>60:
                requests_in_this_minute = 0
                start_time = time.time()
        output_doc = Document(page_content=output)
        chunks = text_splitter.create_documents([output])
        requests_in_this_minute += len(chunks)
        if (requests_in_this_minute>14) :
            time.sleep(60 - (time.time() - start_time))
            start_time = time.time()
            requests_in_this_minute = len(chunks)

        final_summary = summary_chain.run(chunks)
        final_summary = f"File_path: {File_paths[index]} \n"+final_summary
        index+=1
        summarized_documents.append([Document(page_content=final_summary)])
    except Exception as e:
        print(f"Exception: {e}")
        print("Resource exhausted, waiting for 60 seconds...")
        index -= 1
        time.sleep(60)








Processing file: Papers/Attention_is_all_you_need.pdf
Index 0 of 15 completed








  output += summary_chain.run(chunks)




[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Sumarise the bellow document, it will be used to later match the document with a user query, so capture the most important informations.
Document:`Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani‚àó
Google Brain
avaswani@google.comNoam Shazeer‚àó
Google Brain
noam@google.comNiki Parmar‚àó
Google Research
nikip@google.comJakob Uszkoreit‚àó
Google Research
usz@google.com
Llion Jones‚àó
Google Research
llion@google.comAidan N. Gomez‚àó ‚Ä†
University of Toronto
aidan@cs.toronto.edu≈Åukasz Kaiser‚àó
Google Brain
lukaszkaiser@google.com
Illia Polosukhin‚àó ‚Ä°
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..



[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Sumarise the bellow document, it will be used to later match the document with a user query, so capture the most important informations.
Document:`This paper introduces the Transformer, a revolutionary neural network architecture that surpasses traditional recurrent and convolutional networks for sequence transduction tasks. Unlike its predecessors, the Transformer relies solely on attention mechanisms to capture global dependencies between input and output sequences. This innovative approach enables significant advantages in terms of parallelization, training speed, and translation quality, leading to state-of-the-art results on WMT 2014 English-to-German and English-to-French translation tasks.  Furthermore, the paper showcases the Transformer's versatility by applying it to English constituen

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Exception: 429 Resource has been exhausted (e.g. check quota).
Resource exhausted, waiting for 60 seconds...






Processing file: Papers/GANs_Paper.pdf
Index 4 of 15 completed








[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Sumarise the bellow document, it will be used to later match the document with a user query, so capture the most important informations.
Document:`Generative Adversarial Nets
Ian J. Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley,
Sherjil Ozairy, Aaron Courville, Yoshua Bengioz
D¬¥epartement d‚Äôinformatique et de recherche op ¬¥erationnelle
Universit ¬¥e de Montr ¬¥eal
Montr ¬¥eal, QC H3C 3J7
Abstract
We propose a new framework for estimating generative models via an adversar-
ial process, in which we simultaneously train two models: a generative model G
that captures the data distribution, and a discriminative model Dthat estimates
the prob

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Exception: 429 Resource has been exhausted (e.g. check quota).
Resource exhausted, waiting for 60 seconds...






Processing file: Papers/SORT.pdf
Index 9 of 15 completed








[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Sumarise the bellow document, it will be used to later match the document with a user query, so capture the most important informations.
Document:`arXiv:1602.00763v2  [cs.CV]  7 Jul 2017SIMPLE ONLINE AND REALTIME TRACKING
Alex Bewley‚Ä†, Zongyuan Ge‚Ä†, Lionel Ott‚ãÑ, Fabio Ramos‚ãÑ, Ben Upcroft‚Ä†
Queensland University of Technology‚Ä†, University of Sydney‚ãÑ
ABSTRACT
This paper explores a pragmatic approach to multiple ob-
ject tracking where the main focus is to associate objects ef -
Ô¨Åciently for online and realtime applications. To this end, de-
tection quality is identiÔ¨Åed as a key factor inÔ¨Çuencing trac k-
ing performance, where changing the detector can impro

In [16]:
summarized_documents

[[Document(metadata={}, page_content="File_path: Papers/Attention_is_all_you_need.pdf \n## Summary:\n\nThe paper introduces the Transformer, a novel neural network architecture that surpasses traditional recurrent and convolutional networks in sequence transduction tasks. Unlike previous models, the Transformer relies solely on attention mechanisms to grasp global dependencies between input and output sequences. This groundbreaking approach offers significant benefits in terms of parallelization, training speed, and translation quality, leading to exceptional results on WMT 2014 English-to-German and English-to-French translation tasks. The paper further demonstrates the Transformer's versatility by applying it to English constituency parsing, achieving competitive results even with limited training data.\n\n## Keywords:\n\nTransformer, neural network architecture, sequence transduction, attention mechanism, machine translation, parallelization, training speed, translation quality, Eng

In [17]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004",google_api_key=GEMINI_API_KEY)

In [18]:
len(summarized_documents),len(File_paths)

(17, 15)

In [19]:
path_vector_db_folder = Path("Summaries")
path_vector_db = Path.joinpath(path_vector_db_folder, "faiss_index.index")
path_docstore = Path.joinpath(path_vector_db_folder, "docstore.pkl")
path_index_to_docstore_id = Path.joinpath(path_vector_db_folder, "index_to_docstore_id.pkl")
if not path_vector_db_folder.exists():
    path_vector_db_folder.mkdir()
index = 0
while index < len(summarized_documents):
    chunks = summarized_documents[index]
    print(f"Processig index: {index} of {len(summarized_documents)}")
    start_time = time.time()
    try:
        vector_db = FAISS.from_documents(chunks, embeddings)
        try:
            with open(str(path_docstore), 'ab') as f:
                pickle.dump(vector_db.docstore, f)
            with open(str(path_index_to_docstore_id), 'ab') as f:
                pickle.dump(vector_db.index_to_docstore_id, f)
        except:
            with open(str(path_docstore), 'wb') as f:
                pickle.dump(vector_db.docstore, f)
            with open(str(path_index_to_docstore_id), 'wb') as f:
                pickle.dump(vector_db.index_to_docstore_id, f)
        try:
            index_old = faiss.read_index(str(path_vector_db))
            index_old.add(vector_db.index)
            faiss.write_index(index_old, str(path_vector_db))
        except:
            faiss.write_index(vector_db.index, str(path_vector_db))
        index += 1
    except Exception as e:
        print(f"Exception: {e}")
        print("Resource exhausted, waiting for 60 seconds...")
        index -= 1
        time.sleep(60)

Processig index: 0 of 17
Processig index: 1 of 17
Processig index: 2 of 17
Processig index: 3 of 17
Processig index: 4 of 17
Processig index: 5 of 17
Processig index: 6 of 17
Processig index: 7 of 17
Processig index: 8 of 17
Processig index: 9 of 17
Processig index: 10 of 17
Processig index: 11 of 17
Processig index: 12 of 17
Processig index: 13 of 17
Processig index: 14 of 17
Processig index: 15 of 17
Processig index: 16 of 17


In [20]:
loaded_faiss_index = faiss.read_index(str(path_vector_db))
# Load the docstore and index_to_docstore_id
with open(str(path_docstore), 'rb') as f:
    loaded_docstore = pickle.load(f)

with open(str(path_index_to_docstore_id), 'rb') as f:
    loaded_index_to_docstore_id = pickle.load(f)


vector_db_loaded = FAISS(
    embeddings.embed_query, 
    loaded_faiss_index, 
    loaded_docstore, 
    loaded_index_to_docstore_id
)

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


In [21]:
retriever = vector_db_loaded.as_retriever()

In [22]:
chunks_prompt="""
Using the Context bellow answer the question {question}, mention the path of the MOST RELAVENT documents.

NOTE: The contexts are summaries of maybe very large documents, so scrutinize it well and at the end of each summary keywords are also mentioned, use these also to answer the question.RETURN ONLY PATH TO THE MOST RELEVANT DOCUMENT
Context:
{text}

IMPORTANT: The answer should be in the following format:
RETURN ONLY JSON DATA NOTHING ELSE
```
    {{
    "files": [
        {{
        "file_path": "path to the file "
        }}
    ]
    }}
    ```
"""
RAG_prompt_template=PromptTemplate(input_variables=['text','question'],
                                    template=chunks_prompt)

In [23]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [24]:
from langchain_google_genai import ChatGoogleGenerativeAI
GEMINI_API_KEY=os.environ.get("GEMINI_API")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",api_key=GEMINI_API_KEY)

In [25]:
question = "Explain me what is the attention mechanism in 100 words"
context = retriever.invoke(question)
rag_chain = (
    {"text": retriever | format_docs, "question": RunnablePassthrough()}
    | RAG_prompt_template
    | llm
    | StrOutputParser()
)


In [26]:
response = rag_chain.invoke(question)

In [27]:
response

'```json\n    {\n    "files": [\n        {\n        "file_path": "Papers/Attention_is_all_you_need.pdf"\n        }\n    ]\n    }\n```'

In [28]:
formated_response = response.strip('`').split('json')[1]

In [29]:
json_response = json.loads(formated_response)

In [30]:
json_response

{'files': [{'file_path': 'Papers/Attention_is_all_you_need.pdf'}]}

In [31]:
File_paths_relavent = [x['file_path'] for x in json_response['files']]
File_paths_relavent

['Papers/Attention_is_all_you_need.pdf']

In [32]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004",google_api_key=GEMINI_API_KEY)

In [33]:
# Functionizing till split_chunks for vectorization
def input_llm_embeddings_pdf(File_path): #!Pass the file path of the pdf, this function will return the split_chunks
    # Reading the pdf file
    pdfreader = PdfReader(File_path)
    text = ''
    for i, page in enumerate(pdfreader.pages):
        content = page.extract_text()
        if content:
            text += content
    # Converting the text of the pdf of Document object
    docs = [Document(page_content=text)]
    docs
    ## Splittting the text
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=200) # 300k characters per chunk, or nearly 75,000 tokens
    chunks = text_splitter.create_documents([text])
    split_chunks = []
    for x in range(0,len(chunks),1400):
        split_chunks.append(chunks[x:x+1400])
    return split_chunks

In [34]:
!mkdir -p VectorDatabases

I0000 00:00:1728307297.135230   35566 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers


In [35]:
File_paths_relavent

['Papers/Attention_is_all_you_need.pdf']

In [36]:
index = 0
path_vector_database = Path('VectorDatabases')
while index < len(File_paths_relavent):
    file_path = File_paths_relavent[index]
    print(f"Processing file: {file_path}")
    print(f"Index {index+1} of {len(File_paths_relavent)} completed")

    path_vector_db_folder = Path.joinpath(path_vector_database,file_path)
    path_vector_db = Path.joinpath(path_vector_db_folder, "faiss_index.index")
    path_docstore = Path.joinpath(path_vector_db_folder, "docstore.pkl")
    path_index_to_docstore_id = Path.joinpath(path_vector_db_folder, "index_to_docstore_id.pkl")
    if not path_vector_db_folder.exists():
        # path_vector_db_folder.mkdir(parents)
        path_vector_db_folder.mkdir(parents=True, exist_ok=True)
        split_chunks = input_llm_embeddings_pdf(file_path)
        try:
            for chunks in split_chunks:
                start_time = time.time()
                vector_db = FAISS.from_documents(chunks, embeddings)
                try:
                    with open(str(path_docstore), 'ab') as f:
                        pickle.dump(vector_db.docstore, f)
                    with open(str(path_index_to_docstore_id), 'ab') as f:
                        pickle.dump(vector_db.index_to_docstore_id, f)
                except:
                    with open(str(path_docstore), 'wb') as f:
                        pickle.dump(vector_db.docstore, f)
                    with open(str(path_index_to_docstore_id), 'wb') as f:
                        pickle.dump(vector_db.index_to_docstore_id, f)
                try:
                    index_old = faiss.read_index(str(path_vector_db))
                    index_old.add(vector_db.index)
                    faiss.write_index(index_old, str(path_vector_db))
                except:
                    faiss.write_index(vector_db.index, str(path_vector_db))
                finally:
                    endtime = time.time()
                    if (endtime - start_time) < 60:
                        time.sleep(60 - (endtime - start_time))
        except Exception as e:
            print(f"Exception: {e}")
            print("Resource exhausted, waiting for 60 seconds...")
            index -= 1
            time.sleep(60)
    else:
        pass
    index += 1
print(index)


Processing file: Papers/Attention_is_all_you_need.pdf
Index 1 of 1 completed
1


In [37]:
question

'Explain me what is the attention mechanism in 100 words'

In [38]:
chunks_prompt="""
Using the Context bellow answer the question {question}, also mention the page number, name of the doc, etc relavent details at /the end of the response.

Context:
{text}
"""
RAG_prompt_template=PromptTemplate(input_variables=['text','question'],
                                    template=chunks_prompt)
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
context = retriever.invoke(question)
rag_chain = (
    {"text": retriever | format_docs, "question": RunnablePassthrough()}
    | RAG_prompt_template
    | llm
    | StrOutputParser()
)

from langchain_google_genai import ChatGoogleGenerativeAI
GEMINI_API_KEY=os.environ.get("GEMINI_API")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",api_key=GEMINI_API_KEY)
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"]=os.environ.get("LANGCHAIN_API")
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"]="LearnLang"

In [39]:
context = list()
index = 0
path_vector_database = Path('VectorDatabases')
while index < len(File_paths_relavent):
    file_path = File_paths_relavent[index]
    print(f"Processing file: {file_path}")
    print(f"Index {index+1} of {len(File_paths_relavent)} completed")

    path_vector_db_folder = Path.joinpath(path_vector_database,file_path)
    path_vector_db = Path.joinpath(path_vector_db_folder, "faiss_index.index")
    path_docstore = Path.joinpath(path_vector_db_folder, "docstore.pkl")
    loaded_faiss_index = faiss.read_index(str(path_vector_db))
    # Load the docstore and index_to_docstore_id
    with open(str(path_docstore), 'rb') as f:
        loaded_docstore = pickle.load(f)

    with open(str(path_index_to_docstore_id), 'rb') as f:
        loaded_index_to_docstore_id = pickle.load(f)


    vector_db_loaded = FAISS(
        embeddings.embed_query, 
        loaded_faiss_index, 
        loaded_docstore, 
        loaded_index_to_docstore_id
    )
    retriever = vector_db_loaded.as_retriever()
    context.extend(retriever.invoke(question))
    index += 1

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


Processing file: Papers/Attention_is_all_you_need.pdf
Index 1 of 1 completed


In [41]:
rag_chain.invoke(question)

'The attention mechanism, described in the paper "Attention Is All You Need" (page 3), allows a neural network to focus on specific parts of an input sequence when processing it. This mechanism assigns weights to different elements of the input, indicating their importance for the current task. This selective focus allows the model to efficiently capture long-range dependencies and understand complex relationships within the data. \n'

In [49]:
file_path = File_paths_relavent[0]
def return_context(question):
    index = 0
    while index < len(File_paths_relavent):
        file_path = File_paths_relavent[index]
        print(f"Processing file: {file_path}")
        print(f"Index {index+1} of {len(File_paths_relavent)} completed")

        path_vector_db_folder = Path.joinpath(path_vector_database,file_path)
        path_vector_db = Path.joinpath(path_vector_db_folder, "faiss_index.index")
        path_docstore = Path.joinpath(path_vector_db_folder, "docstore.pkl")
        loaded_faiss_index = faiss.read_index(str(path_vector_db))
        # Load the docstore and index_to_docstore_id
        with open(str(path_docstore), 'rb') as f:
            loaded_docstore = pickle.load(f)

        with open(str(path_index_to_docstore_id), 'rb') as f:
            loaded_index_to_docstore_id = pickle.load(f)


        vector_db_loaded = FAISS(
            embeddings.embed_query, 
            loaded_faiss_index, 
            loaded_docstore, 
            loaded_index_to_docstore_id
        )
        retriever = vector_db_loaded.as_retriever()
        context.extend(retriever.invoke(question))
        index +=1 
    return context

In [50]:
chunks_prompt="""
Using the Context bellow answer the question {question}, also mention the page number, name of the doc, etc relavent details at /the end of the response.

Context:
{text}
"""
RAG_prompt_template=PromptTemplate(input_variables=['text','question'],
                                    template=chunks_prompt)
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
context = retriever.invoke(question)
rag_chain = (
    {"text": return_context , "question": RunnablePassthrough()}
    | RAG_prompt_template
    | llm
    | StrOutputParser()
)

In [52]:
response = rag_chain.invoke(question)

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


Processing file: Papers/Attention_is_all_you_need.pdf
Index 1 of 1 completed


In [53]:
response

'The attention mechanism in a neural network allows the model to focus on specific parts of the input sequence when processing information. It assigns weights to different parts of the input, giving more importance to relevant elements. This is done by calculating a "compatibility function" between a "query" and a set of "key-value" pairs.  The weights are then used to compute a weighted sum of the "values," yielding an output that reflects the attended information.\n\nThis explanation is from the document titled "Attention is All You Need", on page 3. \n'