### Data ingestion

In [59]:
from langchain_core.documents import Document

In [60]:
doc  = Document(
  page_content="This is the main content Im using to create RAG",
  metadata={
    "source":"nothing.txt",
    "Auther":"Akash VP",
    "pages":1,
    "date_created":"11-07-2025"
  }
  
)

In [61]:
#text loader
from langchain_community.document_loaders import TextLoader

loader = TextLoader("../data/simple/policy.txt",encoding = "utf-8")
document = loader.load()

print(document)


[Document(metadata={'source': '../data/simple/policy.txt'}, page_content='# Company Policies Document\n\n## 1. Work-Life Balance\nAt TechCorp, we strongly encourage employees to maintain a healthy work-life balance.\nEmployees can work flexible hours between 8 AM to 8 PM, as long as they complete their assigned 8-hour workday.\nWork-from-home is allowed on Fridays and up to 4 additional days per month upon manager approval.\n\n## 2. Leave Policy\nEach full-time employee is entitled to 20 days of paid leave per year.\nUnused leaves can be carried forward up to a maximum of 10 days.\nFor emergencies, employees can request up to 5 additional unpaid leaves.\nPublic holidays are separate and do not count toward paid leave.\n\n## 3. Remote Work Policy\nRemote work is permitted for roles that do not require physical presence in the office.\nEmployees must ensure a reliable internet connection and a professional workspace.\nMeetings and performance reviews will continue virtually during remote

In [62]:
#Directory loader
from langchain_community.document_loaders import DirectoryLoader

#for lading all the text file from the directory
dir_loader = DirectoryLoader(
  "../data/simple",
  glob="**/*.txt",
  loader_cls=TextLoader,
  loader_kwargs={"encoding":"utf-8"},
  show_progress=True
)

documents = dir_loader.load()
print(documents)

100%|██████████| 2/2 [00:00<00:00, 5555.37it/s]

[Document(metadata={'source': '../data/simple/policy.txt'}, page_content='# Company Policies Document\n\n## 1. Work-Life Balance\nAt TechCorp, we strongly encourage employees to maintain a healthy work-life balance.\nEmployees can work flexible hours between 8 AM to 8 PM, as long as they complete their assigned 8-hour workday.\nWork-from-home is allowed on Fridays and up to 4 additional days per month upon manager approval.\n\n## 2. Leave Policy\nEach full-time employee is entitled to 20 days of paid leave per year.\nUnused leaves can be carried forward up to a maximum of 10 days.\nFor emergencies, employees can request up to 5 additional unpaid leaves.\nPublic holidays are separate and do not count toward paid leave.\n\n## 3. Remote Work Policy\nRemote work is permitted for roles that do not require physical presence in the office.\nEmployees must ensure a reliable internet connection and a professional workspace.\nMeetings and performance reviews will continue virtually during remote




In [63]:
# loading pdf
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader

dir_loader = DirectoryLoader(
  "../data/pdf",
  glob="**/*.pdf",
  loader_cls=PyMuPDFLoader,
  show_progress=False
)
pdf_doc = dir_loader.load()
print(pdf_doc)

[Document(metadata={'producer': 'Skia/PDF m141', 'creator': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36', 'creationdate': '2025-10-27T09:57:02+00:00', 'source': '../data/pdf/LangChain overview - Docs by LangChain.pdf', 'file_path': '../data/pdf/LangChain overview - Docs by LangChain.pdf', 'total_pages': 4, 'format': 'PDF 1.4', 'title': 'LangChain overview - Docs by LangChain', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-10-27T09:57:02+00:00', 'trapped': '', 'modDate': "D:20251027095702+00'00'", 'creationDate': "D:20251027095702+00'00'", 'page': 0}, page_content="LangChain overview\nCopy page\nLangChain v1.0 is now available!\nFor a complete list of changes and instructions on how to upgrade your code, see the\nrelease notes and migration guide.\nIf you encounter any issues or have feedback, please open an issue so we can improve. To\nview v0.x documentation, go to the archived site.\nLangChain is the easiest w

### RAG_pipeline: data ingestion


In [64]:
from langchain_core.documents import Document
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
from pathlib import Path


from dotenv import load_dotenv

load_dotenv()

True

In [65]:
def process_all_pdf(pdf_dir):
  all_docs = []
  pdf_dir = Path(pdf_dir)

  dir_loader = DirectoryLoader(
    pdf_dir,
    glob="**/*.pdf",
    loader_cls=PyMuPDFLoader,
    show_progress=False
  )
  all_documents = dir_loader.load()
  print(f"found {len(all_documents)}")

  return all_documents


docs = process_all_pdf("../data/pdf")
  

found 15


In [66]:
# # loading pdf
# from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader

# dir_loader = PyMuPDFLoader(
#    )
# pdf_doc = dir_loader.load()
# print(pdf_doc)

In [67]:
## text splitting

def split_documents(documents,chunk_size = 1000, chunk_overlap = 200):
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    length_function = len,
    separators=["\n\n","\n"," ",""]
  )

  split_docs = text_splitter.split_documents(documents)
  print(f"split {len(documents)} to {len(split_docs)} chunks ")

  # if split_docs:
  #   print(split_docs)
  return split_docs

In [68]:
doc_chunks = split_documents(docs)

split 15 to 49 chunks 


### Embedding and vector store

In [69]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List,Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity


In [70]:
class EmbeddingMnager:

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """
        Ineatialize the embedding manager

        Args:
        model name: Hugging fce model name for sentence embedding

        """
        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        try:
            self.model = SentenceTransformer(self.model_name,token= os.getenv('HF_KEY'))
            print(f"model loaded succesfully{self.model}")

        except Exception as e:
            print("Loadding errorrrr",e)

    def generate_embedding(self, texts:List[str])-> np.ndarray:
        if not self.model:
            print("model errorr")

        print("generating embedding...")
        embeddings  = self.model.encode(texts, show_progress_bar=True)
        return embeddings

embedding_manager = EmbeddingMnager()
embedding_manager

model loaded succesfullySentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)


<__main__.EmbeddingMnager at 0x71012826f1f0>

### Vector store

In [71]:
class VectorStore:

    def __init__(
      self,
      collection_name: str = "pdf_documents",
      persist_directory: str = "../data/vector_sore",):

        """ Ineatialize vector store
        Args:
            Collection_name: Name of the collection
            persist_directory: location of vector DB
        """

        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()

    def _initialize_store(self):
        """Ineatialize chromadb client and collection"""

        try:
            os.makedirs(self.persist_directory,exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persist_directory)

            self.collection = self.client.get_or_create_collection(
        name = self.collection_name,
        metadata={"description":"PFD document embeddings for RAG"}
      )

        except Exception as e:
            print("Error found !! cant create collection",e)

    def add_documents(self, documents:List[Any], embeddings: np.ndarray):
        """ Add document and their embedding to vector store
        
        Args:
            documents: List of langchain documents
            embeddings: embeddings for the document
        """

        if len(documents) != len(embeddings):
            raise ValueError("Number of document must match number of embedding")

        # adding data
        ids = []
        metadatas = []
        document_text = []
        embedding_list = []

        for i, (doc,embedding) in enumerate(zip(documents,embeddings)):
            
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)

            # prepare metadata
            metadata  = dict(doc.metadata)
            metadata['doc_index'] = i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)

            document_text.append(doc.page_content)
            print(embedding,type(embedding))
            embedding_list.append(embedding.tolist())

        try:
            self.collection.add(
                ids = ids,
                embeddings=embedding_list,
                metadatas=metadatas,
                documents = document_text
            )
            print("data added succesfullyyyy")

        except Exception as e:
            print("error founf while adding data!!!",e)


vector_store = VectorStore()
vector_store

<__main__.VectorStore at 0x71012828ae60>

In [72]:
doc_chunks

[Document(metadata={'producer': 'Skia/PDF m141', 'creator': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36', 'creationdate': '2025-10-27T09:57:02+00:00', 'source': '../data/pdf/LangChain overview - Docs by LangChain.pdf', 'file_path': '../data/pdf/LangChain overview - Docs by LangChain.pdf', 'total_pages': 4, 'format': 'PDF 1.4', 'title': 'LangChain overview - Docs by LangChain', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-10-27T09:57:02+00:00', 'trapped': '', 'modDate': "D:20251027095702+00'00'", 'creationDate': "D:20251027095702+00'00'", 'page': 0}, page_content='LangChain overview\nCopy page\nLangChain v1.0 is now available!\nFor a complete list of changes and instructions on how to upgrade your code, see the\nrelease notes and migration guide.\nIf you encounter any issues or have feedback, please open an issue so we can improve. To\nview v0.x documentation, go to the archived site.\nLangChain is the easiest w

In [73]:
#convert text to embedding
texts = [doc.page_content for doc in doc_chunks]

#Generate the embedding
embeddings = embedding_manager.generate_embedding(texts)

# #store vectordb
vector_store.add_documents(doc_chunks,embeddings)

generating embedding...


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches: 100%|██████████| 2/2 [00:02<00:00,  1.01s/it]

[-1.81916589e-03 -7.64698386e-02 -1.13948612e-02 -7.81533271e-02
 -3.39220650e-02 -3.39260586e-02 -4.18138169e-02  4.53561768e-02
 -3.87674905e-02 -2.24063527e-02 -1.03343939e-02 -2.89574191e-02
  5.63037880e-02 -1.19036087e-03  9.40575078e-02  4.51966152e-02
  6.54897168e-02  4.69215326e-02  3.37556154e-02 -7.15350583e-02
 -1.53132612e-02 -2.59530414e-02  3.89258552e-04 -2.64718058e-03
 -2.56984867e-02 -5.96107095e-02  1.43174520e-02 -1.79528110e-02
  2.67106835e-02 -1.03378575e-02  4.67290655e-02  7.21931979e-02
 -5.29102003e-03  7.18968511e-02 -2.16374081e-02  1.14319868e-01
  1.16484398e-02 -5.09084377e-04 -5.44402637e-02  2.08353307e-02
 -2.56123208e-02 -3.28093246e-02 -3.18833217e-02 -4.58392985e-02
  3.26267537e-03 -1.44938612e-02 -2.20249649e-02  2.46892441e-02
 -5.51388264e-02  2.64433436e-02 -1.93583556e-02 -1.15371771e-01
  2.90114339e-02 -1.73363611e-02 -6.45895749e-02 -8.60766787e-03
 -1.20357601e-02  1.28799323e-02 -2.35026795e-02 -8.68085176e-02
  3.21170501e-02  5.46759




### RAG- Retriever pipeline from VectorStore

In [74]:
class RAGRetriever:

    def __init__(self, vector_store: VectorStore, embedding_manager: EmbeddingMnager):

        """ Ineatialize vector store
    
    Args:
      vector_store: vector db containing document embedding
      embedding_manager: Manager for generating embedding

    """
        self.vector_store = vector_store
        self.embedding_manager = embedding_manager 

    def retrieve(self,query:str, top_k:int = 5, score_threshold:float = 0) -> List[Dict[str,Any]]:
        """ Retriebe relecvent document from query
    
    Args:
      query: The search query
      top_k: Number of top result to return
      score_threshold: Minimum similarity score threshold

    Returns:
      List of dictionaries containing retrieved documents and metadata
    """

        query_embedding = self.embedding_manager.generate_embedding([query])[0]

        try:
            results = self.vector_store.collection.query(
        query_embeddings=[query_embedding.tolist()],
        n_results= top_k
      )
            retrieved_docs = []

            print(results["metadatas"][0])

            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results["metadatas"][0]
                distances = results['distances'][0]
                ids = results['ids'][0]

                for i,(doc_id,document, metadata,distance) in enumerate(zip(ids,documents,metadatas,distances)):
                    similarity_score = 1-distance

                    if similarity_score >= score_threshold:
                        retrieved_docs.append({
                          "id":doc_id,
                          "content":document,
                          "metadata":metadata,
                          "similarity_score":similarity_score,
                          "rank":i+1
                     })
            else:
                print("No documnet found!")

        except Exception as e:
            print("Errorr while retrieving..",e)
            return []
        return retrieved_docs


rag_retriever = RAGRetriever(vector_store, embedding_manager)

In [75]:
rag_retriever.retrieve("what is attention mechanism")

generating embedding...


Batches: 100%|██████████| 1/1 [00:00<00:00, 70.95it/s]

[{'subject': 'Neural Information Processing Systems http://nips.cc/', 'file_path': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'modDate': "D:20180212212210-08'00'", 'total_pages': 11, 'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin', 'content_length': 835, 'page': 3, 'trapped': '', 'title': 'Attention is All you Need', 'creator': '', 'creationDate': '', 'format': 'PDF 1.3', 'moddate': '2018-02-12T21:22:10-08:00', 'creationdate': '', 'source': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'keywords': '', 'producer': 'PyPDF2', 'doc_index': 20}, {'content_length': 835, 'trapped': '', 'keywords': '', 'file_path': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin', 'moddate': '2018-02-12T21:22:10-08:00', 'producer': 'PyPDF2', 'subject':




[]

### Vectordb context pipeline with llm output

In [76]:
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from dotenv import load_dotenv
load_dotenv()

#using gooole gemini llm
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash", api_key=os.getenv("GEMINI_API_KEY")
)

def rag_simple(query:str, retriever:RAGRetriever, llm, top_k =3):
  """ This will retriev context and give it to llm for generate output
  
  Args:
    query: user query
    retriever: Retriver for getting context
    llm: LLM model
    top_k: number of ouptut from vectordb
    
  Returns:
    LLM response content  
  """
  
  results = retriever.retrieve(query, top_k = top_k)
  context: str = "\n\n".join([doc['content'] for doc in results]) if results else ""

  if not context:
    return "No relevent context found!"
  
  #generate answer using llm
  prompt = f"""Answer for the question using the following context
            context:{context}
            question:{query}
            Answer:
            """
  response = llm.invoke([prompt.format(context = context, query = query)])

  return response.content
  



In [77]:
rag_simple("what is attention mechanism", rag_retriever, llm)

generating embedding...


Batches: 100%|██████████| 1/1 [00:00<00:00, 47.46it/s]

[{'format': 'PDF 1.3', 'content_length': 835, 'doc_index': 20, 'producer': 'PyPDF2', 'creationdate': '', 'moddate': '2018-02-12T21:22:10-08:00', 'total_pages': 11, 'file_path': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'source': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin', 'title': 'Attention is All you Need', 'trapped': '', 'creator': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'keywords': '', 'page': 3, 'modDate': "D:20180212212210-08'00'", 'creationDate': ''}, {'format': 'PDF 1.3', 'producer': 'PyPDF2', 'moddate': '2018-02-12T21:22:10-08:00', 'content_length': 835, 'trapped': '', 'keywords': '', 'source': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'title': 'Attention is All you Need', 'modDate': "D:20180212212210-08'00'", 'subject': 'Neural Information Processing Systems 




'No relevent context found!'

### Enhanced RAG pipeline

In [78]:
def rag_advanced(query,retriever,llm,top_k = 5, min_score = 0.0, return_context = False):
    """RAG with extra feature
  
  Args:
    query: user query
    retriever: Retriver for getting context
    llm: LLM model
    top_k: number of ouptut from vectordb
    
  Returns:
    LLM response content  
  
  """

    results = retriever.retrieve(query, top_k=5, score_threshold=min_score)
    print(results)
    if not results:
        return {
            "answer": "No relevent context found.",
            "source": [],
            "confidence": 0.0,
            "context": "",
        }

    # prepare context and source
    context = "\n\n".join([doc["content"] for doc in results]) if results else ""
    sources = [
        {
            "source": doc["metadata"].get("source", "Unknown"),
            "page": doc["metadata"].get("Page", "Unknown"),
            "score": doc["similarity_score"],
            "preview": doc["content"][:120]+"..."

        }for doc in results] #applying throughout the results
    confidence = max([doc['similarity_score'] for doc in results])
    
    #Generating output
    prompt = f"""Use the following context to answer for the given question consisely
     
    context:{context}

    question:{query}

    Answer:
     """
    
    response = llm.invoke([prompt.format(context = context, query = query)])

    output = {
        "answer":response.content,
        "sources":sources,
        "confidence": confidence
    }
    
    if return_context:
        output['context'] = context
    
    return output
    

In [79]:
result = rag_advanced(
    "what is attention mechanism",
    rag_retriever,
    llm,
    top_k=3,
    min_score=0.0,
    return_context=True,
)

result

generating embedding...


Batches: 100%|██████████| 1/1 [00:00<00:00, 31.89it/s]

[{'producer': 'PyPDF2', 'format': 'PDF 1.3', 'source': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'file_path': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'title': 'Attention is All you Need', 'creationDate': '', 'creationdate': '', 'modDate': "D:20180212212210-08'00'", 'subject': 'Neural Information Processing Systems http://nips.cc/', 'moddate': '2018-02-12T21:22:10-08:00', 'keywords': '', 'content_length': 835, 'doc_index': 20, 'page': 3, 'total_pages': 11, 'creator': '', 'trapped': '', 'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin'}, {'moddate': '2018-02-12T21:22:10-08:00', 'doc_index': 20, 'page': 3, 'producer': 'PyPDF2', 'title': 'Attention is All you Need', 'trapped': '', 'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin', 'keywords': '', 'modDate': "D:20180212212210-08'00'", 'co




{'answer': 'No relevent context found.',
 'source': [],
 'confidence': 0.0,
 'context': ''}

### Advanced RAG pipeline

In [80]:
import time


class AdvancedRAGPipeline:
    def __init__(self,retriever,llm):
        """ setting retriever and llm

        Args:
            retriever:
            llm:

        """
        self.retriever = retriever
        self.llm = llm
        self.history = [] #for storing query history

    def query(self,question:str, top_k = 5, min_score: float = 0.1, stream:bool = False, summarize:bool = False):
        """This will return the response for a query
    
          Args:
            quetion:
            top_k:
            min_score:
            stream:
        """
        results = self.retriever.retrieve(
            question, top_k=top_k, score_threshold=min_score
        )

        if not results:
            answer = "No relevent context found!"
            sources = []
            context = ""
        else:
            context = "\n\n".join(doc["content"] for doc in results)
            sources = [
                {
                    "source": doc["metadata"].get("source", "Unknown"),
                    "page": doc["metadata"].get("Page", "Unknown"),
                    "score": doc["similarity_score"],
                    "preview": doc["content"][:120] + "...",
                }
                for doc in results
            ]  

            prompt = f"""Use the following context to answer the question consisely
            \n context:{context}
            \n question:{question}
            """
            if stream:
                print("streaming..")
                for i in range(0,len(prompt),50):
                    print(prompt[i:i+50],end = "",flush=True)
                    time.sleep(0.05)
                print()

            response = self.llm.invoke([prompt.format(context = context, question = question)])
            answer = response.content



            citations = [f"[{i+1}] {src['source']} (page {src['page']})" for i,src in enumerate(sources)]
            answer_with_citations = answer+"\n\nCitations:\n:"+"\n".join(citations) if citations else answer

            # summarize
            summary = None
            if summarize and answer:
                summary_prompt = f"Summarize the following answer in 2 sentences:\n{answer}"
                summary_resp = self.llm.invoke([summary_prompt])
                summary = summary_resp.content

            # store query history
            self.history.append({
                "question":question,
                "answer":answer,
                "sources":sources,
                "summary":summary
            })

            return {
                "question": question,
                "answer": answer_with_citations,
                "sources": sources,
                "summary": summary,
                "history":self.history
            }


avd_rag = AdvancedRAGPipeline(rag_retriever,llm)
result = avd_rag.query("what is attention mechanism", stream=True)
print(result)

generating embedding...


Batches: 100%|██████████| 1/1 [00:00<00:00,  9.01it/s]

[{'keywords': '', 'creationDate': '', 'file_path': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'creationdate': '', 'creator': '', 'format': 'PDF 1.3', 'modDate': "D:20180212212210-08'00'", 'page': 3, 'trapped': '', 'doc_index': 20, 'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'content_length': 835, 'total_pages': 11, 'source': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'producer': 'PyPDF2', 'moddate': '2018-02-12T21:22:10-08:00', 'title': 'Attention is All you Need'}, {'content_length': 835, 'creationdate': '', 'keywords': '', 'producer': 'PyPDF2', 'moddate': '2018-02-12T21:22:10-08:00', 'creationDate': '', 'source': '../data/pdf/NIPS-2017-attention-is-all-you-need-Paper.pdf', 'modDate': "D:20180212212210-08'00'", 'doc_index': 20, 'total_pages': 11, 'title': 'Attention is All you Need', 'subject':


