In [59]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key used by the later cells comes from — confirm.
load_dotenv()

True

#### LangChain + OpenAI GPT-3.5 Setup

In [62]:
from langchain_openai import ChatOpenAI

# Chat model shared by the rest of the notebook (used as the generation step of the RAG chain).
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
    temperature=0.3,
)

#### Load Documents

In [63]:
from langchain_community.document_loaders import PyPDFLoader

# Source paper, given relative to the notebook's working directory.
pdf_path = "examplefiles/attention is all you need.pdf"

# Parse the PDF into Document objects; later cells split and index `docs`.
loader = PyPDFLoader(file_path=pdf_path)
docs = loader.load()

#### Split Documents

In [64]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters written out explicitly. These are the values the bare
# constructor call falls back to (library defaults at the time of writing),
# so the resulting splits are unchanged.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=4000,
    chunk_overlap=200,
)

# Break the loaded documents into chunks for embedding.
all_splits = text_splitter.split_documents(docs)

# Display the number of chunks produced.
len(all_splits)

16

#### Store Splits

In [65]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed and index the chunked documents (`all_splits`) rather than the unsplit
# `docs`, so that the output of the splitting step is what actually gets
# stored and retrieved.
# NOTE(review): the collection is persisted under "data"; re-running this cell
# likely adds the same chunks to that collection again — confirm, and clear or
# reuse the existing store if duplicates show up in retrieval results.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(),
    persist_directory="data",
)

#### Retrieval

In [50]:
# Expose the vector store as a retriever: similarity search, k=6 results per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

# Sanity-check retrieval with a sample question.
retrieved_docs = retriever.invoke("which dataset is used to test the model")

In [66]:
# Show the first retrieved chunk on a single line by flattening its newlines.
first_hit = retrieved_docs[0]
first_hit.page_content.replace("\n", " ")

'Table 3: Variations on the Transformer architecture. Unlisted values are identical to those of the base model. All metrics are on the English-to-German translation development set, newstest2013. Listed perplexities are per-wordpiece, according to our byte-pair encoding, and should not be compared to per-word perplexities. N d model dff h d k dv Pdrop ϵls train PPL BLEU params steps (dev) (dev) ×106 base 6 512 2048 8 64 64 0.1 0.1 100K 4.92 25.8 65 (A) 1 512 512 5.29 24.9 4 128 128 5.00 25.5 16 32 32 4.91 25.8 32 16 16 5.01 25.4 (B) 16 5.16 25.1 58 32 5.01 25.4 60 (C) 2 6.11 23.7 36 4 5.19 25.3 50 8 4.88 25.5 80 256 32 32 5.75 24.5 28 1024 128 128 4.66 26.0 168 1024 5.12 25.4 53 4096 4.75 26.2 90 (D) 0.0 5.77 24.6 0.2 4.95 25.5 0.0 4.67 25.3 0.2 5.47 25.7 (E) positional embedding instead of sinusoids 4.92 25.7 big 6 1024 4096 16 0.3 300K 4.33 26.4 213 development set, newstest2013. We used beam search as described in the previous section, but no checkpoint averaging. We present these r

#### Runnable (RAG Chain)

In [69]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate


def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Prompt text for the RAG chain. It has two placeholders: {context} and {question}.
# Built from adjacent string literals; the resulting value is unchanged.
template = (
    "Answer the question based only on the following context. "
    "please Answer with more than 3 sentences. :\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)


In [70]:

prompt = ChatPromptTemplate.from_template(template)

# Inputs for the prompt: "context" is the retriever output passed through
# format_docs, "question" is the caller's input forwarded unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build inputs -> fill prompt -> call the chat model -> plain string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Stream the answer piece by piece as it arrives.
# The question is Korean for "What is the topic of the paper?".
for chunk in rag_chain.stream("논문의 주제는 뭐야?"):
    print(chunk, end="", flush=True)

이 문맥에서는 논문이 attention heads의 행동과 self-attention의 구조와 관련된 주제를 다루고 있다. 논문은 encoder self-attention의 레이어 5에서 다양한 작업을 수행하는 attention heads에 대한 예시를 제시하고 있다. 또한, anaphora resolution과 같은 특정 작업에 관련된 attention heads에 대한 분석도 진행하고 있다. 따라서, 이 논문의 주제는 attention mechanism과 self-attention의 동작에 대한 연구와 분석이라고 할 수 있다.