In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langserve import add_routes
import uvicorn
import os
from dotenv import load_dotenv
from fastapi import Request
from openai import AzureOpenAI
from datetime import datetime

# Load variables from a local .env file into the process environment so the
# os.getenv() lookups for the NATL_AZURE_OPENAI_* settings can find them.
load_dotenv()

True

In [2]:

# Embedding model (served by Ollama) that is handed to the FAISS store.
embedding = OllamaEmbeddings(model="nomic-embed-text")

# Read the source PDF from disk.
attention_loader = PyPDFLoader("../rag/attention.pdf")
attention_docs = attention_loader.load()

# Break the loaded pages into overlapping chunks: 1000 characters each,
# with 200 characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
attention_documents = text_splitter.split_documents(attention_docs)

In [3]:
## FAISS Vector Database
# Reuse the index persisted on disk when there is one; otherwise embed the
# chunked documents and persist the result for the next run.

index_dir = "../storage/attention_index"

combo_db = None
# save_local() writes "index.faiss" (the default index name) into the folder,
# so its presence separates a normal first run from a genuine load failure.
if os.path.isfile(os.path.join(index_dir, "index.faiss")):
    try:
        # SECURITY: load_local() unpickles the stored docstore, which can run
        # arbitrary code. Only acceptable if the files in index_dir were
        # produced by this notebook; never point this at an untrusted index.
        combo_db = FAISS.load_local(index_dir, embedding, allow_dangerous_deserialization=True)
    except Exception as e:
        # Keep the best-effort rebuild, but say why the cached index was
        # rejected instead of hiding the error.
        print(f"Could not load FAISS index from {index_dir!r}, rebuilding: {e}")

if combo_db is None:
    combo_db = FAISS.from_documents(attention_documents, embedding)
    combo_db.save_local(index_dir)

db = combo_db

In [4]:
# Azure OpenAI connection settings. Everything except the API version is read
# from the environment; a missing variable yields None.
api_version = "2024-12-01-preview"
subscription_key = os.environ.get("NATL_AZURE_OPENAI_KEY")
endpoint = os.environ.get("NATL_AZURE_OPENAI_ENDPOINT")
deployment = os.environ.get("NATL_AZURE_OPENAI_MODEL__DEPLOYMENT_NAME")
# Not passed to the client below; kept because later cells may read it.
model_name = os.environ.get("NATL_AZURE_OPENAI_MODEL_NAME")

# Chat model client used by the document chain.
llm = AzureChatOpenAI(
    azure_endpoint=endpoint,
    azure_deployment=deployment,
    api_key=subscription_key,
    openai_api_version=api_version,
    temperature=0.7,
)

In [5]:
## Design chat prompt
# The template has two placeholders:
#   {input}   - the user's question, supplied as the "input" key when the chain is invoked
#   {context} - presumably filled by the stuff-documents chain with the retrieved chunks (confirm)
# NOTE: the leading indentation inside the triple-quoted string is part of the
# prompt text that is sent to the model.

prompt = ChatPromptTemplate.from_template("""
                                          Answer the following question based only on the provided context.
                                          Think step by step before providing a detailed answer.
                                          <context>
                                          {context}
                                          </context>
                                          Question: {input}""")

In [6]:
## Assemble the retrieval chain

# Retriever view over the FAISS vector store (default search settings).
retriever = db.as_retriever()

# "Stuff" chain: combines the prompt and the LLM into a chain that takes
# documents plus a question.
document_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: the retriever feeds documents into the document chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [7]:
# Ask a sample question through the full retrieval chain.
user_input = "what is an encoder?"

try:
    response = retrieval_chain.invoke({"input": user_input})
except Exception as e:
    print("error: ", str(e))
    # Re-raise: swallowing the error would leave `response` undefined on a
    # fresh kernel, or stale from an earlier run, and the next cell would then
    # fail confusingly or display an outdated answer.
    raise

In [8]:
# Display only the generated answer from the chain's response mapping.
response['answer']

'Based on the provided context, here is a detailed step-by-step explanation of what an encoder is in the Transformer model:\n\n1. **Role in the Transformer Architecture**:  \n   The encoder is one half of the Transformer model architecture, with the other half being the decoder. It processes the input sequence to generate a representation that the decoder can then use to produce an output sequence.\n\n2. **Structure of the Encoder**:  \n   - The encoder is composed of a stack of **N = 6 identical layers**.  \n   - Each layer contains **two sub-layers**:  \n     a. A **multi-head self-attention mechanism** sub-layer.  \n     b. A **position-wise fully connected feed-forward network** sub-layer.\n\n3. **Residual Connections and Layer Normalization**:  \n   Around each of the two sub-layers in each encoder layer, residual connections are employed. This means the input to a sub-layer is added to its output before applying layer normalization. Formally, the output of each sub-layer is:  \n 