## **Setup Console**

In [3]:
from rich.console import Console
from rich import print as pretty_print
from rich.pretty import pprint
from rich import inspect

# Create a rich Console instance for later use.
# NOTE(review): the cells visible in this section print via pretty_print/pprint
# and never reference `console` — confirm it is used further down before keeping it.
console = Console()

## **Setup PDF Loader**

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document

# Location of the source paper, relative to this notebook.
PDF_PATH = "../assets/NIPS-2017-attention-is-all-you-need-Paper.pdf"

# Read the PDF into a list of LangChain `Document` objects.
loader: PyPDFLoader = PyPDFLoader(PDF_PATH)
documents: list[Document] = loader.load()

# Quick sanity check: how many documents were produced, and what the first one looks like.
pretty_print(f"[bold white]Length of documents: {len(documents)}[/]")
first_document = documents[0]
pprint(first_document)
# Tip: `inspect(documents, methods=True)` can be used to explore the returned object.

## **Setup TextSplitter**

### **Recursive Character Splitter (Baseline)**

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings: target chunk size and the overlap between adjacent chunks.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Split every loaded document into smaller chunks.
docs_chunk = text_splitter.split_documents(documents)

pretty_print(f"[bold white]Length of split documents: {len(docs_chunk)}[/]")
# Tip: `inspect(docs_chunk[1])` can be used to look at a single chunk in detail.

### **Semantic Chunker (Embedding-Based)**

In [6]:
# Show the models available in the local Ollama install. The cells below
# reference "nomic-embed-text:v1.5" (embeddings) and "mistral:latest" (chat),
# so both need to appear in this listing.
!ollama list

NAME                     ID              SIZE      MODIFIED       
llama3.1:8b              46e0c10c039e    4.9 GB    27 minutes ago    
nomic-embed-text:v1.5    0a109f422b47    274 MB    25 hours ago      
deepseek-r1:1.5b         e0979632db5a    1.1 GB    7 days ago        
llava:13b                0d0eb4d7f485    8.0 GB    2 months ago      
mistral:latest           3944fe81ec14    4.1 GB    2 months ago      


In [7]:
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain_experimental.text_splitter import SemanticChunker

# Name of the Ollama embedding model used for semantic chunking.
EMBEDDING_MODEL_NAME = "nomic-embed-text:v1.5"

embedding_model = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)

# Chunk the loaded documents with a splitter driven by the embedding model
# instead of a fixed character budget.
semantic_splitter = SemanticChunker(embeddings=embedding_model)
semantic_chunks = semantic_splitter.split_documents(documents)

# Report how many chunks were produced and show the first one.
pretty_print(f"Length of semantic chunks: {len(semantic_chunks)}")
first_semantic_chunk = semantic_chunks[0]
pprint(first_semantic_chunk)


In [None]:
from langchain_chroma import Chroma
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough,RunnableParallel


# On-disk location of the Chroma collection and the chat model served by Ollama.
PERSIST_DIR = "../DB/transformer_paper_db"
CHAT_MODEL_NAME = "mistral:latest"

# Embed the semantic chunks and store them in a Chroma vector store.
# NOTE(review): because the store is persisted, re-running this cell presumably
# adds the same chunks again — confirm, and clear PERSIST_DIR between runs if so.
vectorstore = Chroma.from_documents(
    documents=semantic_chunks,
    embedding=embedding_model,
    persist_directory=PERSIST_DIR,
)

llm = ChatOllama(model=CHAT_MODEL_NAME)

# Two-message prompt: the system message carries the retrieved `{context}`,
# the human message carries the user's `{question}`.
system_message = SystemMessagePromptTemplate.from_template(
    "You are a helpful assistant. Here is one context {context}"
)
human_message = HumanMessagePromptTemplate.from_template("{question}")
prompt_template = ChatPromptTemplate.from_messages([system_message, human_message])


# Retriever returning the 2 best-matching chunks from the vector store.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# RAG pipeline: question -> {context, question} -> prompt -> LLM -> plain string.
#
# The question string is fanned out exactly once:
#   * "context":  the retriever's results for the question
#   * "question": the question itself, passed through unchanged
#
# Use a single fan-out step. A dict literal in this position is coerced to the
# same RunnableParallel, so chaining both forms would hand the first step's
# output dict (not the question string) to the retriever and to `{question}`.
chain = (
    RunnableParallel(context=retriever, question=RunnablePassthrough())
    | prompt_template
    | llm
    | StrOutputParser()
)

# Stream the answer and print each chunk as soon as it arrives.
for chunk in chain.stream("Tell me about transformers in details."):
    print(chunk, end="", flush=True)

 The Transformer is a type of neural network architecture proposed in the paper "Attention is All You Need" by Ashish Vaswani et al., published at the Neural Information Processing Systems (NIPS) Conference in 2017. The Transformer model is designed for sequence-to-sequence tasks, such as machine translation, and it significantly outperformed existing models while being more parallelizable and requiring less training time.

The Transformer architecture consists of an encoder and a decoder, both built using self-attention mechanisms instead of recurrence or convolutions. Here is a detailed breakdown of the Transformer model:

1. Encoder: The encoder is composed of multiple identical layers stacked on top of each other. Each layer in the encoder contains two sub-layers followed by layer normalization and residual connections. The first sub-layer applies self-attention to the input, while the second sub-layer is a fully connected feed-forward network (FCFFN). To facilitate residual connec