In [14]:
import nest_asyncio
from dotenv import load_dotenv
from IPython.display import Markdown, display

from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.core import PromptTemplate
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader

In [15]:
import os

from llama_index.core import SimpleDirectoryReader

# Directory holding the PDFs to index. Setting the PDF_DIR environment variable
# overrides the location so the notebook can run on another machine; when it is
# unset (or empty) the original hard-coded path is used unchanged.
input_dir_path = os.environ.get("PDF_DIR") or (
    "/Users/aagamchhajer/Desktop/aagam-projects/DeepSeek-RAG/pdf_dir"
)

# Restrict the reader to .pdf files and walk sub-directories as well.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    required_exts=[".pdf"],
    recursive=True,
)

# `docs` is consumed later when the vector index is built.
docs = loader.load_data()

In [18]:

# Set up the embedding model (the LLM is configured in the next cell).
# Alternative model that was tried: "Snowflake/snowflake-arctic-embed-m".
# NOTE(review): trust_remote_code=True presumably lets the loader run code
# shipped with the model repository -- confirm it is needed for this model.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
    trust_remote_code=True,
)

In [19]:
# LLM used for answer synthesis: the deepseek-r1:8b model accessed via Ollama.
llm = Ollama(
    model="deepseek-r1:8b",
    # NOTE(review): 120.0 is presumably a timeout in seconds -- confirm.
    request_timeout=120.0,
)

In [20]:

# Register the embedding model and the LLM on the global Settings object up
# front, so everything built below picks them up.
Settings.embed_model = embed_model
Settings.llm = llm

# Build a vector index over the loaded documents.
index = VectorStoreIndex.from_documents(docs, show_progress=True)

# Create the query engine with its default configuration. No reranker or other
# node post-processor is attached here.
query_engine = index.as_query_engine()

# ====== Customise prompt template ======
# {context_str} and {query_str} are template placeholders; they are left
# unformatted here and are expected to be filled in at query time.
qa_prompt_tmpl_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information above I want you to think step by step "
    "to answer the query in a crisp manner, in case you don't know the "
    "answer say 'I don't know!'.\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

# Replace the response synthesizer's question-answering prompt with the custom one.
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

# Generate the response; it is printed in the next cell.
response = query_engine.query("Define a LLM Architecture?")

Parsing nodes: 100%|██████████| 3/3 [00:00<00:00, 3036.42it/s]
Generating embeddings: 100%|██████████| 3/3 [00:01<00:00,  1.79it/s]


In [21]:
# Show the answer as plain text. In the recorded run the text begins with the
# model's <think> ... reasoning section.
print(str(response))

<think>
Okay, so I need to define an LLM architecture based on the context provided. Let me start by understanding what's given.

From page_label 2, a basic LLM app has four main elements: Foundation LLM, Vector database, Orchestration framework, and UI framework. The context also mentions that when moving to a professional level, an advanced architecture with more components is needed.

On page_label 3, the advanced version includes ten elements: Foundation LLM, Vector database, Orchestration framework, UI framework, Backend framework, Integrated external APIs, Validation framework, LLMOps, LLM cache, and Cloud provider.

So, for a basic definition, I should probably mention both basic and advanced architectures. The basic has four parts, while the advanced adds six more components on top of the original four.

I need to make sure I clearly differentiate between the two levels, highlighting each component without getting too technical. Maybe start by defining an LLM architecture in ge