In [1]:
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings,ChatHuggingFace,HuggingFaceEndpoint

  import pynvml  # type: ignore[import]


In [2]:
doc=Document(
    page_content="Completing the RAG Pipeline",
    metadata={
        "source":"python_introduction.txt",
        "author":"Masemene Matlakana Benny",
        "date":"13/01/2026"
    }
)
doc

Document(metadata={'source': 'python_introduction.txt', 'author': 'Masemene Matlakana Benny', 'date': '13/01/2026'}, page_content='Completing the RAG Pipeline')

In [3]:
loader=TextLoader("python_introduction.txt")
document=loader.load()

In [4]:
## text splitting:
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    separators=["\n\n","\n","", " "]
)

In [5]:
## get the chunks:
chunks=text_splitter.split_documents([doc])

In [6]:
## create the embedding:
embedding=HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

In [7]:
## create the vector store:
vector_store=Chroma.from_documents(
    documents=chunks,
    embedding=embedding,
    persist_directory="./chroma_db"
)
vector_store

<langchain_community.vectorstores.chroma.Chroma at 0x2a0a96eb0e0>

In [8]:
## create the retriever:
retriever=vector_store.as_retriever(
    search_type="similarity",
    search_kwags={"k":2}
)
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000002A0A96EB0E0>, search_kwargs={})

In [9]:
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
    You are a question-answering assistant.

    RULES:
    - Answer using ONLY the context below.
    - If the answer is not in the context, respond EXACTLY with:
      "No response to the question."
    - Return ONLY the final answer.
    - Do NOT explain your reasoning.
    - Do NOT include thoughts, analysis, tags like <think>.

    Context:
    {context}

    Question:
    {question}
    """
)


In [10]:
llm_endpoint = HuggingFaceEndpoint(
    repo_id="deepseek-ai/DeepSeek-R1-0528",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
    provider="auto",  # let Hugging Face choose the best provider for you
)

In [11]:
chat_model=ChatHuggingFace(llm=llm_endpoint)
chat_model

ChatHuggingFace(llm=HuggingFaceEndpoint(repo_id='deepseek-ai/DeepSeek-R1-0528', provider='auto', repetition_penalty=1.03, stop_sequences=[], server_kwargs={}, model_kwargs={}, model='deepseek-ai/DeepSeek-R1-0528', client=<InferenceClient(model='deepseek-ai/DeepSeek-R1-0528', timeout=120)>, async_client=<InferenceClient(model='deepseek-ai/DeepSeek-R1-0528', timeout=120)>, task='text-generation'), model_id='deepseek-ai/DeepSeek-R1-0528', temperature=0.8, frequency_penalty=1.03, top_p=0.95, max_tokens=512, model_kwargs={})

In [12]:
from langchain_core.runnables import RunnablePassthrough

rag_chain=({
    "context":retriever,
    "question":RunnablePassthrough()

}
| prompt
| chat_model
| StrOutputParser()
          )

In [13]:
rag_chain.invoke(
    "what is Python used for"
)

'<think>\nWe are given a context that includes several documents, but note that all the page_content entries are either "Completing the RAG Pipeline" or "RAG 03". There is no mention of what Python is used for in the provided context.\n\nTherefore, according to the rules, if the answer is not in the context, we must respond exactly with: "No response to the question."\n\nWe are to return ONLY the final answer without any explanation, thoughts, or additional text.\n</think>\nNo response to the question.'