In [1]:
from langchain_community.document_loaders import DirectoryLoader  , TextLoader


dir_loader=DirectoryLoader(
    "data/text_files",
    glob="**/*.txt",
    loader_cls=TextLoader,   
    loader_kwargs={'encoding':'utf-8'},
    show_progress=False
    
)

doc=dir_loader.load()

for d in doc:
    print("SOURCE:", d.metadata["source"])
    print("CONTENT:")
    print(d.page_content)
    print("-" * 80)


  from .autonotebook import tqdm as notebook_tqdm


SOURCE: data\text_files\Ml_intro.txt
CONTENT:
Machine Learning (ML) is a fundamental branch of artificial intelligence that focuses on enabling computers to learn from data and make decisions or predictions without being explicitly programmed for every task.

In machine learning, algorithms analyze historical data to identify patterns, relationships, and trends that help them generalize to new and unseen data.

Machine learning techniques are commonly divided into supervised learning, unsupervised learning, semi-supervised learning, and reinforcement learning, each designed to solve different types of problems such as classification, regression, clustering, and control.

Some widely used machine learning algorithms include linear regression, decision trees, support vector machines, k-nearest neighbors, neural networks, and ensemble models.

The machine learning process usually involves data collection, data cleaning, feature selection, model training, performance evaluation, and deploy

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    separators=["\n\n","\n",". "," ",""]
)
chunks=text_splitter.split_documents(doc)

In [3]:
chunks[0]

Document(metadata={'source': 'data\\text_files\\Ml_intro.txt'}, page_content='Machine Learning (ML) is a fundamental branch of artificial intelligence that focuses on enabling computers to learn from data and make decisions or predictions without being explicitly programmed for every task.\n\nIn machine learning, algorithms analyze historical data to identify patterns, relationships, and trends that help them generalize to new and unseen data.')

In [4]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
embeddings_model=HuggingFaceBgeEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"

)
'''
vector=embeddings_model.embed_query("Who am i")
vector'''

  embeddings_model=HuggingFaceBgeEmbeddings(


'\nvector=embeddings_model.embed_query("Who am i")\nvector'

In [5]:
from langchain_community.vectorstores import Chroma

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings_model,
    persist_directory="./chroma_db"
)


In [6]:
from langchain_community.llms import Ollama
llm = Ollama(
    model="smalllama-rag",
    temperature=0.0,
    top_p=1.0
)


  llm = Ollama(


In [7]:
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3}
)


In [8]:
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a strict retrieval-augmented generation (RAG) system.

RULES (ABSOLUTE â€” NO EXCEPTIONS):
- Use ONLY the information explicitly present in the Context.
- Do NOT use external knowledge, reasoning, or inference.
- If the answer is NOT explicitly stated in the Context, output EXACTLY:
I don't know based on the provided context.
- Your response MUST be ONE line only.
- Output ONLY the answer. No explanations. No extra text.

Context:
{context}

Question:
{question}

Final Answer:
"""
)


In [9]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)



In [10]:
docs = retriever.invoke("What is Machine Learning?")

for d in docs:
    print(d.page_content)
    print("-" * 40)


Machine Learning (ML) is a fundamental branch of artificial intelligence that focuses on enabling computers to learn from data and make decisions or predictions without being explicitly programmed for every task.

In machine learning, algorithms analyze historical data to identify patterns, relationships, and trends that help them generalize to new and unseen data.
----------------------------------------
Machine Learning (ML) is a fundamental branch of artificial intelligence that focuses on enabling computers to learn from data and make decisions or predictions without being explicitly programmed for every task.

In machine learning, algorithms analyze historical data to identify patterns, relationships, and trends that help them generalize to new and unseen data.
----------------------------------------
Machine Learning (ML) is a fundamental branch of artificial intelligence that focuses on enabling computers to learn from data and make decisions or predictions without being explici

In [11]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)


In [13]:
query = "What is RAG?"
response = rag_chain.invoke(query)
print(response)


RAG (Retrieval-Augmented Generation) is an advanced approach in artificial intelligence that combines information retrieval techniques with generative language models to improve the quality and accuracy of responses. It involves retrieving relevant information from external sources such as documents, databases, PDFs, or knowledge bases at the time a query is made. The system does not rely on external knowledge but rather uses pre-trained languaage models to retrieve relevant information.
