In [6]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain.chat_models import init_chat_model
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_chroma import Chroma
import numpy as np
from typing import List
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
model = init_chat_model("groq:qwen/qwen3-32b")


In [7]:
sample_docs = [
    """
    Retrieval-Augmented Generation (RAG) and Vector Databases
    
    RAG systems enhance Large Language Models by retrieving relevant data from external sources like ChromaDB or FAISS. 
    By converting documents into vector embeddings, the system can perform semantic searches to find context 
    that a model wasn't originally trained on, reducing hallucinations.
    """,
    
    """
    Agentic AI and Autonomous Workflows
    
    Agentic AI refers to systems designed to use tools and make decisions to achieve a goal. 
    Unlike standard chatbots, AI agents can use 'Reasoning and Acting' (ReAct) patterns to 
    call APIs, search the web, or execute code independently to complete multi-step tasks.
    """,
    
    """
    Cloud-Native Microservices and Scalability
    
    Modern backend architectures often utilize Spring Boot and Docker to create microservices. 
    Deploying these on AWS using services like EKS or Lambda allows for elastic scaling. 
    API Gateways act as the entry point, routing traffic to specific services like Product, Order, or User modules.
    """,
    
    """
    Natural Language Processing with NLTK and Transformers
    
    Natural Language Processing (NLP) involves the interaction between computers and human languages. 
    Libraries like NLTK are used for basic tokenization and stop-word removal, while Transformer-based 
    models like BERT or GPT handle complex tasks like sentiment analysis and language translation.
    """
]

In [8]:
sample_docs

["\n    Retrieval-Augmented Generation (RAG) and Vector Databases\n\n    RAG systems enhance Large Language Models by retrieving relevant data from external sources like ChromaDB or FAISS. \n    By converting documents into vector embeddings, the system can perform semantic searches to find context \n    that a model wasn't originally trained on, reducing hallucinations.\n    ",
 "\n    Agentic AI and Autonomous Workflows\n\n    Agentic AI refers to systems designed to use tools and make decisions to achieve a goal. \n    Unlike standard chatbots, AI agents can use 'Reasoning and Acting' (ReAct) patterns to \n    call APIs, search the web, or execute code independently to complete multi-step tasks.\n    ",
 '\n    Cloud-Native Microservices and Scalability\n\n    Modern backend architectures often utilize Spring Boot and Docker to create microservices. \n    Deploying these on AWS using services like EKS or Lambda allows for elastic scaling. \n    API Gateways act as the entry point, r

In [10]:
#Save sample
import tempfile
temp_dir = tempfile.mkdtemp()


for i , doc in enumerate(sample_docs):
    with open(f"doc.{i}.txt", "w") as f:
        f.write(doc)


In [16]:
#Document Loading
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader(
    "Data",
    glob ="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={'encoding':'utf-8'}
)

In [17]:
loader

<langchain_community.document_loaders.directory.DirectoryLoader at 0x211c5146d10>

In [18]:
documents = loader.load()

In [19]:
documents

[Document(metadata={'source': 'Data\\doc.0.txt'}, page_content="\n    Retrieval-Augmented Generation (RAG) and Vector Databases\n\n    RAG systems enhance Large Language Models by retrieving relevant data from external sources like ChromaDB or FAISS. \n    By converting documents into vector embeddings, the system can perform semantic searches to find context \n    that a model wasn't originally trained on, reducing hallucinations.\n    "),
 Document(metadata={'source': 'Data\\doc.1.txt'}, page_content="\n    Agentic AI and Autonomous Workflows\n\n    Agentic AI refers to systems designed to use tools and make decisions to achieve a goal. \n    Unlike standard chatbots, AI agents can use 'Reasoning and Acting' (ReAct) patterns to \n    call APIs, search the web, or execute code independently to complete multi-step tasks.\n    "),
 Document(metadata={'source': 'Data\\doc.2.txt'}, page_content='\n    Cloud-Native Microservices and Scalability\n\n    Modern backend architectures often uti

In [22]:
#Text Splliting 
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0 , length_function = len , separators=[" "] )

chunk = text_splitter.split_documents(documents)

In [23]:
chunk

[Document(metadata={'source': 'Data\\doc.0.txt'}, page_content='Retrieval-Augmented Generation (RAG) and Vector Databases\n\n    RAG systems enhance Large'),
 Document(metadata={'source': 'Data\\doc.0.txt'}, page_content='Language Models by retrieving relevant data from external sources like ChromaDB or FAISS. \n    By'),
 Document(metadata={'source': 'Data\\doc.0.txt'}, page_content='converting documents into vector embeddings, the system can perform semantic searches to find'),
 Document(metadata={'source': 'Data\\doc.0.txt'}, page_content="context \n    that a model wasn't originally trained on, reducing hallucinations."),
 Document(metadata={'source': 'Data\\doc.1.txt'}, page_content='Agentic AI and Autonomous Workflows\n\n    Agentic AI refers to systems designed to use tools and'),
 Document(metadata={'source': 'Data\\doc.1.txt'}, page_content="make decisions to achieve a goal. \n    Unlike standard chatbots, AI agents can use 'Reasoning and"),
 Document(metadata={'source': 'Data

In [24]:
#Embedding Models 
embeddings = OpenAIEmbeddings()


In [27]:
#Chroma DB Vector Store 
persistence_directory = "./chrom_db"

vectorStore = Chroma.from_documents(
    documents=chunk,
    embedding=OpenAIEmbeddings(),
    persist_directory=persistence_directory,
    collection_name="rag_collection"
)

In [32]:
#Text Similarity Search 
query = "What refers to Agentic AI?"

similar_docs = vectorStore.similarity_search(query , k=3)
similar_docs

[Document(id='fed22f43-38e9-4be4-8e35-d2ec3e2d5f9d', metadata={'source': 'Data\\doc.1.txt'}, page_content='Agentic AI and Autonomous Workflows\n\n    Agentic AI refers to systems designed to use tools and'),
 Document(id='b7ed2ebe-41fc-444e-88e4-c2fc37b0dcab', metadata={'source': 'Data\\doc.3.txt'}, page_content='Natural Language Processing with NLTK and Transformers\n\n    Natural Language Processing (NLP)'),
 Document(id='0a3feb73-abe5-4cf1-bbb1-bb50110b138f', metadata={'source': 'Data\\doc.1.txt'}, page_content="make decisions to achieve a goal. \n    Unlike standard chatbots, AI agents can use 'Reasoning and")]

In [33]:
#Advacne similarity search 
score = vectorStore.similarity_search_with_score(query , k =3)
score

[(Document(id='fed22f43-38e9-4be4-8e35-d2ec3e2d5f9d', metadata={'source': 'Data\\doc.1.txt'}, page_content='Agentic AI and Autonomous Workflows\n\n    Agentic AI refers to systems designed to use tools and'),
  0.2739553451538086),
 (Document(id='b7ed2ebe-41fc-444e-88e4-c2fc37b0dcab', metadata={'source': 'Data\\doc.3.txt'}, page_content='Natural Language Processing with NLTK and Transformers\n\n    Natural Language Processing (NLP)'),
  0.4770853817462921),
 (Document(id='0a3feb73-abe5-4cf1-bbb1-bb50110b138f', metadata={'source': 'Data\\doc.1.txt'}, page_content="make decisions to achieve a goal. \n    Unlike standard chatbots, AI agents can use 'Reasoning and"),
  0.4980132281780243)]

In [34]:
from langchain_groq import ChatGroq

In [36]:
llm = ChatGroq(
    model= "qwen/qwen3-32b",
    temperature= 0.2,
    max_tokens=500
)

In [39]:
llm.invoke("What is Large Language Model")

AIMessage(content="<think>\nOkay, the user is asking what a large language model is. First, I need to define it clearly. A large language model is a type of artificial intelligence that processes and generates human-like text. It's based on deep learning techniques, especially neural networks.\n\nNext, I should explain the key components. They are trained on vast amounts of text data, which allows them to understand grammar, context, and even some reasoning. The training process involves predicting the next word in a sentence, which helps them learn patterns and relationships between words.\n\nThen, I should mention the different architectures, like the Transformer model, which is widely used. Transformers use attention mechanisms to handle long-range dependencies in text, making them more efficient and effective than older models like RNNs.\n\nApplications are important to highlight. They can be used for tasks like translation, summarization, question-answering, and even creative writ

In [40]:
llm = init_chat_model("groq:qwen/qwen3-32b")


In [41]:
#Rag Chain
from langchain_classic.chains import create_retrieval_chain
from langchain_core.prompts import ChatMessagePromptTemplate
from langchain_classic.chains.combine_documents import create_stuff_documents_chain