In [6]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain.chat_models import init_chat_model
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_chroma import Chroma
import numpy as np
from typing import List
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
model = init_chat_model("groq:qwen/qwen3-32b")


In [7]:
sample_docs = [
    """
    Retrieval-Augmented Generation (RAG) and Vector Databases
    
    RAG systems enhance Large Language Models by retrieving relevant data from external sources like ChromaDB or FAISS. 
    By converting documents into vector embeddings, the system can perform semantic searches to find context 
    that a model wasn't originally trained on, reducing hallucinations.
    """,
    
    """
    Agentic AI and Autonomous Workflows
    
    Agentic AI refers to systems designed to use tools and make decisions to achieve a goal. 
    Unlike standard chatbots, AI agents can use 'Reasoning and Acting' (ReAct) patterns to 
    call APIs, search the web, or execute code independently to complete multi-step tasks.
    """,
    
    """
    Cloud-Native Microservices and Scalability
    
    Modern backend architectures often utilize Spring Boot and Docker to create microservices. 
    Deploying these on AWS using services like EKS or Lambda allows for elastic scaling. 
    API Gateways act as the entry point, routing traffic to specific services like Product, Order, or User modules.
    """,
    
    """
    Natural Language Processing with NLTK and Transformers
    
    Natural Language Processing (NLP) involves the interaction between computers and human languages. 
    Libraries like NLTK are used for basic tokenization and stop-word removal, while Transformer-based 
    models like BERT or GPT handle complex tasks like sentiment analysis and language translation.
    """
]

In [8]:
sample_docs

["\n    Retrieval-Augmented Generation (RAG) and Vector Databases\n\n    RAG systems enhance Large Language Models by retrieving relevant data from external sources like ChromaDB or FAISS. \n    By converting documents into vector embeddings, the system can perform semantic searches to find context \n    that a model wasn't originally trained on, reducing hallucinations.\n    ",
 "\n    Agentic AI and Autonomous Workflows\n\n    Agentic AI refers to systems designed to use tools and make decisions to achieve a goal. \n    Unlike standard chatbots, AI agents can use 'Reasoning and Acting' (ReAct) patterns to \n    call APIs, search the web, or execute code independently to complete multi-step tasks.\n    ",
 '\n    Cloud-Native Microservices and Scalability\n\n    Modern backend architectures often utilize Spring Boot and Docker to create microservices. \n    Deploying these on AWS using services like EKS or Lambda allows for elastic scaling. \n    API Gateways act as the entry point, r

In [10]:
#Save sample
import tempfile
temp_dir = tempfile.mkdtemp()


for i , doc in enumerate(sample_docs):
    with open(f"doc.{i}.txt", "w") as f:
        f.write(doc)


In [None]:
#Document Loading
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader(
    "Data",
    glob ="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={'encoding':'utf-8'}
)

In [12]:
loader

<langchain_community.document_loaders.directory.DirectoryLoader at 0x211c699dfd0>

In [14]:
documents = loader.load()

In [15]:
documents

[]