In [31]:
# Content-generation agent built on AWS Bedrock.
# The agent will save the generated content in a database.
# The agent will generate content from the data that a Lambda function passes to it.

In [32]:
# AWS configuration shared by every Bedrock client created in this notebook
# Chat model ID (passed as model_id to ChatBedrock)
BEDROCK_MODEL_ID = "anthropic.claude-3-haiku-20240307-v1:0"
# Embedding model ID (passed as model_id to BedrockEmbeddings)
BEDROCK_EMBEDDING_MODEL_ID = "amazon.titan-embed-text-v2:0"
# Region passed as region_name to both the chat and the embedding clients
AWS_REGION = "us-east-1"

In [33]:
# Testing AWS Client
from langchain_aws import ChatBedrock

# Smoke test: push one short prompt through Bedrock using the model ID and
# region configured above, before building the rest of the pipeline.
test_llm = ChatBedrock(
    model_id=BEDROCK_MODEL_ID,
    region_name=AWS_REGION,
    model_kwargs=dict(max_tokens=100, temperature=0.7, top_p=0.9),
)
test_response = test_llm.invoke(
    "What are the benefits of using AWS cloud services? in 1 sentence only."
)
print("Response:", test_response.content)
# Blank line separates the reply from whatever is printed next
print()

Response: The benefits of using AWS cloud services include scalability, cost-effectiveness, reliability, security, and a wide range of services and tools to support various business needs.



In [34]:
# Configuring ChromaDB & Others

# ChromaDB persistent storage path (used as persist_directory when the store
# is built and again when it is reopened for retrieval)
CHROMA_DB_PATH = "./vector_db"

# PDF files directory scanned by load_documents
# NOTE(review): the leading dot makes this a hidden directory on Unix-like
# systems - confirm that "./.docs" (not "./docs") is intended.
PDF_DATA_PATH = "./.docs"

# Text splitting configuration
# Size of each text chunk (passed as chunk_size to the text splitter)
CHUNK_SIZE = 800
# Overlap between chunks for context continuity (passed as chunk_overlap)
CHUNK_OVERLAP = 150


In [35]:
# Echo the active configuration so the recorded output shows which model IDs
# and paths this run used. One print call, one line per setting.
print(
    "Configuration loaded",
    f"   LLM: {BEDROCK_MODEL_ID}",
    f"   Embeddings: {BEDROCK_EMBEDDING_MODEL_ID}",
    f"   Vector DB: {CHROMA_DB_PATH}",
    f"   PDFs: {PDF_DATA_PATH}",
    sep="\n",
)

Configuration loaded
   LLM: anthropic.claude-3-haiku-20240307-v1:0
   Embeddings: amazon.titan-embed-text-v2:0
   Vector DB: ./vector_db
   PDFs: ./.docs


In [36]:
#  Loading PDFs files
from langchain_community.document_loaders import PyPDFLoader
import os


def load_documents(path: str):
    """Load every PDF found directly inside ``path`` into one flat list.

    Files are visited in sorted name order so repeated runs yield the same
    document order (``os.listdir`` returns entries in arbitrary order). The
    ``.pdf`` extension is matched case-insensitively, and entries that are
    not regular files (for example a directory named ``scans.pdf``) are
    skipped instead of being handed to the loader.

    Args:
        path: Directory to scan. Sub-directories are not searched.

    Returns:
        The items returned by ``PyPDFLoader(file).load()`` for each PDF,
        concatenated in file-name order. Empty when no PDF is present.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``path`` cannot be read
            (for example, it does not exist).
    """
    docs = []

    # Sorted for a reproducible order across runs and machines
    for file in sorted(os.listdir(path)):
        file_path = os.path.join(path, file)

        # Only process regular files with a PDF extension (any letter case)
        if not file.lower().endswith(".pdf") or not os.path.isfile(file_path):
            continue

        print(f"   Loading: {file}")

        # PyPDFLoader extracts text from each page
        loader = PyPDFLoader(file_path)
        docs.extend(loader.load())

    return docs

# Read every PDF under PDF_DATA_PATH; the resulting list is what the
# splitting step consumes.
print("Loading PDF documents...")
documents = load_documents(path=PDF_DATA_PATH)
print(f"Loaded {len(documents)} pages from PDFs")

Loading PDF documents...
   Loading: Activation Functions.pdf
Loaded 10 pages from PDFs


In [37]:
# Splitting Text into Chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_documents(documents: list, chunk_size=None, chunk_overlap=None):
    """Split documents into overlapping text chunks.

    Args:
        documents: Documents to split, e.g. the list from ``load_documents``.
        chunk_size: Max characters per chunk. ``None`` (the default) uses the
            module-level ``CHUNK_SIZE``.
        chunk_overlap: Overlapping characters between consecutive chunks.
            ``None`` (the default) uses the module-level ``CHUNK_OVERLAP``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``documents`` with the resolved settings.
    """
    # Resolve the defaults at call time (not at definition time) so that
    # editing the configuration cell takes effect without re-running this one.
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if chunk_overlap is None:
        chunk_overlap = CHUNK_OVERLAP

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,        # Max characters per chunk
        chunk_overlap=chunk_overlap   # Overlapping chars between chunks
    )

    return splitter.split_documents(documents)

# Split documents into chunks
print("Splitting documents into chunks...")
chunks = split_documents(documents)
print(f"Created {len(chunks)} chunks")

# Fail with an actionable message instead of a bare IndexError on chunks[0]
# when nothing was produced (no PDFs found, or PDFs without extractable text).
if not chunks:
    raise ValueError(
        f"No text chunks were produced from {len(documents)} page(s); "
        "check that the PDF directory contains PDFs with extractable text."
    )

print(f"   Sample chunk preview: {chunks[0].page_content[:100]}...")


Splitting documents into chunks...
Created 25 chunks
   Sample chunk preview: Introduction to Activation Functionsin Neural Networks
Gain an understanding of common activation fu...


In [38]:
# Embedding + Vector Database

from langchain_aws import BedrockEmbeddings
from langchain_chroma import Chroma

def build_vector_store(chunks: list):
    """Embed ``chunks`` with Bedrock and persist them in the Chroma database.

    Each chunk is stored under a deterministic ID: a SHA-256 of its metadata
    and text, plus an occurrence counter so exact duplicates inside one batch
    still get distinct IDs. Without explicit IDs the store assigns random
    ones, so every re-run of the notebook adds another full copy of the
    corpus to ``CHROMA_DB_PATH`` and similarity search starts returning the
    same passage several times. With stable IDs a re-run writes to the same
    entries instead.

    Note: copies written under random IDs by earlier runs are not removed;
    delete the ``CHROMA_DB_PATH`` directory once to clear them. Re-running
    still recomputes the embeddings.

    Args:
        chunks: Documents exposing ``page_content`` and ``metadata``
            (e.g. the output of ``split_documents``).

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    import hashlib  # local import keeps this cell self-contained

    embedding = BedrockEmbeddings(
        model_id=BEDROCK_EMBEDDING_MODEL_ID,
        region_name=AWS_REGION
    )

    ids = []
    occurrences = {}
    for chunk in chunks:
        metadata = getattr(chunk, "metadata", None) or {}
        # Sort by key so the fingerprint does not depend on dict insertion order
        ordered_metadata = sorted(metadata.items(), key=lambda item: str(item[0]))
        fingerprint = repr(ordered_metadata) + "\x00" + chunk.page_content
        digest = hashlib.sha256(
            fingerprint.encode("utf-8", "backslashreplace")
        ).hexdigest()

        # Identical chunks share a digest; the counter keeps their IDs unique
        # within the batch while staying reproducible across runs.
        seen = occurrences.get(digest, 0)
        occurrences[digest] = seen + 1
        ids.append(f"{digest}-{seen}")

    # Create ChromaDB vector store from documents
    # This embeds all chunks and stores them persistently
    db = Chroma.from_documents(
        documents=chunks,
        embedding=embedding,
        ids=ids,
        persist_directory=CHROMA_DB_PATH
    )

    return db

# Embed the chunks and write them to the on-disk Chroma store; the handle is
# kept in `vectorstore`.
print("Creating embeddings and storing in ChromaDB...")
vectorstore = build_vector_store(chunks=chunks)
print(f"Vector store created at: {CHROMA_DB_PATH}")

Creating embeddings and storing in ChromaDB...
Vector store created at: ./vector_db


In [39]:
# Retriever Setup

def get_retriever(k: int = 5):
    """Open the persisted Chroma store and wrap it as a similarity retriever.

    Args:
        k: Number of most-similar chunks to return per query. Defaults to 5.

    Returns:
        The retriever produced by ``Chroma.as_retriever`` with
        ``search_kwargs={"k": k}``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Must be the same embedding model that was used to build the store,
    # otherwise query vectors and stored vectors are not comparable.
    embedding = BedrockEmbeddings(
        model_id=BEDROCK_EMBEDDING_MODEL_ID,
        region_name=AWS_REGION
    )

    # Load the persisted vector store
    db = Chroma(
        persist_directory=CHROMA_DB_PATH,
        embedding_function=embedding
    )

    # Create retriever that returns the top-k most similar chunks
    return db.as_retriever(search_kwargs={"k": k})

# Initialize retriever
# `retriever` is a module-level name: the retrieve_context tool defined in the
# agent-setup cell calls retriever.invoke(...), so this cell must run first.
print("Initializing retriever...")
retriever = get_retriever()
print("Retriever ready for similarity search")

Initializing retriever...
Retriever ready for similarity search


In [40]:
# Structured JSON System Prompt
#
# Instructs the model to answer with a single JSON object. The parsing cell
# further down feeds the reply straight into json.loads and reads its
# "content" key, so changes to this schema must be mirrored there.
# NOTE(review): rule 5 asks for formulas, definitions and key terms, but the
# schema has no dedicated field for them - confirm where they should go.
# NOTE(review): "facts" is described as a string while rule 3 mentions empty
# arrays - confirm the intended type.

SYSTEM_PROMPT = """
You are a Machine Learning  Assistant.
Use the user input, retrieved PDF content and respond ONLY in valid JSON format.

Your response MUST be a valid JSON object with this exact structure:

{
  "title": "Topic title based on the question",
  "content": "Detailed explanation from the PDF content",
  "summary": "A concise 2-3 sentence summary",
  "facts": "Key facts, figures, and specifications",
  "quiz": {
    "topic": "Quiz topic",
    "questions": [
      {
        "question": "Question text",
        "options": ["A) ...", "B) ...", "C) ...", "D) ..."],
        "correct_answer": "A"
      }
    ]
  },
  "key_notes": {
   "1.":"points should be of 1 sentence,and should be in short",
   "2.":"",
   "3.":""
  }
}

IMPORTANT RULES:
1. ONLY output valid JSON - no markdown, no explanations outside JSON
2. Base ALL content on the retrieved PDF context and the user query
3. If information is not found, use null or empty arrays []
4. Generate 2-3 quiz questions to test understanding
5. Extract any formulas, definitions, and key terms
"""

print(" System prompt configured for structured JSON output")

 System prompt configured for structured JSON output


In [41]:
# AGENT SETUP
from langchain_aws import ChatBedrock
from langchain.agents import create_agent
from langchain.tools import tool

# PDF Retriever Tool
@tool
def retrieve_context(query: str) -> str:
    """Search technical PDF documents for relevant information about Machine Learning."""
    # NOTE(review): the docstring above is kept verbatim - langchain's @tool
    # presumably exposes it to the model as the tool description; confirm
    # before rewording it.
    # Look up the chunks most similar to the query via the module-level retriever.
    matches = retriever.invoke(query)

    # Hand the passages back as one string, separated by a horizontal-rule marker.
    passages = [match.page_content for match in matches]
    separator = "\n\n---\n\n"
    return separator.join(passages)

# Bedrock LLM used by the agent (separate from the short smoke-test client:
# larger output budget and a lower temperature)
bedrock_llm = ChatBedrock(
    model_id=BEDROCK_MODEL_ID,
    region_name=AWS_REGION,
    model_kwargs={
        # NOTE(review): 10000 may exceed the output-token limit documented
        # for this model - confirm against the Bedrock model card.
        "max_tokens": 10000,
        "temperature": 0.4,
        "top_p": 0.9
    }
)

# Create Agent: the model above, the PDF retrieval tool as its only tool,
# and the structured-JSON system prompt
agent = create_agent(
    model=bedrock_llm,
    tools=[retrieve_context],
    system_prompt=SYSTEM_PROMPT
)

print("Agent initialized successfully!")

Agent initialized successfully!


In [None]:
# Run Agent Query
#
# The "query" here is a complete article on activation functions rather than
# a short question; it is sent to the agent as the user message, and the
# system prompt asks for a structured JSON reply.

query = """Activation functions are fundamental components of artificial neural networks that determine whether and how strongly a neuron should fire based on its input. They introduce non-linearity into the network, enabling it to learn complex patterns and solve problems that linear models cannot handle.
The Role of Activation Functions
In a neural network, each neuron receives weighted inputs, sums them together, and passes this sum through an activation function to produce an output. Without activation functions, neural networks would simply be stacked linear transformations, no matter how many layers they contain. The activation function is what allows networks to approximate complex, non-linear relationships in data.
Common Activation Functions
Several activation functions have been developed over the years, each with distinct characteristics. The sigmoid function, one of the earliest used, squashes input values between 0 and 1, making it useful for binary classification. However, it suffers from the vanishing gradient problem, where gradients become extremely small during backpropagation, slowing down learning in deep networks.
The hyperbolic tangent (tanh) function addresses some sigmoid limitations by centering outputs around zero, ranging from -1 to 1. While this often leads to faster convergence, it still experiences vanishing gradients for extreme input values.
The Rectified Linear Unit (ReLU) revolutionized deep learning when it became widely adopted. It simply outputs the input if positive, and zero otherwise. ReLU is computationally efficient and helps mitigate vanishing gradients, though it can suffer from "dying ReLU" where neurons become permanently inactive. Variants like Leaky ReLU and Parametric ReLU were developed to address this by allowing small negative values.
More recent innovations include ELU (Exponential Linear Unit), which smooths the function for negative values, and Swish, a self-gated activation function developed by Google that has shown improved performance in some deep networks.
Choosing the Right Activation Function
The choice of activation function depends on the specific problem and network architecture. ReLU and its variants remain the default choice for hidden layers in most deep learning applications due to their effectiveness and computational efficiency. For output layers, the choice depends on the task: sigmoid for binary classification, softmax for multi-class classification, and linear activation for regression problems.
Conclusion
Activation functions are essential for giving neural networks their power and flexibility. As deep learning continues to evolve, researchers continue exploring new activation functions that can train faster, generalize better, and overcome the limitations of existing approaches. Understanding these functions is crucial for anyone working with neural networks, as they directly impact model performance and training dynamics."""

# Run the agent with the query as the single user message. The result is
# indexed as response["messages"] by the parsing cell that follows.
response = agent.invoke(
    {"messages": [{"role": "user", "content": query}]}
)



In [65]:
import json
from pprint import pprint

# Get the agent's final AIMessage.
# Always take the LAST message: index 1 is only the final answer when the
# model replies without calling a tool. If it calls retrieve_context first,
# index 1 is the tool-call request and the answer comes later in the list.
# For a two-message exchange (user, answer) [-1] and [1] are the same item.
ai_msg = response["messages"][-1]

# Extract JSON string (the system prompt requires the reply to be pure JSON)
json_str = ai_msg.content

# Convert string into dict; json.loads raises json.JSONDecodeError if the
# model did not follow the JSON-only instruction
data = json.loads(json_str)

# Pretty print
pprint(data, width=120)



{'content': 'Activation functions are fundamental components of artificial neural networks that determine whether and '
            'how strongly a neuron should fire based on its input. They introduce non-linearity into the network, '
            'enabling it to learn complex patterns and solve problems that linear models cannot handle. Without '
            'activation functions, neural networks would simply be stacked linear transformations, no matter how many '
            'layers they contain. The activation function is what allows networks to approximate complex, non-linear '
            'relationships in data.\n'
            '\n'
            'Several activation functions have been developed over the years, each with distinct characteristics. The '
            'sigmoid function squashes input values between 0 and 1, making it useful for binary classification, but '
            'it suffers from the vanishing gradient problem. The hyperbolic tangent (tanh) function addresses some '

In [None]:
# Extract the response metadata attached to the AI message (in the recorded
# run: model_id, stop_reason and token usage)
mdata = ai_msg.additional_kwargs

# Pretty print
pprint(mdata, width=120)

{'model_id': 'anthropic.claude-3-haiku-20240307-v1:0',
 'stop_reason': 'end_turn',
 'usage': {'cache_read_input_tokens': 0,
           'cache_write_input_tokens': 0,
           'completion_tokens': 1139,
           'prompt_tokens': 1199,
           'total_tokens': 2338}}


In [67]:
# Checking format: print the "content" field on its own so its embedded
# newlines render as real paragraph breaks
print(data['content'])

Activation functions are fundamental components of artificial neural networks that determine whether and how strongly a neuron should fire based on its input. They introduce non-linearity into the network, enabling it to learn complex patterns and solve problems that linear models cannot handle. Without activation functions, neural networks would simply be stacked linear transformations, no matter how many layers they contain. The activation function is what allows networks to approximate complex, non-linear relationships in data.

Several activation functions have been developed over the years, each with distinct characteristics. The sigmoid function squashes input values between 0 and 1, making it useful for binary classification, but it suffers from the vanishing gradient problem. The hyperbolic tangent (tanh) function addresses some sigmoid limitations by centering outputs around zero, ranging from -1 to 1, but it still experiences vanishing gradients for extreme input values. The 