In [None]:
# 📦 Clean up conflicting packages from Kaggle's base environment
# These packages either conflict with our dependencies (e.g., Google Generative AI, LangGraph),
# or are not needed for this project (e.g., kfp, spacy, fastai, ydata-profiling).
!pip uninstall -qqy kfp jupyterlab libpysal thinc spacy fastai ydata-profiling google-cloud-bigquery google-generativeai

# 🚀 Install specific versions of LangGraph and related packages used in this notebook
# - langgraph: the core library for agent graphs and orchestration
# - langgraph-prebuilt: prebuilt components like ToolNode
# - langchain-google-genai: LangChain integration with Gemini models
!pip install -qU 'langgraph==0.3.21' 'langchain-google-genai==2.1.2' 'langgraph-prebuilt==0.1.7'


In [None]:
# ⚠️ THIS CELL MAY SHOW SOME DEPENDENCIES ERRORS, PLEASE IGNORE AS IT DOES NOT IMPACT THE REST OF THE NOTEBOOK
# 📚 Install additional packages used for RAG and document processing

# chromadb: lightweight vector database for storing and querying embeddings
!pip install -qU "chromadb==0.6.3"

# tqdm: for progress bars during indexing or loops (used in embedding steps)
!pip install tqdm

# pymupdf: for parsing and extracting text + metadata (like page numbers) from PDF documents
!pip install pymupdf


In [None]:
!pip install gradio

In [None]:
# 🔐 Load and set the Google API key from Kaggle secrets

import os
from kaggle_secrets import UserSecretsClient

# Retrieve the API key securely from Kaggle's secret manager
GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")

# Set the key as an environment variable so it can be picked up by client libraries like Gemini or LangChain
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY


# RAG

🔍 **Why and How RAG is Used in This Project**

In this project, Retrieval-Augmented Generation (RAG) is used to provide accurate, grounded answers about the ElevatePro community — including its mission, values, events, and internal guidelines. Rather than relying solely on the LLM’s general knowledge, RAG allows the assistant to retrieve relevant context from official community documents stored in a **ChromaDB** vector database. To optimize retrieval quality, three different chunking strategies were tested: **fixed-size chunking**, **one chunk per document**, and **semantic chunking** using the unstructured package. This enabled experimentation with how different document structures affect answer accuracy. When a user asks a question, the assistant queries the indexed chunks to find the most relevant passages and uses them to craft informed, source-backed responses.

## Indexing

🧠 **Indexing Phase of RAG**

The indexing phase is a crucial step in the RAG workflow where source documents are preprocessed, chunked, and converted into numerical vector representations (embeddings). These embeddings capture the semantic meaning of each chunk and are stored in a ChromaDB collection for fast similarity-based retrieval. Depending on the chunking strategy used (fixed-size, full-doc, or semantic), the granularity of stored knowledge varies. This phase ensures that when a user asks a question, the system can quickly retrieve the most relevant pieces of context to support an accurate and grounded response.

In [None]:
# ⚠️ This might show dependency warnings/errors — they can be safely ignored.
# 💡 Uncomment the line below if you want to experiment with semantic chunking using the `unstructured` library.
# It enables automatic extraction of logical sections from PDF files (e.g., headings, paragraphs, etc.)

# !pip install "unstructured[pdf]"

In [None]:
import fitz  # PyMuPDF for PDF reading
import uuid  # For generating unique IDs for chunks
import re    # For regex-based emoji removal
from IPython.display import display, Markdown, clear_output
# from unstructured.partition.pdf import partition_pdf  # Optional semantic chunking

# 🧼 Utility to remove emojis from text to keep embeddings clean and consistent
def remove_emojis(text):
    """
    Function: remove_emojis
    Description: Removes emojis and special Unicode symbols from the input text.
                 Useful to clean content before generating embeddings.
    """
    emoji_pattern = re.compile(
        "["

        "\U0001F600-\U0001F64F"  # Emoticons
        "\U0001F300-\U0001F5FF"  # Symbols & pictographs
        "\U0001F680-\U0001F6FF"  # Transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # Flags
        "\U00002700-\U000027BF"  # Dingbats
        "\U000024C2-\U0001F251"  # Enclosed characters
        "🤝" "🧠" "🟦" "🟪" "🧭"   # Specific additional emojis
        "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)


# 📏 Utility to split long text into smaller overlapping chunks for embedding
def split_text_into_chunks(text, chunk_size=500, overlap=50):
    """
    Function: split_text_into_chunks
    Description: Splits input text into overlapping chunks of approximately `chunk_size` characters,
                 with `overlap` characters shared between chunks to preserve context continuity.
    """
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0

    for word in words:
        word_len = len(word) + 1  # Include space
        if current_length + word_len > chunk_size:
            chunks.append(" ".join(current_chunk))
            # Add overlap: keep the last N words from previous chunk
            overlap_words = current_chunk[-(overlap // 5):]  # Rough estimate: 5 chars per word
            current_chunk = overlap_words + [word]
            current_length = sum(len(w)+1 for w in current_chunk)
        else:
            current_chunk.append(word)
            current_length += word_len

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks


# 📄 Extracts chunks from a PDF file, either one big chunk per doc or multiple per page
def extract_chunks_from_pdf(filepath, chunk_size=500, overlap=50, one_chunk_per_doc=False):
    """
    Function: extract_chunks_from_pdf
    Description: Parses a PDF and returns a list of chunks with associated metadata.
                 Supports two modes:
                 - one_chunk_per_doc: treat the entire PDF as a single chunk.
                 - per-page chunking with optional overlap.
    """
    doc = fitz.open(filepath)
    filename = filepath.split('/')[-1]
    chunks = []

    if one_chunk_per_doc:
        # Combine all pages into one large cleaned chunk
        full_text = ""
        for page in doc:
            full_text += page.get_text() + "\n"
        full_text = remove_emojis(full_text)
        chunks.append({
            "id": str(uuid.uuid4()),
            "text": full_text.strip(),
            "metadata": {
                "filename": filename,
                "page_number": -1,  # Indicates full-document chunk
            }
        })
    else:
        # Chunk per page with optional overlap
        for page_num in range(len(doc)):
            page = doc[page_num]
            text = page.get_text()
            clean_text = remove_emojis(text)
            page_chunks = split_text_into_chunks(clean_text, chunk_size, overlap)

            for chunk in page_chunks:
                chunks.append({
                    "id": str(uuid.uuid4()),
                    "text": chunk,
                    "metadata": {
                        "filename": filename,
                        "page_number": page_num + 1,
                    }
                })

    return chunks


# 🧠 Semantic chunking using the `unstructured` library (optional, more intelligent splitting)
def extract_unstructured_chunks(pdf_path: str, source_name: str = None):
    """
    Function: extract_unstructured_chunks
    Description: Uses the `unstructured` library to parse PDF documents into semantically meaningful
                 sections (titles, narrative text, tables, etc.), capturing rich metadata like page number and category.
    """
    elements = partition_pdf(filename=pdf_path)  # Requires unstructured[pdf]
    chunks = []

    for i, el in enumerate(elements):
        if el.text.strip():  # Only include non-empty sections
            chunk = {
                "id": str(uuid.uuid4()),
                "text": el.text.strip(),
                "metadata": {
                    "type": el.category,  # E.g., Title, NarrativeText, ListItem
                    "filename": source_name or pdf_path.split("/")[-1],
                    "element_index": i,
                    'page_number': el.metadata.page_number or -1
                }
            }
            chunks.append(chunk)

    return chunks


### Document Chunking

In [None]:
# 📌 Choose your document chunking strategy for indexing
# Options:
# - "semantic_chunking": uses the `unstructured` library to detect logical sections
# - "one_chunk_per_doc": treats the entire PDF as a single chunk (simpler, fewer embeddings)
# - "fixed_chunking": splits text into fixed-size chunks with overlap (preserves context flow)

chuncking_startegy = "one_chunk_per_doc"  # 👈 current strategy in use

# ✅ Confirm strategy choice
print(f"📂 Chunking strategy selected: {chuncking_startegy}")

In [None]:
import kagglehub

# 📥 Download the community document dataset from Kaggle Hub
# This retrieves a dataset named 'community-docs' from the user's Kaggle account

community_docs_path = kagglehub.dataset_download('riadbensalem/community-docs')
print(community_docs_path)

In [None]:
from pathlib import Path

chunks = []

# 🔍 Get list of all PDF files in the /kaggle/input/community-docs directory
pdf_dir = Path(community_docs_path)  # Ensure it's a Path, not string
pathlist = list(pdf_dir.glob("*.pdf"))

# 🚨 Safety check: make sure there are PDFs to process
if not list(pathlist):
    raise FileNotFoundError("❌ No PDF files found in /kaggle/input/community-docs. Please upload at least one.")

# 📦 Loop through each PDF and apply the selected chunking strategy
for path in pathlist:
    print("📄 Chunking File:", str(path).split('/')[-1])

    if chuncking_startegy == "one_chunk_per_doc":
        one_chunk_per_doc = True
        # Entire PDF is treated as a single chunk
        chunks.append(extract_chunks_from_pdf(str(path), one_chunk_per_doc=one_chunk_per_doc))

    if chuncking_startegy == "fixed_chunking":
        one_chunk_per_doc = False
        # Chunk per page with overlap
        chunks.append(extract_chunks_from_pdf(str(path), one_chunk_per_doc=one_chunk_per_doc))

    if chuncking_startegy == "semantic_chunking":
        # Use unstructured to extract semantic elements (title, paragraphs, etc.)
        chunks.append(extract_unstructured_chunks(str(path), str(path).split('/')[-1]))

# 🔄 Flatten the nested list of chunks into a single list
chunks_flatten = [chunk for pdf in chunks for chunk in pdf]


### Embedding Vectors Computation

In [None]:
from chromadb import Documents, EmbeddingFunction, Embeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings

class GeminiEmbeddingFunction(EmbeddingFunction):
    """
    Class: GeminiEmbeddingFunction
    Description:
        Custom embedding function to integrate Gemini embeddings into ChromaDB.
        Allows toggling between document embedding and query embedding modes.
    """
    document_mode = True  # Toggle for document vs. query embedding

    def __init__(self):
        """
        Function: __init__
        Description:
            Initializes the LangChain embedding wrapper for Gemini's text-embedding-004 model.
        """
        self.embedder = GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004"
        )

    def __call__(self, input: Documents) -> Embeddings:
        """
        Function: __call__
        Description:
            Generates embeddings from a list of input documents or queries.
            If document_mode is True, calls embed_documents().
            If document_mode is False, calls embed_query() for each item individually.
        """
        if self.document_mode:
            return self.embedder.embed_documents(input)  # Batch embed documents
        else:
            return [self.embedder.embed_query(q) for q in input]  # Embed queries one-by-one


In [None]:
import chromadb
from tqdm import tqdm

# 🔌 Initialize the Gemini embedding function for ChromaDB
embed_fn = GeminiEmbeddingFunction()
embed_fn.document_mode = True  # Ensures we're embedding documents, not queries

# ⚙️ Create a ChromaDB client and choose collection name based on selected strategy
chroma_client = chromadb.Client()

if chuncking_startegy == "fixed_chunking":
    # For fixed-size overlapping chunks
    collection = chroma_client.get_or_create_collection("rag_chunks", embedding_function=embed_fn)

if chuncking_startegy == "one_chunk_per_doc":
    # For full-document chunks
    collection = chroma_client.get_or_create_collection("rag_docs", embedding_function=embed_fn)

if chuncking_startegy == "semantic_chunking":
    # For semantically chunked content
    collection = chroma_client.get_or_create_collection("rag_chunks_semantic", embedding_function=embed_fn)


# 📥 Batching function to index chunks into ChromaDB in batches (avoids rate limits)
def batch_add_to_chromadb(collection, chunks, batch_size=100):
    """
    Function: batch_add_to_chromadb
    Description:
        Adds chunks to a ChromaDB collection in batches. Each chunk includes an ID, the text content,
        and associated metadata. This avoids hitting embedding API limits and speeds up bulk indexing.

    Args:
        collection: The ChromaDB collection to add documents to.
        chunks (list): A list of chunk dictionaries with 'id', 'text', and 'metadata'.
        batch_size (int): Number of chunks to process per batch (default is 100).
    """
    for i in tqdm(range(0, len(chunks), batch_size), desc="Indexing into ChromaDB"):
        batch = chunks[i:i + batch_size]
        collection.add(
            documents=[c["text"] for c in batch],
            metadatas=[c["metadata"] for c in batch],
            ids=[c["id"] for c in batch],
        )

# 🚀 Start indexing all extracted and flattened chunks into ChromaDB
batch_add_to_chromadb(collection, chunks_flatten)


## Retrieval Test

📥 **Retrieval Phase of RAG**

In the retrieval phase of the RAG workflow, user queries are transformed into vector embeddings and compared against pre-indexed document chunks stored in ChromaDB. This semantic search process identifies the most relevant pieces of content based on meaning, not just keyword matches. The top results — typically the closest chunks in vector space — are then returned along with their metadata (such as source file and confidence score). These retrieved chunks form the external knowledge grounding the final answer generated by the LLM.

In [None]:
# 🔄 Switch the embedding function to query mode
# This ensures embeddings are generated using the correct task type (retrieval_query)
embed_fn.document_mode = False

# 🔍 Define a sample user query to test semantic retrieval from the vector database
query = "what is the mission of the community?"

# 🔎 Perform semantic search in the ChromaDB collection
# It returns the top 2 most relevant chunks based on cosine similarity
result_docs = collection.query(query_texts=[query], n_results=2)


In [None]:
print(collection.count())
print(result_docs["documents"])

# AI Agent

### 🤖 AI Agent in This Project

In this project, an AI agent is built using the **LangGraph** framework, which enables the orchestration of an intelligent, tool-using assistant. The agent is designed to act as an onboarding and support assistant for a professional community. It can engage in natural conversations, respond to user questions, and dynamically decide when to call tools based on intent.

The agent leverages two main tools:
1. **`get_info`**: A Retrieval-Augmented Generation (RAG) tool that queries a vector database to answer questions about the community (e.g., its mission, values, guidelines).
2. **`score_application`**: A tool that uses the LLM to evaluate membership applications by scoring responses based on predefined criteria.
3. **`check_new_applications`**: A utility tool that connects to a Google Sheet where application forms are submitted. It detects and evaluates only **new applications** (avoiding duplicates), then displays a color-coded summary of the results in a Markdown table and exports the data to a downloadable CSV file.

Using **LangGraph's node-based architecture**, the agent handles message history, routes logic conditionally (e.g., chat → tool → response), and maintains state across interactions. It allows both user-driven and model-driven tool invocation, making it capable of reasoning, calling functions, and continuing conversation — all while grounded in real data.

This makes the AI agent not just a chatbot, but a structured decision-making assistant that can automate onboarding tasks and support community growth intelligently.


In [None]:
from google.genai import types
import typing_extensions as typing
import enum
from pprint import pprint
import json
import re

class Verdict(enum.Enum):
    """
    Class: Verdict
    Description:
        Enumeration of possible outcomes when evaluating a community membership application.
        The values are:
        - APPROVE: The applicant is a strong fit and should be accepted.
        - REVIEW: The applicant might be a fit, but requires human review.
        - REJECT: The applicant is not a fit for the community.
    """
    APPROVE = "approve"
    REVIEW = "review"
    REJECT = "reject"


class Score(typing.TypedDict):
    """
    Class: Score
    Description:
        Defines the expected structured output of the LLM when evaluating a community application.
        It ensures consistency in how application scoring is handled and interpreted.

        This schema is used in the `score_application` tool and helps:
        - Enforce a structured format for LLM output
        - Facilitate parsing and validation
        - Guide downstream logic and display (e.g., storing scores, showing feedback)

    Fields:
        - score (int): A numerical score from 1 to 10 based on evaluation criteria.
        - verdict (Verdict): A high-level recommendation (approve, review, reject).
        - reasoning (str): A human-readable explanation of why the given score was assigned.
    """
    score: int
    verdict: Verdict
    reasoning: str


🧠 **Prompt Engineering Techniques Used**

This project combines multiple advanced prompt engineering techniques to guide the behavior of the AI agent. First, **role-based prompting** is used to define the assistant’s identity and goals — as either a community onboarding guide or an application reviewer. To ensure the agent uses external tools effectively, **tool-use prompting** is applied, with clearly labeled tool names, input formats, and specific examples of when to use each tool.

Additionally, **few-shot prompting** is incorporated via examples of both user inputs and assistant responses, helping the model learn how to respond in tool-call format through imitation. The `get_info` tool is guided using a structured **chain-of-thought (CoT)** strategy: the system prompt instructs the agent to break down the user query into topics, extract relevant keywords, and form a retrieval query — encouraging step-by-step reasoning before action.

Finally, the `score_application` tool uses **structured output prompting**, specifying a clear JSON schema and scoring rubric to guide the LLM’s response format. This mix of techniques ensures the agent behaves reliably, reasons clearly, and integrates seamlessly with tools in a LangGraph-driven environment.

In [None]:
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages
from langchain_core.messages import SystemMessage

# 🧠 LangGraph State Definition for Agent
class ReviewState(TypedDict):
    """
    Class: ReviewState
    Description:
        Defines the structure of the shared state used by the LangGraph agent.

        This state allows different nodes (chat, scoring, tools) to share information and update progress
        during the onboarding or application review process.

    Fields:
        - messages: A running history of messages exchanged. Annotated with `add_messages` to allow
                    LangGraph to append messages incrementally between nodes.
        - applications: A list of raw application texts submitted for scoring.
        - scores: A list of structured score objects (`Score`), each produced by the `score_application` tool.
        - conversation_done: A boolean flag indicating whether the user has chosen to end the conversation.
    """
    messages: Annotated[list, add_messages]
    applications: list[str]
    scores: list[Score]
    conversation_done: bool


# 🤖 System Prompt for Onboarding Assistant Agent (used by chatbot node)
AIONBOARD_SYSINT_backup1 = SystemMessage(content="You are an AI Onboarding Assistant for our organization's community.  Your mission is to help and support new members by:\n - Explaining the community’s mission, vision, and core values.\n - Sharing how members can get involved in initiatives, join groups, or contribute to projects.\n - Helping them discover relevant resources, guidelines, and communication channels.\n - Encouraging them to actively engage and contribute to the community.\n\n You have access to three tools and should use them whenever relevant, instead of guessing:\n 1. get_info(query: str): Retrieves authoritative information from community documentation using RAG.  Use this when users ask about the community's goals, onboarding process, contribution opportunities, resources, or policies. This must be your exclusive source of information for these topics or any question related the organization and community.\n When you receive questions about the community or need to provide guidance, follow this reasoning process to decide if and how to use the get_info tool:\n - Step 1: Identify the general topic(s) involved in the user query — e.g. mission, onboarding, contribution, policies, communication platforms, etc.\n - Step 2: Break down the query into specific sub-questions or keywords that best capture the user's intent.\n - Step 3: Rephrase or extract a clean query from the conversation that is optimal for retrieving information.\n - Step 4: Call the get_info tool with that query. Wait for the result, then synthesize a helpful and accurate response using what was retrieved.\n\n 2. score_application: Use the tool score_application to evaluate if a new applicant is a good fit for the community.  The input should include their full application context (background, motivation, interests, etc.).\n\n Be warm, welcoming, and helpful. Avoid guessing — use the tools.\n \n\n Examples:\n User: What are the core goals of this community?\n Assistant: <tool_call>\nget_info(\"core goals of the community\")\n</tool_call>\n\n User: Here's a new application: \"I'm a software engineer passionate about mentoring others and contributing to AI-for-social-impact projects.\"\n Assistant: <tool_call>\nscore_application(\"I'm a software engineer passionate about mentoring others and contributing to AI-for-social-impact projects.\")\n</tool_call>." )

AIONBOARD_SYSINT_backup2 = SystemMessage(content="You are an AI Onboarding Assistant for our organization's community.  Your mission is to help and support new members by:\n - Explaining the community’s mission, vision, and core values.\n - Sharing how members can get involved in initiatives, join groups, or contribute to projects.\n - Helping them discover relevant resources, guidelines, and communication channels.\n - Encouraging them to actively engage and contribute to the community.\n\n You have access to three tools and should use them whenever relevant, instead of guessing:\n 1. get_info(query: str): Retrieves authoritative information from community documentation using RAG.  Use this when users ask about the community's goals, onboarding process, contribution opportunities, resources, or policies. This must be your exclusive source of information for these topics or any question related the organization and community.\n When you receive questions about the community or need to provide guidance, follow this reasoning process to decide if and how to use the get_info tool:\n - Step 1: Identify the general topic(s) involved in the user query — e.g. mission, onboarding, contribution, policies, communication platforms, etc.\n - Step 2: Break down the query into specific sub-questions or keywords that best capture the user's intent.\n - Step 3: Rephrase or extract a clean query from the conversation that is optimal for retrieving information.\n - Step 4: Call the get_info tool with that query. Wait for the result, then synthesize a helpful and accurate response using what was retrieved.\n\n 2. score_application: Use the tool score_application to evaluate if a new applicant is a good fit for the community.  The input should include their full application context (background, motivation, interests, etc.).\n\n Be warm, welcoming, and helpful. Avoid guessing — use the tools.\n 3. check_new_applications: Use this tool when the user asks you to review new applications submitted through the form.You MUST call the tool using this syntax: <tool_call>check_new_applications()</tool_call>❗ Do NOT write Python code or use print() or default_api. Never say: `print(check_new_applications())`. This tool fetches new responses from a Google Sheet, scores them, prints results in a table, and saves to a CSV. \n\n Examples:\n User: What are the core goals of this community?\n Assistant: <tool_call>\nget_info(\"core goals of the community\")\n</tool_call>\n\n User: Here's a new application: \"I'm a software engineer passionate about mentoring others and contributing to AI-for-social-impact projects.\"\n Assistant: <tool_call>\nscore_application(\"I'm a software engineer passionate about mentoring others and contributing to AI-for-social-impact projects.\")\n</tool_call>. \n User: Can you check for new applications and score them? Assistant: <tool_call>check_new_applications()</tool_call>" )

AIONBOARD_SYSINT = SystemMessage(content="You are an AI Onboarding Assistant for our organization's community.  Your mission is to help and support new members by:\n - Explaining the community’s mission, vision, and core values.\n - Sharing how members can get involved in initiatives, join groups, or contribute to projects.\n - Helping them discover relevant resources, guidelines, and communication channels.\n - Encouraging them to actively engage and contribute to the community.\n\n You have access to three tools and should use them whenever relevant, instead of guessing:\n 1. get_info(query: str): Retrieves authoritative information from community documentation using RAG.  Use this when users ask about the community's goals, onboarding process, contribution opportunities, resources, or policies. This must be your exclusive source of information for these topics or any question related the organization and community.\n When you receive questions about the community or need to provide guidance, follow this reasoning process to decide if and how to use the get_info tool:\n - Step 1: Identify the general topic(s) involved in the user query — e.g. mission, onboarding, contribution, policies, communication platforms, etc.\n - Step 2: Break down the query into specific sub-questions or keywords that best capture the user's intent.\n - Step 3: Rephrase or extract a clean query from the conversation that is optimal for retrieving information.\n - Step 4: Call the get_info tool with that query. Wait for the result, then synthesize a helpful and accurate response using what was retrieved.\n\n 2. score_application: Use the tool score_application to evaluate if a new applicant is a good fit for the community.  The input should include their full application context (background, motivation, interests, etc.).\n\n Be warm, welcoming, and helpful. Avoid guessing — use the tools.\n 3. check_new_applications: Use this tool when the user asks you to review new applications submitted through the form.You MUST call the tool using this syntax: <tool_call>check_new_applications()</tool_call>❗ Do NOT write Python code or use print() or default_api. Never say: `print(check_new_applications())`. This tool fetches new responses from a Google Sheet, scores them, prints results in a table, and saves to a CSV. \n\n" )

# 🧠 System Prompt for Application Scoring Agent (used by scoring node)
AISCORER_SYSINT = SystemMessage(content="You are an assistant evaluating applications to join a professional community.  Please read the applicant’s responses and assign a score from 1 to 10 based on the following criteria:  Scoring Criteria:   1. Motivation & Intent (max 4 points):     - Clear and thoughtful reason for joining.     - Alignment with the community’s purpose: collaboration, professional growth, impact     - Strong motivation to learn or contribute.  \n\n  2. Background & Relevance (max 3 points):   - Professional background relevant to the community (e.g., tech, design, entrepreneurship, social impact).     - Experience that could add value to the group.  \n\n  3. Contribution Potential (max 3 points)     - Willingness and clarity in how they want to contribute (e.g., speaking, organizing, mentoring, sharing resources).     - Realistic and actionable contribution ideas.  \n\n  Acceptance Threshold:  - 8-10: Strong fit – recommend approval  - 5-7: Medium fit – recommend review by a human.  - 1-4: Weak fit – recommend rejection or feedback.eak fit – recommend rejection or feedback.  \n\n  Return the result ina valid json format with the fields:   score: X   verdict: Enum of 'approve' or 'review' or 'reject'   Reasoning: string where you explain the scores you gave under each Scoring Criteria")



# 📝 Legacy versions of the system prompts (not actively used)
AIONBOARD_SYSINT_old = ("system", "...")  # Legacy text version of onboarding instruction
AISCORER_SYSINT_old = ("system", "...")   # Legacy text version of scorer instruction


##  LLM Initialization and Tool Registration  

In this section, we define and register the tools available to the AI agent. These tools include:

- `get_info`: A RAG-based tool that retrieves authoritative information from the community’s knowledge base.
- `score_application`: A scoring tool that evaluates user-submitted applications using a structured rubric.

We also initialize two Gemini models:

- `llm_with_tools`: The main conversational model, configured to understand and invoke tools when needed.
- `llm_scorer`: A specialized model that returns structured JSON output for application evaluation.

Finally, we register the tools using LangGraph’s `ToolNode` and bind them to the appropriate LLM. This enables dynamic, context-aware tool calling during conversation, making the agent both interactive and intelligent.

utilities:
- **`parse_score_response`** (utility): Validates and parses the raw response from the LLM scorer into a clean, structured format (`Score`), including type checking and error handling.
- **`google_sheet_to_dataframe_api_key`** (utility): Reads a Google Spreadsheet using an API key and converts it to a pandas DataFrame.

In [None]:
GOOGLE_SHEET_API_KEY = UserSecretsClient().get_secret("GOOGLE_SHEET_API_KEY")

In [None]:
def parse_score_response(response: str) -> Score:
    """
    Function: parse_score_response
    Description:
        Parses a raw string response from the LLM into a structured `Score` TypedDict.

        It removes markdown formatting (e.g., ```json) if present, extracts the JSON payload,
        validates that all expected fields are present, and converts the `verdict` string to an Enum.

    Raises:
        ValueError if the response is not valid JSON or does not match the expected schema.

    Returns:
        Score: A structured object containing the score, verdict, and reasoning.
    """
    try:
        if "```json" in response:
            response = re.search(r"```json\n(.*?)\n```", response, re.DOTALL).group(1)

        data = json.loads(response)

        if not all(key in data for key in ("score", "verdict", "Reasoning")):
            raise ValueError("Missing one or more required keys in response.")

        score = int(data["score"])
        verdict_str = data["verdict"]
        reasoning = data["Reasoning"]

        try:
            verdict = Verdict(verdict_str)
        except ValueError:
            raise ValueError(f"Invalid verdict: {verdict_str}")

        return Score(score=score, verdict=verdict, reasoning=reasoning)

    except (json.JSONDecodeError, ValueError, KeyError) as e:
        raise ValueError(f"Invalid score response format: {e}")

In [None]:
import pandas as pd
import requests

def google_sheet_to_dataframe_api_key(spreadsheet_url, api_key):
    """
    Reads a Google Spreadsheet using an API key and converts it to a pandas DataFrame
    
    Parameters:
    spreadsheet_url (str): The URL of the Google Spreadsheet
    api_key (str): Your Google API key
    
    Returns:
    pandas.DataFrame: The spreadsheet data as a DataFrame
    """
    try:
        # Extract the spreadsheet ID from the URL
        if '/d/' in spreadsheet_url:
            spreadsheet_id = spreadsheet_url.split('/d/')[1].split('/')[0]
        else:
            raise ValueError("Invalid Google Spreadsheet URL format")
        
        # Using the Google Sheets API v4
        url = f"https://sheets.googleapis.com/v4/spreadsheets/{spreadsheet_id}/values/Sheet1?key={api_key}"
        
        # Make the API request
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for HTTP errors
        
        # Parse the response
        data = response.json()
        
        if 'values' not in data:
            raise ValueError("No values found in the spreadsheet")
            
        # Extract header and data
        headers = data['values'][0]
        rows = data['values'][1:]
        
        # Create a list of dictionaries for each row
        processed_data = []
        for row in rows:
            # Pad the row with None values if it's shorter than headers
            padded_row = row + [None] * (len(headers) - len(row))
            row_dict = dict(zip(headers, padded_row))
            processed_data.append(row_dict)
            
        # Convert to DataFrame
        df = pd.DataFrame(processed_data)
        
        return df
    
    except Exception as e:
        print(f"Error: {str(e)}")
        return None


In [None]:
from langchain_core.tools import tool
from typing import Literal
from langgraph.graph import StateGraph, START, END
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.prebuilt import ToolNode
import csv

model_used = "gemini-2.0-flash"

# 🔧 Define the LLM used for scoring (returns structured output based on the Score schema)
llm_scorer = ChatGoogleGenerativeAI(
    model=model_used,
    config=types.GenerateContentConfig(
        temperature=0.1,
        response_mime_type="application/json",  # Expect JSON output
        response_schema=Score,                  # Enforce structured return
    )
)

# 🧠 LLM used by the chatbot node — this one is tool-aware via `bind_tools`
llm = ChatGoogleGenerativeAI(model=model_used)


@tool
def score_application(application: str) -> str:
    """
    Function: score_application
    Description:
        This tool is called by the LLM to evaluate a membership application.
        The input should be a text response covering motivation, background, and contribution intent.
        The tool will generate a structured evaluation using the `Score` schema.

    Parameters:
        application (str): A full application text (answers to 2–3 onboarding questions).

    Returns:
        str: A structured JSON string containing the score, verdict, and reasoning.
    """
    pass  # Placeholder, implemented by a separate node using llm_scorer


@tool
def get_info(query: str) -> str:
    """
    Function: get_info
    Description:
        This is a RAG-based tool that retrieves authoritative information from the community's knowledge base.
        It performs semantic search over embedded documents using ChromaDB and returns the most relevant chunk(s)
        with metadata like file source and confidence.

    Parameters:
        query (str): A user question or topic (e.g., "What are the community values?").

    Returns:
        str: A formatted string of the top matching chunk with confidence and source info.
    """
    if collection:
        embed_fn.document_mode = False  # Switch to query mode

        result = collection.query(query_texts=[query], n_results=1)

        documents = result.get("documents", [[]])[0]
        metadatas = result.get("metadatas", [[]])[0]
        distances = result.get("distances", [[]])[0]

        if not documents:
            return "Sorry, I couldn't find any relevant information in the knowledge base."

        formatted_chunks = []

        for i, (doc, meta, dist) in enumerate(zip(documents, metadatas, distances)):
            confidence = round(1 - dist, 2)
            filename = meta.get("filename", "Unknown Source")
            page_number = meta.get("page_number", "Unknown page number")
            if int(page_number) == -1:
                page_number = "Unknown"

            chunk = (
                f"**Source:** {filename}, at Page {page_number}\n\n"
                f"**Confidence:** {confidence}\n\n"
                f"{doc}"
            )
            formatted_chunks.append(chunk)

        # Optional: Display summary in notebook output
        #display(Markdown(
        #    f"📘 **Retrieved Information:**\n\n"
        #    f"**Source:** {filename}, at Page {page_number}\n\n"
        #    f"**Confidence:** {confidence}\n\n"
        #))

        return "\n\n---\n\n".join(formatted_chunks)


# Memory of previously scored application timestamps
already_scored_ids = set()

@tool
def check_new_applications() -> str:
    """
    Checks a Google Sheet for new community membership applications,
    scores the new ones using the scoring agent, and returns a summary.

    Parameters:
        non needed, hard coded for now

    Returns:
        str: Markdown-formatted summary of the scores
    """
    spreadsheet_url = "https://docs.google.com/spreadsheets/d/1vUs4m_hAaiI04iIWcKDQ7mV1j5LBwJUfxqviB1I6gCg/edit?usp=sharing"
    df = google_sheet_to_dataframe_api_key(spreadsheet_url, GOOGLE_SHEET_API_KEY)

    if df is None or df.empty:
        return "⚠️ Could not load the spreadsheet or it's empty."

    markdown_rows = []
    csv_rows = []
    new_found = False

    for i, row in df.iterrows():
        app_id = row.get("Timestamp")
        if app_id in already_scored_ids:
            continue

        new_found = True

        try:
            full_app = (
                f"1. Why do you want to join?   {row.get('Why do you want to join this community?')}\n"
                f"2. Your current interests or goals?   {row.get('What topics or goals are you most interested in right now?')}\n"
                f"3. Skills or experience?   {row.get('Do you have any experience or skills you’re excited to share with others?')}\n"
                f"4. Preferred participation style?   {row.get('How would you like to participate in the community?')}\n"
                f"5. Support or mentorship needs?   {row.get('What kind of support or mentorship are you looking for?')}"
            )

            messages = [AISCORER_SYSINT, HumanMessage(content=full_app)]
            result = llm_scorer.invoke(messages).content
            parsed = parse_score_response(result)

            already_scored_ids.add(app_id)

            # Verdict emojis
            emoji = {
                "approve": "✅",
                "review": "⚠️",
                "reject": "❌"
            }.get(parsed['verdict'].value, "")

            reasoning_short = parsed['reasoning'].replace("\n", " ").replace("|", "-")[:100] + "..."

            # Markdown row
            markdown_rows.append(
                f"| {row.get('First Name', '')} {row.get('Last Name', '')} "
                f"| {parsed['score']} "
                f"| {emoji} {parsed['verdict'].value} "
                f"| {reasoning_short} |"
            )

            # CSV row
            csv_rows.append({
                "Timestamp": app_id,
                "Name": f"{row.get('First Name')} {row.get('Last Name')}",
                "Score": parsed["score"],
                "Verdict": parsed["verdict"].value,
                "Reasoning": parsed["reasoning"]
            })

        except Exception as e:
            markdown_rows.append(
                f"| Row {i+1} | ❌ | Error | {str(e)} |"
            )

    if not new_found:
        return "✅ No new applications found."

    # Save CSV
    with open("scored_applications.csv", "w", newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["Timestamp", "Name", "Score", "Verdict", "Reasoning"])
        writer.writeheader()
        writer.writerows(csv_rows)

    # Print Markdown Table
    markdown_table = (
        "### ✅ Scored Applications\n\n"
        "| Applicant | Score | Verdict | Reasoning |\n"
        "|-----------|-------|---------|-----------|\n"
        + "\n".join(markdown_rows)
        + "\n\n📄 CSV saved as `scored_applications.csv`"
    )

    #display(Markdown(markdown_table))
    return markdown_table



# 🛠️ Register tools with LangGraph
# Auto-tools will be called automatically by ToolNode (e.g., get_info)
auto_tools = [get_info,check_new_applications]
#auto_tools = [get_info]
tool_node = ToolNode(auto_tools)

# Manual tools are handled via custom logic (e.g., score_application routed to scoring node)
other_tools = [score_application]

# 🤖 Bind all tools to the chatbot LLM so it can call them when needed
llm_with_tools = llm.bind_tools(auto_tools + other_tools)


## Nodes  

🧩 **LangGraph Nodes: Chatbot, Human, and Scoring Logic**

This section defines the core **nodes** used by the LangGraph agent, each responsible for a different part of the conversation flow.

- **`chatbot` node**: Handles general conversation and decides when to call tools. It uses a Gemini model (`llm_with_tools`) and follows a system prompt that defines its behavior and responsibilities.
  
- **`score_node`**: Executes when the model invokes the `score_application` tool. It sends the application text to a specialized scorer model, parses the structured output, and adds it to the state.

- **`human_node`**: Manages the notebook interaction loop. It displays the model's message, accepts user input, and determines if the conversation should end.


Each node contributes to enabling an intelligent, tool-using agent that can guide users, retrieve information, and evaluate applications in a human-like, structured workflow.


In [None]:
from langchain_core.messages.ai import AIMessage
from langchain_core.messages.tool import ToolMessage
from langchain_core.messages import HumanMessage
import ipywidgets as widgets




def score_node(state: ReviewState) -> ReviewState:
    """
    Function: score_node
    Description:
        This node is triggered when the `score_application` tool is called by the LLM.

        It extracts the application text from the tool call, sends it to the `llm_scorer` model with a
        system instruction, and parses the structured response using `parse_score_response`.

        The score and explanation are printed in the notebook, and the structured result is returned
        in the updated state.

    Returns:
        ReviewState: Updated state containing the score, tool message, and application content.
    """
    tool_msg = state.get("messages", [])[-1]
    outbound_msgs = []
    parsed_score = None
    application_text = None

    for tool_call in tool_msg.tool_calls:
        if tool_call['name'] == 'score_application':
            application_text = tool_call["args"]["application"]
            messages = [AISCORER_SYSINT, HumanMessage(content=application_text)]
            try:
                response = llm_scorer.invoke(messages)
                response_text = response.content

                parsed_score = parse_score_response(response_text)

                # Create a summary to show in chat (optional)
                summary_msg = AIMessage(content=(
                    f"### ✅ Application Scored\n"
                    f"- **Score**: `{parsed_score['score']}`\n"
                    f"- **Verdict**: `{parsed_score['verdict'].value}`\n"
                    f"- **Reasoning**:\n\n{parsed_score['reasoning']}"
                ))

                outbound_msgs.append(
                    ToolMessage(
                        content=response_text,
                        name=tool_call["name"],
                        tool_call_id=tool_call["id"],
                    )
                )
            except Exception as e:
                error_msg = AIMessage(content=f"❌ Scoring failed: {e}")
                outbound_msgs.append(error_msg)
                raise
        else:
            raise NotImplementedError(f"Unknown tool call: {tool_call['name']}")

    return {
        "messages": outbound_msgs,
        "applications": [application_text],
        "scores": [parsed_score] if parsed_score else [],
    }


def chatbot(state: ReviewState) -> ReviewState:
    """
    Function: chatbot
    Description:
        Core chatbot logic for continuing the conversation. It routes the conversation to
        the Gemini model with access to system instructions and prior messages.

        If there are no previous messages, it starts the conversation with a welcome message.

    Returns:
        ReviewState: Updated state with the assistant's response appended to messages.
    """
    if state["messages"]:
        #new_output = llm_with_tools.invoke([AIONBOARD_SYSINT] + state["messages"])
        # Remove any SystemMessage if present (Gemini doesn't like it here)
        #messages = [m for m in state["messages"] if not isinstance(m, SystemMessage)]
        messages = [m for m in state["messages"]]
        new_output = llm_with_tools.with_config({
            "system_instruction": AIONBOARD_SYSINT
        }).invoke(messages)
    else:
        new_output = AIMessage(content='Hi I am your community friendly AI companion ! How can I help?')

    return state | {"messages": [new_output]}


def human_node(state: ReviewState, user_input: str = None) -> ReviewState:
    """
    Gradio-compatible human input node.
    Adds the user's message to the state if provided.
    """
    if state.get("messages"):
        last_msg = state["messages"][-1]
        if isinstance(last_msg, AIMessage):
            display(Markdown(f"**🤖 Model:**\n\n{last_msg.content}"))

    if not user_input:
        print("⚠️ No user input provided.")
        return state

    if user_input.lower().strip() in {"q", "quit", "exit", "goodbye"}:
        state["conversation_done"] = True

    return state | {"messages": [HumanMessage(content=user_input)]}


def human_node_backup(state: ReviewState) -> ReviewState:
    """
    Function: human_node
    Description:
        This node handles the human interaction loop in a notebook environment.

        It displays the most recent AI message and prompts the user for input. If the user
        types an exit keyword, the conversation is flagged as completed.

    Returns:
        ReviewState: Updated state with the user's message added.
    """
    last_msg = state["messages"][-1]

    if isinstance(last_msg, AIMessage):
        display(Markdown(f"**🤖 Model:**\n\n{last_msg.content}"))
    else:
        print("🤖 Model:", last_msg)

    user_input = input("👤 You: ")

    if user_input.lower().strip() in {"q", "quit", "exit", "goodbye"}:
        state["conversation_done"] = True

    return state | {"messages": [HumanMessage(content=user_input)]}
  

## Routing 

🧭 **Conditional Routing Logic**

This section defines the routing functions that LangGraph uses to determine the next step in the conversation based on the current state.

- **`maybe_exit_human_node`**: Checks if the user wants to quit (e.g., typing "q" or "exit"). If so, it ends the conversation. Otherwise, it continues chatting.

- **`maybe_route_to_tools`**: After the LLM responds, this function decides what should happen next:
  - If the model made a valid tool call → route to `tools` or `scoring`.
  - If there's no tool call → go to `human` for more input.
  - If the user ended the chat → go to `__end__`.

These routes act as decision points that dynamically guide the agent's workflow during each turn of the conversation.


In [None]:
def maybe_exit_human_node(state: ReviewState) -> Literal["chatbot", "__end__"]:
    """
    Function: maybe_exit_human_node
    Description:
        Determines whether the conversation should continue or end after user input.

        If the `conversation_done` flag is True (e.g., user typed "quit"), the graph returns __end__.
        Otherwise, it loops back to the chatbot node to continue the conversation.

    Returns:
        Literal["chatbot", "__end__"]: The next node to route to.
    """
    if state.get("conversation_done", False):
        return END
    else:
        return "chatbot"


def maybe_route_to_tools(state: ReviewState) -> Literal["chatbot", "human","tools", "scoring", "__end__"]:
    """
    Function: maybe_route_to_tools
    Description:
        Controls how the conversation flows after a model response.

        Based on the most recent message, it checks:
        - If the conversation is done → end it.
        - If the model made a tool call → route to `tools` or `scoring`.
        - Otherwise → hand off to `human` for more input.

    Returns:
        Literal: The name of the next node to activate.
    """
    if not (msgs := state.get("messages", [])):
        raise ValueError(f"No messages found when parsing state: {state}")

    msg = msgs[-1]

    if state.get("conversation_done", False):
        return END

    elif hasattr(msg, "tool_calls") and len(msg.tool_calls) > 0:
        # Check if tool call is known and registered in the tool node
        if any(
            tool["name"] in tool_node.tools_by_name.keys() for tool in msg.tool_calls
        ):
            return "tools"
        else:
            return "scoring"

    else:
        return "human" 


## Graph Builder

🧠 **Building the Agent Graph with LangGraph**

This section constructs the full agent workflow as a graph using the `StateGraph` interface from LangGraph.

- Each **node** in the graph represents a step in the conversation flow, such as `chatbot`, `human`, `tools`, or `scoring`.
- Conditional routing logic is added to allow the graph to dynamically transition between nodes based on the conversation state and tool calls.
- After calling a tool or completing a scoring task, the graph routes the result back to the `chatbot` to continue the dialogue.
- The chatbot is set as the starting point of the graph using the `START` edge.

This modular architecture enables structured, multi-turn conversations where the LLM can delegate tasks (like scoring or retrieving knowledge) and loop back naturally.


In [None]:
# 🧱 Initialize a stateful LangGraph based on the ReviewState schema
graph_builder = StateGraph(ReviewState)

# 🔹 Add functional nodes to the graph
graph_builder.add_node("chatbot", chatbot)       # LLM node that handles conversation
graph_builder.add_node("human", human_node)      # Node for interactive user input (notebook)
graph_builder.add_node("scoring", score_node)    # Node for application evaluation
graph_builder.add_node("tools", tool_node)       # Node for auto-invoked tools (e.g., get_info)

# 🔁 Add conditional transitions based on output from nodes
graph_builder.add_conditional_edges("chatbot", maybe_route_to_tools)      # Decide where to go after the model replies
#graph_builder.add_conditional_edges("human", maybe_exit_human_node)       # Decide whether to quit or keep chatting

# 🔄 Route tool responses back to the chatbot
graph_builder.add_edge("tools", "chatbot")
graph_builder.add_edge("scoring", "chatbot")

# 🚀 Define entrypoint: the chatbot is the first node to run
graph_builder.add_edge(START, "chatbot")

# ✅ Compile the graph into a runnable LangGraph instance
chat_graph = graph_builder.compile()


🗺️ **Agent Graph Visualization**

The diagram below visualizes the full LangGraph execution flow used in this AI agent.

- Each box represents a **node** (e.g., chatbot, human, scoring).
- Arrows indicate the **flow of state and messages** between nodes.
- Conditional logic (e.g., routing based on tool calls) determines which path is followed at runtime.

This graph structure enables flexible, multi-step decision-making and clean integration of tools like RAG and scoring.


In [None]:
from IPython.display import Image, display, Markdown

# 🔍 Visualize the compiled LangGraph as a Mermaid diagram (rendered as PNG)
#Image(chat_graph.get_graph().draw_mermaid_png())


### 🧪 Launching the Agent

We now invoke the compiled LangGraph agent with an initial empty state.

- `messages: []` means this is the start of a new conversation.
- The `recursion_limit` ensures the graph doesn't run forever (e.g., in case of unexpected loops).
- The graph begins at the `chatbot` node, which sends a welcome message and waits for user input.

This step effectively "boots up" the AI onboarding assistant and prepares it to handle user interactions in a dynamic, tool-augmented way.


In [None]:
# ⚙️ Optional: Set graph execution limits to prevent infinite loops
#config = {"recursion_limit": 30}

# 🚀 Start the agent graph with an empty message history
# This triggers the chatbot node, which welcomes the user and begins the interaction
#state = chat_graph.invoke({"messages": []}, config)


## Application example for Testing

In [None]:


application = """1. Why do you want to join?  
I want to join to connect with like-minded professionals in tech, share knowledge, and contribute to a vibrant and supportive community. I'm also looking for opportunities to mentor others and grow my network.

2. Your background/profession?  
I'm a senior software engineer with 6 years of experience in backend development, currently working on cloud infrastructure and AI integrations.

3. How would you like to contribute?  
I can help with organizing technical talks, mentorship programs, and contribute to online discussions. I also enjoy helping with community outreach and creating content.

4. What areas are you most interested in?  
AI/ML, cloud technologies, community building, and career growth.

5. How much time can you dedicate monthly?  
5-8 hours/month.

"""

# Chat UI

In [None]:
import gradio as gr

conversation_state = {"messages": []}

# Action log to track backend events
action_log = []

def check_applications_via_button(history):
    simulated_message = "<tool_call>check_new_applications()</tool_call>"
    response, log = agent_response(simulated_message, history)
    return [['assistant',response]],log


def agent_response(message, history):
    # Append the new message to the conversation state
    action_log.append(f"🧑 User: {message[:100]}...")
    conversation_state["messages"].append(HumanMessage(content=message))

    try:
        # Run through the LangGraph agent
        updated_state = chat_graph.invoke(conversation_state)
        conversation_state.update(updated_state)

        # Get last assistant message
        last = conversation_state["messages"][-1]
        reply = last.content if hasattr(last, "content") else ""
        if len(conversation_state["messages"]) >=2:
            before_last=conversation_state["messages"][-2]
        if hasattr(before_last, "tool_call_id"):
            action_log.append('🛠️ Tool Call → '+ before_last.name)
        else:
            action_log.append("🤖 Assistant responded.")
        return reply, "\n".join(action_log)
    except Exception as e:
        return f"❌ Error: {str(e)}"




log_output = gr.Textbox(label="📝 Agent Log", lines=6, interactive=False)

chatbot = gr.ChatInterface(
    fn=agent_response,
    additional_outputs=[log_output],
    title="",
    submit_btn="Send"
)

with gr.Blocks() as ui:
    gr.Markdown("## 🤖 Community Onboarding Agent")
    chatbot.render()
    log_output.render()
    # Check New Applications button — needs access to chatbot.history
    gr.Button("📋 Check New Applications").click(
        fn=check_applications_via_button,
        inputs=[chatbot.chatbot],   # this provides history
        outputs=[chatbot.chatbot,log_output]
    )



ui.launch()






In [None]:
print(chatbot.chatbot)

In [None]:
'''


# --- Check applications button handler ---
def check_applications(history, agent_log):
    simulated_input = "<tool_call>check_new_applications()</tool_call>"
    return agent_response(simulated_input, history, agent_log)

# --- Gradio ChatInterface ---
chat = gr.ChatInterface(
    fn=agent_response,
    additional_inputs=[gr.Textbox(label="🔍 Agent Log", lines=6, interactive=False)],
    textbox=gr.Textbox(placeholder="Ask your question here..."),
    title="🤖 Community Onboarding Agent",
    description="Ask me anything about the community or say `check new applications` to score them.",
)

# --- Add extra button ---
with chat:
    check_button = gr.Button("📋 Check New Applications")
    check_button.click(
        fn=check_applications,
        inputs=[chat.chatbot, chat.additional_inputs[0]],
        outputs=[chat.chatbot, chat.additional_inputs[0]]
    )

chat.launch()
'''