# Agent Workflow with all Tools Defined and an Example Implementation

In [1]:
from langchain.tools import BaseTool
from IPython.display import Image, display
import os
from google.cloud import storage
from vertexai.language_models import TextEmbeddingModel, TextGenerationModel
import chromadb
import io
import vertexai
import re
import uuid
from langchain.vectorstores import Chroma
from langchain.llms import VertexAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings
import warnings
from PyPDF2.errors import PdfReadWarning
import logging
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from pydantic import BaseModel, Field
from typing import Callable, Any
from langchain.memory import ConversationBufferMemory
from langchain_core.tools import tool
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
import json
import mysql.connector
from datetime import datetime
from getpass import getpass
from dotenv import set_key
from dotenv import load_dotenv
from tavily import TavilyClient
from langchain.utilities.tavily_search import TavilySearchAPIWrapper
from langchain.tools.tavily_search import TavilySearchResults
from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
    AnyMessage
)
import getpass
from difflib import SequenceMatcher
from langgraph.checkpoint.sqlite import SqliteSaver

## Initialization

In [4]:
# Vertex AI model identifiers shared by the rest of the notebook.
EMBEDDING_MODEL = "text-embedding-004"  # passed to VertexAIEmbeddings
LLM_MODEL = "gemini-1.5-flash-001"  # passed to ChatVertexAI

In [5]:
# Initialize Vertex AI
# NOTE(review): PROJECT_ID and LOCATION are not defined in the visible cells —
# presumably they are set in an earlier cell; confirm before running top to bottom.
vertexai.init(project=PROJECT_ID, location=LOCATION)

# Embedding function that initialize_system() hands to the Chroma vector store
embedding_function = VertexAIEmbeddings(model_name=EMBEDDING_MODEL)

# Initialize models
# `llm` (temperature 0) serves the PDF/RAG path; `llm_calendar` (temperature 0.1,
# larger max_tokens) serves calendar queries and is the model given to the agent.
llm = ChatVertexAI(model_name=LLM_MODEL, temperature=0, max_tokens = 4000)
llm_calendar = ChatVertexAI(model_name=LLM_MODEL, temperature=.1, max_tokens=8192)

In [7]:
def initialize_system():
    """Create the Chroma client and publish the vector store as ``global_chroma``.

    The ``class_materials`` collection is fetched (or created). When it holds
    no items the course PDFs are ingested via ``process_pdfs_from_gcs`` first.
    Both ``chroma_client`` and ``global_chroma`` are (re)bound at module level.
    """
    global global_chroma, chroma_client

    collection_name = "class_materials"

    print("Initializing system...")
    chroma_client = chromadb.Client()

    print("Getting or creating collection...")
    materials = chroma_client.get_or_create_collection(name=collection_name)

    already_populated = materials.count() != 0
    if already_populated:
        print("Using existing processed PDFs.")
    else:
        print("Collection is empty, processing PDFs...")
        process_pdfs_from_gcs(BUCKET_NAME, PDF_FOLDER, materials)
        print("PDF processing complete. Vector store updated.")

    print("Initializing global_chroma...")
    global_chroma = Chroma(
        embedding_function=embedding_function,
        collection_name=collection_name,
        client=chroma_client,
    )
    print("System initialization complete.")

In [9]:
# Re-import on purpose: the later `import getpass` in the import cell rebinds
# the name to the module, so the function has to be brought back into scope here.
from getpass import getpass  # keep this import here

# Ask for each connection setting; an empty answer means "leave .env untouched".
new_db_host = input("Enter new database host (or press Enter to keep current): ")
new_db_user = input("Enter new database user (or press Enter to keep current): ")
new_db_password = getpass("Enter new database password (input hidden, or press Enter to keep current): ")
new_db_database = input("Enter new database name (or press Enter to keep current): ")

# Persist only the values that were actually entered.
for env_key, new_value in (
    ("DB_HOST", new_db_host),
    ("DB_USER", new_db_user),
    ("DB_PASSWORD", new_db_password),
    ("DB_DATABASE", new_db_database),
):
    if new_value:
        set_key(".env", env_key, new_value)

load_dotenv()  # Reload .env to get the new values


Enter new database host (or press Enter to keep current):  
Enter new database user (or press Enter to keep current):  
Enter new database password (input hidden, or press Enter to keep current):  ········
Enter new database name (or press Enter to keep current):  


True

## PDF Querying Logic

In [10]:
def extract_metadata_from_filename(filename):
    """Derive course, document type, week and assignment number from a PDF name.

    Filenames are expected to be underscore separated, for example
    ``Marketing_Analytics_Week_1_Assignment_1.pdf``.

    Args:
        filename: Base name of the file (no directory component).

    Returns:
        A ``(course_name, document_type, week_number, assignment_number)``
        tuple of strings. Unknown courses yield ``"Unknown Course"``, unknown
        document types yield ``"Other"``, and a missing week or assignment
        number yields ``""``.
    """
    parts = filename.split('_')

    # (filename prefix, display name, number of "_"-separated tokens in the prefix)
    known_courses = (
        ("Marketing_Analytics", "Marketing Analytics", 2),
        ("Statistical_Analysis", "Statistical Analysis", 2),
        ("Introduction_to_Art_History", "Introduction to Art History", 4),
    )
    course_name = "Unknown Course"
    remaining_parts = parts
    for prefix, display_name, token_count in known_courses:
        if filename.startswith(prefix):
            course_name = display_name
            remaining_parts = parts[token_count:]
            break

    # Strip the extension from the final token once, so that both the week and
    # the assignment number come out clean ("3", not "3.pdf") when they are last.
    tokens = list(remaining_parts)
    if tokens:
        tokens[-1] = tokens[-1].split('.')[0]

    # The week number is the token immediately following "Week".
    week_number = ""
    for current, following in zip(tokens, tokens[1:]):
        if current == "Week":
            week_number = following
            break

    # Determine document type and assignment number
    document_type = "Other"
    assignment_number = ""
    if "Assignment" in filename:
        document_type = "Assignment"
        # The assignment number is taken from the last token; guard against
        # names that have no tokens left after the course prefix.
        assignment_number = tokens[-1] if tokens else ""
    elif "Lecture" in filename:
        document_type = "Lecture Notes"
    elif "Syllabus" in filename:
        document_type = "Syllabus"

    return course_name, document_type, week_number, assignment_number

In [11]:
def clean_metadata(metadata):
    """Return a copy of ``metadata`` with every value normalised to a string.

    ``None`` becomes ``""``; everything else goes through ``str()``. The
    ``course_name`` and ``document_type`` values are additionally lower-cased.
    No key is dropped.
    """
    lowercase_fields = ('course_name', 'document_type')
    normalized = {}
    for field, value in metadata.items():
        text = "" if value is None else str(value)
        normalized[field] = text.lower() if field in lowercase_fields else text
    return normalized

In [12]:
def process_pdfs_from_gcs(bucket_name, pdf_folder, collection):
    """Download course PDFs from GCS, chunk and embed them, and add them to ``collection``.

    Args:
        bucket_name: Name of the Cloud Storage bucket to read from.
        pdf_folder: Blob-name prefix; only blobs under it ending in ``.pdf``
            are processed.
        collection: Chroma collection that receives the ids, embeddings,
            metadata and chunk texts.

    Returns:
        ``collection`` once processing has run, or ``None`` when no chunk
        could be extracted from any PDF.
    """
    storage_client = storage.Client(project=PROJECT_ID)
    bucket = storage_client.bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=pdf_folder)

    all_chunks = []
    all_metadatas = []
    all_ids = []

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=300)

    # Silence the PDF parser's warnings about malformed files.
    warnings.simplefilter("ignore", PdfReadWarning)
    warnings.filterwarnings("ignore", message="Ignoring wrong pointing object")
    logging.getLogger('PyPDF2').setLevel(logging.ERROR)

    for blob in blobs:
        if not blob.name.endswith(".pdf"):
            continue

        filename = blob.name.split('/')[-1]
        temp_file_path = f"/tmp/{filename}"
        blob.download_to_filename(temp_file_path)

        try:
            pages = PyPDFLoader(temp_file_path).load_and_split()
            course_name, document_type, week_number, assignment_number = extract_metadata_from_filename(filename)

            for page in pages:
                for chunk in text_splitter.split_text(page.page_content):
                    cleaned_metadata = clean_metadata({
                        "source": filename,
                        "page": str(page.metadata.get('page', '')),
                        "course_name": course_name,
                        "document_type": document_type,
                        "week": week_number,
                        "assignment_number": assignment_number
                    })
                    # Append all three together so the lists can never drift apart.
                    all_chunks.append(chunk)
                    all_ids.append(str(uuid.uuid4()))
                    all_metadatas.append(cleaned_metadata)
        except Exception as e:
            # Best effort: a single unreadable PDF must not abort the whole run.
            print(f"Error processing file {filename}: {str(e)}")
        finally:
            # Always remove the temp file, including when parsing failed.
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)

    if not all_chunks:
        print("No valid chunks were extracted from the PDFs.")
        return

    print(f"Extracted {len(all_chunks)} chunks.")

    # Generate embeddings. Only chunks that embed successfully are kept, and
    # each embedding is stored next to its own id/metadata/text. Truncating the
    # lists afterwards would pair every embedding that follows a failure with
    # the wrong chunk.
    embeddings = VertexAIEmbeddings(model_name=EMBEDDING_MODEL)
    kept_ids = []
    kept_embeddings = []
    kept_metadatas = []
    kept_chunks = []
    for i, (chunk_id, chunk, metadata) in enumerate(zip(all_ids, all_chunks, all_metadatas)):
        try:
            embedding = embeddings.embed_query(chunk)
        except Exception as e:
            print(f"Failed to embed chunk {i}: {str(e)}")
            print(f"Chunk content: {chunk[:100]}...")
            continue
        kept_ids.append(chunk_id)
        kept_embeddings.append(embedding)
        kept_metadatas.append(metadata)
        kept_chunks.append(chunk)

    print(f"Generated {len(kept_embeddings)} embeddings.")

    skipped = len(all_chunks) - len(kept_embeddings)
    if skipped:
        print(f"Warning: {skipped} chunks could not be embedded and were skipped.")

    print(f"Final count: {len(kept_ids)} items.")

    if not kept_ids:
        print("No embeddings were generated; nothing was added to the collection.")
        return collection

    # Add to the existing Chroma collection
    try:
        collection.add(
            ids=kept_ids,
            embeddings=kept_embeddings,
            metadatas=kept_metadatas,
            documents=kept_chunks
        )
        print(f"Added {len(kept_ids)} chunks to the collection.")
    except ValueError as e:
        print(f"Error adding documents to collection: {str(e)}")
        print("First few metadatas for debugging:")
        for metadata in kept_metadatas[:5]:
            print(metadata)

    return collection

In [13]:
def extract_details_llm(query, llm_model):
    """Ask the model to pull course, document type, week and assignment out of ``query``.

    Args:
        query: The user's free-text question.
        llm_model: Object exposing ``predict(prompt)`` that returns text.

    Returns:
        ``(course_name, document_type, week_number, assignment_number)`` taken
        from the first four comma-separated fields of the reply, each stripped.
        Fields the reply does not contain come back as ``""``.
    """
    instruction_lines = [
        "Analyze the following query and extract the course name, document type, week number, and assignment number (if mentioned). ",
        "The course name will be one of 'Marketing Analytics', 'Statistical Analysis', or 'Introduction to Art History'. ",
        "The document type could be 'Assignment', 'Lecture Notes', 'Syllabus', or any other relevant type. ",
        "The week number should be specifically extracted if mentioned (e.g., from 'Week 1' only '1' should be returned). ",
        "The assignment number should be extracted if mentioned (e.g., e.g., from 'Assignment 1' only '1' should be returned). ",
        "If the query doesn't specify certain details, leave them blank. ",
        "Respond with the course name, document type, week number, and assignment number separated by commas.\n\n",
        f"Query: {query}\n\nResponse (Course Name, Document Type, Week Number, Assignment Number):",
    ]
    reply = llm_model.predict("".join(instruction_lines)).strip()

    # Pad to four fields so a short reply simply yields blanks.
    fields = [piece.strip() for piece in reply.split(',')]
    fields.extend([""] * (4 - len(fields)))
    course_name, document_type, week_number, assignment_number = fields[:4]

    return course_name, document_type, week_number, assignment_number

In [14]:
def generate_response(query, context, llm):
    """Answer ``query`` by prompting ``llm`` with the retrieved ``context``.

    Args:
        query: The student's question.
        context: Text of the retrieved course-material chunks.
        llm: Object exposing ``predict(prompt)``.

    Returns:
        Whatever ``llm.predict`` returns for the assembled prompt.
    """
    rag_prompt = f"""
    You are an AI assistant for university courses including Marketing Analytics, Statistical Analysis, and Introduction to Art History. You have been given the following context information from course materials:

    {context}

    Based on this context, please answer the following question:

    {query}

    If the answer is not explicitly stated in the context, use your knowledge to provide a relevant response, but make it clear that this information is not directly from the course materials.

    Your response should be:
    1. Accurate based on the given context
    2. Concise yet informative
    3. Structured in a clear, easy-to-read format
    4. Tailored to the specific course (Marketing Analytics, Statistical Analysis, or Introduction to Art History) when applicable

    Answer:
    """

    return llm.predict(rag_prompt)

In [15]:
def query_collection(query, top_k=5):
    """Retrieve the most relevant course-material chunks for ``query`` and answer it.

    The query is first passed to ``extract_details_llm`` to obtain optional
    course / document-type / week / assignment filters. A filtered similarity
    search runs against ``global_chroma``; when it finds nothing, the search
    is retried filtered by course only. The hits are then given to
    ``generate_response`` as context.

    Args:
        query: The student's question.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The generated answer, or a fixed message when the search fails or
        finds nothing.
    """
    course_name, document_type, week_number, assignment_number = extract_details_llm(query, llm)
    print(f"Extracted - Course: {course_name}, Type: {document_type}, Week: {week_number}, Assignment: {assignment_number}")

    # Stored course_name/document_type metadata is lower-cased, so match in lower case.
    candidate_filters = (
        ("course_name", course_name.lower()),
        ("document_type", document_type.lower()),
        ("week", week_number),
        ("assignment_number", assignment_number),
    )
    filter_conditions = [
        {field: {"$eq": value}} for field, value in candidate_filters if value
    ]

    if len(filter_conditions) > 1:
        filter_dict = {"$and": filter_conditions}
    elif filter_conditions:
        filter_dict = filter_conditions[0]
    else:
        # No filter at all must be None: an empty dict is rejected as an
        # invalid `where` clause by newer chromadb releases.
        filter_dict = None

    print(f"Using filter: {filter_dict}")

    try:
        results = global_chroma.similarity_search_with_score(
            query,
            k=top_k,
            filter=filter_dict
        )
    except Exception as e:
        print(f"Error during similarity search: {str(e)}")
        return "An error occurred while searching for relevant information."

    if not results:
        # If no results, try a more relaxed search (course only, or unfiltered).
        relaxed_filter = {"course_name": {"$eq": course_name.lower()}} if course_name else None
        # Re-running the identical query cannot return anything new.
        if relaxed_filter != filter_dict:
            try:
                results = global_chroma.similarity_search_with_score(
                    query,
                    k=top_k,
                    filter=relaxed_filter
                )
            except Exception as e:
                print(f"Error during relaxed similarity search: {str(e)}")
                return "An error occurred while searching for relevant information."

    if not results:
        return "No relevant information found in the course documents."

    context = "\n\n".join([f"Source: {doc.metadata['source']}, Page: {doc.metadata['page']}\n{doc.page_content}" for doc, _ in results])
    response = generate_response(query, context, llm)
    return response

## Tool for Agent: `pdf_rag_query`

In [16]:
@tool
def pdf_rag_query(query: str) -> str:
    """
    Query PDF documents for course information, assignments, and lecture content.
    
    Args:
        query: The question or query about course materials.
    
    Returns:
        A response based on the information found in the course PDFs.
    """
    # NOTE: the docstring above is kept verbatim — with langchain's @tool it
    # serves as the tool description. query_collection reads the module-level
    # global_chroma, so initialize_system() must have run before this is called.
    return query_collection(query)

## Calendar Querying Logic 

In [17]:
def get_events():
    """Read all non-deleted events from MySQL and return them as one text blob.

    Connection settings come from the ``DB_HOST``, ``DB_USER``, ``DB_PASSWORD``
    and ``DB_DATABASE`` environment variables.

    Returns:
        The ``events_json`` value of every row, re-serialised with
        ``json.dumps`` and joined by blank lines, in the order produced by the
        query. ``None`` when a MySQL error occurs.
    """
    db_host = os.getenv("DB_HOST")
    db_user = os.getenv("DB_USER")
    db_password = os.getenv("DB_PASSWORD")
    db_database = os.getenv("DB_DATABASE")

    # Sentinels so the cleanup below knows what was actually opened.
    connection = None
    cursor = None

    try:
        connection = mysql.connector.connect(
            host=db_host,
            user=db_user,
            password=db_password,
            database=db_database
        )
        cursor = connection.cursor()

        # Execute the SELECT query with ordering
        query = """
        SELECT events_json
        FROM events
        WHERE is_deleted = 0
        ORDER BY COALESCE(start_time, due_date) ASC
        """
        cursor.execute(query)

        # Round-trip each row through json so the output is uniformly formatted.
        serialized_events = [
            json.dumps(json.loads(row[0])) for row in cursor.fetchall()
        ]
        return "\n\n".join(serialized_events)

    except mysql.connector.Error as err:
        print(f"Something went wrong: {err}")
        return None

    finally:
        # Close cursor and connection
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()

In [18]:
# Get the events context
# This is a snapshot taken once, when the cell runs: query_calendar() reads this
# module-level value, so later database changes are not seen until the cell is
# re-run. get_events() returns None on a MySQL error, so this may be None.
events_context = get_events()

In [20]:
def query_calendar(query, llm):
    """Answer a calendar question by prompting ``llm`` with the cached events.

    Args:
        query: The student's question about their schedule.
        llm: Object exposing ``predict(prompt)``.

    Returns:
        Whatever ``llm.predict`` returns for the assembled prompt.

    Note:
        The events come from the module-level ``events_context`` rather than a
        fresh database read, and the current local time is inserted so the
        model can tell past events from future ones.
    """
    
    # Get current date and time
    current_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    
    # Format the context with a header and code block
    formatted_context = f"""
    
    This is the current date and time: {current_datetime}
    
    Below is a list of events in the student's calendar, ordered chronologically. Each event is represented as a JSON object:

    ```
    {events_context}
    ```
    
    All events have the following fields:
    - tag: The type of event (class, personal, study time, quiz, discussion, or assignment)
    - title: The title of the event
    - eventDuration: The duration of the event in minutes 
    - description: A description of the event (if available)

    Each assigment, quiz, or discussion could have the following additional fields: 
    - due_date: The due date (for assignment type events)
    - instructions: instructions for the academic activity 
    
    Each event that is not an assignment, quiz, or discussion has the following fields:
    - startTime: The start time of the event (for non-assignment type events)
    - endTime: The end time of the event (for non-assignment type events)
    
    """

    # The final prompt wraps the event context with the question and answer guidelines.
    prompt = f"""
    You are an AI assistant for a student calendar management system. You have been provided with the following context, which contains information about the student's events in chronological order:

    {formatted_context}

    Based on this context, please answer the following question:
    {query}

    Guidelines for your response:
    1. Provide accurate information based on the given context.
    2. If the exact answer isn't in the context, use your knowledge to give a relevant response, but clearly state that it's not directly from the calendar data.
    3. Remember that the events are already in chronological order with some events in the past and some in the future. 
    4. For assignment-type events, use the due_date field. For other events, use startTime and endTime.
    5. If asked about free time or scheduling, consider the startTime and endTime of events.
    6. Offer helpful suggestions or insights based on the student's schedule when appropriate.
    7. Keep your response concise yet informative.
    8. If you need more information to answer accurately, ask for clarification.

    Answer:
    """
    
    response = llm.predict(prompt)
    return response

## Tool for Agent: `calendar_query`

In [21]:
@tool
def calendar_query(query: str) -> str:
    """
    Query the student's calendar for information about events, classes, assignments, and schedule.
    
    Args:
        query: The question or query about the student's calendar and schedule.
    
    Returns:
        A response based on the information found in the student's calendar.
    """
    # NOTE: the docstring above is kept verbatim — with langchain's @tool it
    # serves as the tool description. Calendar questions go to `llm_calendar`.
    return query_calendar(query, llm_calendar)

## Tavily Search (Content Recommendations)

In [23]:
# Configure root logging at INFO level; `logger` is this module's own logger and
# is used by content_recommendations() to log each search result.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def content_recommendations(query: str) -> str:
    """Search the web via Tavily for freely accessible learning resources on ``query``.

    Args:
        query: Keywords or topic to find resources for.

    Returns:
        A text block with a header line followed by, for each hit, its
        1-based rank, title, relevance score (two decimals) and URL. Only the
        header is returned when the search yields no results.
    """
    tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

    search_query = f"What are some highly recommended, freely accessible resources to learn about {query}? Focus on reputable blog posts, open-access articles, and educational websites."

    response = tavily_client.search(search_query, search_depth="basic", include_answer=True, max_results=3)

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = [
        f"Here are some recommended resources based on a few keywords taken from the content: {query}:\n\n"
    ]

    # A response without a "results" key is treated as "no hits".
    for i, result in enumerate(response.get('results', []), 1):
        logger.info("Processing result %d: %s", i, result)

        title = result.get('title', 'No title available')
        url = result.get('url', 'No URL available')
        # An explicit None score would break the :.2f format below.
        score = result.get('score') or 0.0

        pieces.append(f"{i}. {title} (Relevance: {score:.2f})\n")
        pieces.append(f"   URL: {url}\n")

    return "".join(pieces)

## Tool for Agent: `get_content_recommendations`

In [24]:
@tool
def get_content_recommendations(query: str) -> str:
    """
    Get content recommendations for learning about a specific subject.
    
    Args:
        query: The subject or topic the user wants to learn about.
    
    Returns:
        A string containing relevant links and brief descriptions of recommended resources.
    """
    # NOTE: the docstring above is kept verbatim — with langchain's @tool it
    # serves as the tool description. The web search itself lives in
    # content_recommendations().
    return content_recommendations(query)

## AGENT

In [25]:
# Checkpointer handed to the agent graph; ":memory:" is SQLite's in-memory
# database, so saved conversation state does not outlive the process.
# NOTE(review): the return type of SqliteSaver.from_conn_string differs between
# langgraph releases (newer ones return a context manager) — confirm against
# the installed version.
memory = SqliteSaver.from_conn_string(":memory:")

In [26]:
class AgentState(TypedDict):
    # Graph state: the running list of conversation messages. The `operator.add`
    # annotation is the reducer, so messages returned by a node are appended to
    # the existing list instead of replacing it.
    messages: Annotated[list[AnyMessage], operator.add]

In [27]:
class Agent:
    """Tool-calling agent built as a two-node langgraph loop.

    The ``llm`` node calls the tool-bound model. When the reply contains tool
    calls the ``action`` node executes them and control returns to ``llm``;
    otherwise the graph ends.

    Args:
        model: Chat model exposing ``bind_tools`` and ``invoke``.
        tools: Tools the model may call; looked up by their ``name``.
        checkpointer: Checkpointer passed to ``graph.compile``.
        system: Optional system prompt prepended to every model call.
    """

    def __init__(self, model, tools, checkpointer, system=""):
        self.system = system
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_gemini)
        graph.add_node("action", self.take_action)
        graph.add_conditional_edges(
            "llm",
            self.exists_action,
            {True: "action", False: END}
        )
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile(checkpointer = checkpointer)

    def log_state(self, state: AgentState, location: str):
        """Print a short debug summary of ``state`` labelled with ``location``."""
        messages = state['messages']
        print(f"\n--- State at {location} ---")
        print(f"Number of messages: {len(messages)}")
        # Guard: an empty message list has no "last message" to describe.
        if messages:
            last_message = messages[-1]
            print(f"Last message type: {type(last_message)}")
            if isinstance(last_message, ToolMessage):
                print(f"Last tool used: {last_message.name}")
                # Only a preview: tool results can be very long.
                print(f"Tool result: {str(last_message.content)[:100]}...")  # First 100 chars
            elif hasattr(last_message, 'tool_calls'):
                print(f"Tool calls: {last_message.tool_calls}")
        print("------------------------\n")

    def exists_action(self, state: AgentState):
        """Return True when the latest message requests at least one tool call."""
        self.log_state(state, "exists_action")
        result = state['messages'][-1]
        # Messages without a tool_calls attribute simply mean "no action".
        return bool(getattr(result, 'tool_calls', None))

    def call_gemini(self, state: AgentState):
        """Invoke the model on the conversation and return its reply as a state update."""
        self.log_state(state, "call_gemini (before)")
        messages = state['messages']
        if self.system:
            # The system prompt is added per call and is not stored in the state.
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        new_state = {'messages': [message]}
        self.log_state(new_state, "call_gemini (after)")
        return new_state

    def take_action(self, state: AgentState):
        """Run every tool call in the latest message and return the ToolMessages."""
        self.log_state(state, "take_action (before)")
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            if t['name'] not in self.tools:
                # Tell the model so it can retry with a valid tool name.
                print("\n....bad tool name....")
                result = "bad tool name, retry"
            else:
                result = self.tools[t['name']].invoke(t['args'])
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        new_state = {'messages': results}
        self.log_state(new_state, "take_action (after)")
        return new_state

In [28]:
# Timestamp interpolated into the agent's system prompt as "Current datetime".
# NOTE(review): it is captured once, when this cell runs, so it goes stale in a
# long-lived session.
agent_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

In [30]:
# Build the Chroma vector store (ingesting the PDFs when the collection is empty).
# This must run before pdf_rag_query is used, because query_collection reads the
# global_chroma that this call sets.
initialize_system()

Initializing system...


INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


Getting or creating collection...
Collection is empty, processing PDFs...




Extracted 38 chunks.


INFO:chromadb.api.segment:Collection class_materials is not created.


Generated 38 embeddings.
Final count: 38 items.
Added 38 chunks to the collection.
PDF processing complete. Vector store updated.
Initializing global_chroma...
System initialization complete.


In [31]:
# System prompt for the agent: describes the three tools, when to use each one,
# and how to sequence them. `agent_datetime` is interpolated here, once, when
# this cell runs.
prompt = f"""
Current datetime: {agent_datetime}
You are an AI assistant supporting university students. Your primary goal is to provide comprehensive, helpful responses by effectively utilizing the following tools:

1. pdf_rag_query: Retrieves content from course documents (assignments, lecture notes, syllabi).
   - USE THIS TOOL FIRST for any query about specific course content, including lecture notes, assignments, or syllabus information.
   - Utilizes for query: course name, assignment number, week number, and document type (lecture notes, assignment, syllabus).
   - Example query: "Marketing Analytics Assignment 4 instructions" or "Week 3 Lecture Notes Statistical Analysis"

2. calendar_query: Searches through calendar events (classes, assignments, discussions and discussion content, personal events).
   - USE THIS TOOL ONLY for queries about schedules, deadlines, upcoming events, or when a student asks "what do I have" or "when is something due".
   - Example query: "next due assignment" or "upcoming Marketing Analytics classes" or "what do I have due today?"

3. get_content_recommendations: Web search tool for finding relevant educational resources.
   - USE THIS TOOL ONLY after gathering specific information from pdf_rag_query or when explicitly asked for external resources.
   - Use (only) 2 key terms or concepts for the web search.
   - Example query: "predictive modeling, optimization analysis"

DECISION-MAKING FRAMEWORK:
1. Carefully analyze the student's query. Identify the specific information needed to provide a complete answer.
2. Think and determine the most appropriate tool(s) for the query based on these guidelines:
   - If the query is about course content (lectures, assignments, syllabus), use pdf_rag_query FIRST.
   - If the query is about schedules or deadlines, use calendar_query.
   - If the query is about finding external resources, use get_content_recommendations AFTER using pdf_rag_query if needed.
3. Plan your approach step by step:
   a. Decide which tool(s) to use and in what order.
   b. Consider how information from one tool might inform the use of another.
4. Use tools ONE AT A TIME, in the most logical order to answer the query comprehensively.
5. For each tool use:
   a. Formulate specific queries based on the student's question and any previously gathered information.
   b. Include relevant course names, assignment numbers, or key concepts as needed.
6. After each tool use, evaluate:
   a. Review the information received.
   b. Have you gathered all necessary information?
   c. Do you need to use additional tools based on this new information?
   d. Can you now provide a comprehensive answer?
7. If another tool is needed, formulate the next query based on all information gathered so far.
8. Only proceed to the next tool after fully processing the results of the previous tool.
9. For get_content_recommendations, always base your search terms on concrete information from the query or previous tool results, not assumptions.

EXAMPLES OF TOOL USAGE PATTERNS:
1. For "Can you give me some resources for the next assignment?":
   - Use calendar_query to identify the next assignment
   - Then use pdf_rag_query to get details about that assignment
   - Finally use get_content_recommendations with key terms from the assignment details

2. For "What resources would you recommend based on marketing analytics assignment 2?":
   - First use pdf_rag_query to get the content of Marketing Analytics Assignment 2
   - Review the assignment content
   - Then use get_content_recommendations with 2 key concepts from the actual assignment content

Remember: Each query is unique. Adapt your tool usage based on the specific information needed. Process tools sequentially, using the output of one to inform the use of the next if needed. Do not call multiple tools simultaneously.

RESPONSE GUIDELINES:
1. Address the student directly in a helpful, encouraging manner.
2. Provide a clear, structured response that answers all aspects of the query.
3. Explain your reasoning if you've made any assumptions or interpretations.
4. Encourage the student to explore recommended resources when applicable.
5. Please include all tool responses in the final response.

Always prioritize providing the most relevant and helpful information to the student based on their specific query and the actual information gathered from the tools.
"""

# The agent itself runs on `llm_calendar`; its conversation state is
# checkpointed in `memory`, keyed by the thread id supplied at invoke time.
agent = Agent(llm_calendar, [pdf_rag_query, calendar_query, get_content_recommendations], system=prompt, checkpointer = memory)

In [32]:
def should_start_new_thread(previous_query: str, current_query: str) -> bool:
    """Decide whether ``current_query`` opens a new conversation thread.

    The current query stays on the existing thread when it begins with a
    follow-up opener (e.g. "what about", "and ", "also") or when its
    case-insensitive similarity ratio to ``previous_query`` is at least 0.7.

    Returns:
        ``True`` to start a new thread, ``False`` to continue the current one.
    """
    earlier = previous_query.lower()
    latest = current_query.lower()

    # Openers that mark the query as a continuation of the previous one.
    follow_up_openers = (
        r"^(what|how) about",
        r"^and ",
        r"^also",
        r"^additionally",
        r"^moreover",
        r"^furthermore",
        r"^in addition",
    )
    for opener in follow_up_openers:
        if re.match(opener, latest):
            return False

    # Not an explicit follow-up: fall back to textual similarity.
    return SequenceMatcher(None, earlier, latest).ratio() < 0.7

In [33]:
# Conversation bookkeeping that process_query() reads and updates:
# the previous query text (empty before the first call) ...
last_query = ""
# ... and the id of the checkpointer thread currently in use.
current_thread_id = str(uuid.uuid4())

In [34]:
def process_query(query: str) -> str:
    """Run ``query`` through the agent and return the text to show the student.

    A new conversation thread is started when ``should_start_new_thread``
    says the query is unrelated to the previous one. When the agent used tools
    for this query, their outputs are returned in the order calendar, PDF,
    recommendations; otherwise the agent's own final message is returned.

    Args:
        query: The student's question.

    Returns:
        The combined tool outputs for this turn, or the agent's direct reply.
    """
    global last_query, current_thread_id

    if should_start_new_thread(last_query, query):
        current_thread_id = str(uuid.uuid4())
        print(f"Starting new thread with ID: {current_thread_id}")

    thread = {"configurable": {"thread_id": current_thread_id}}
    result = agent.graph.invoke({"messages": [HumanMessage(content=query)]}, thread)
    all_messages = result['messages']

    # With a checkpointer the returned state holds the whole thread, not just
    # this turn. Only look at what follows the latest human message; otherwise
    # tool output from an earlier question would be returned as the answer to
    # a follow-up.
    turn_start = 0
    for index in range(len(all_messages) - 1, -1, -1):
        if isinstance(all_messages[index], HumanMessage):
            turn_start = index + 1
            break
    turn_messages = all_messages[turn_start:]

    # Insertion order fixes the order of the sections in the final response.
    tool_outputs = {
        'calendar_query': "",
        'pdf_rag_query': "",
        'get_content_recommendations': "",
    }
    for message in turn_messages:
        if isinstance(message, ToolMessage) and message.name in tool_outputs:
            tool_outputs[message.name] = message.content

    # Join only the sections that have content, so unused tools leave no blank gaps.
    final_response = "\n\n".join(
        section for section in tool_outputs.values() if section
    ).strip()

    if not final_response:
        # No tool produced any text this turn: use the agent's direct response.
        final_response = all_messages[-1].content

    # Update the last query
    last_query = query

    return final_response

In [36]:
if __name__ == "__main__":
    # Smoke test: send each sample question through the agent and print the reply.
    test_queries = [
        "what's my next event?",
    ]

    for query in test_queries:
        print(f"\nQuery: {query}")
        response = process_query(query)
        print(f"Response:\n{response}", "-" * 50, sep="\n")


Query: what's my next event?
Starting new thread with ID: 4aeec1de-bfc1-466f-b559-2aaf14542636

--- State at call_gemini (before) ---
Number of messages: 1
Last message type: <class 'langchain_core.messages.human.HumanMessage'>
------------------------


--- State at call_gemini (after) ---
Number of messages: 1
Last message type: <class 'langchain_core.messages.ai.AIMessage'>
Tool calls: [{'name': 'calendar_query', 'args': {'query': 'next event'}, 'id': '053cf8b0-5a9b-41ca-8e09-73098873a78d', 'type': 'tool_call'}]
------------------------


--- State at exists_action ---
Number of messages: 2
Last message type: <class 'langchain_core.messages.ai.AIMessage'>
Tool calls: [{'name': 'calendar_query', 'args': {'query': 'next event'}, 'id': '053cf8b0-5a9b-41ca-8e09-73098873a78d', 'type': 'tool_call'}]
------------------------


--- State at take_action (before) ---
Number of messages: 2
Last message type: <class 'langchain_core.messages.ai.AIMessage'>
Tool calls: [{'name': 'calendar_query'