In [3]:
from urllib.parse import quote
from langchain.tools import Tool
import requests
import json

In [5]:
def process_serpapi_results(search_results, filter_domain=True):
    """
    Process and filter SerpAPI results to extract the most relevant information.
    """
    processed_data = {
        "search_metadata": {},
        "organic_results": [],
        "knowledge_graph": {},
        "related_questions": []
    }
    
    # Extract search metadata
    if "search_metadata" in search_results:
        processed_data["search_metadata"] = {
            "query": search_results["search_metadata"].get("query", ""),
            "total_results": search_results.get("search_information", {}).get("total_results", 0)
        }
    
    # Extract organic results
    if "organic_results" in search_results:
        organic_results = search_results["organic_results"]
        
        # Filter for duke.edu domains if requested
        if filter_domain:
            duke_results = [result for result in organic_results 
                           if "duke.edu" in result.get("link", "")]
            
            # Prioritize pratt.duke.edu results
            pratt_results = [result for result in duke_results 
                            if "pratt.duke.edu" in result.get("link", "")]
            
            other_duke_results = [result for result in duke_results 
                                 if "pratt.duke.edu" not in result.get("link", "")]
            
            # Combine with pratt results first, then other duke results
            filtered_results = pratt_results + other_duke_results
            
            # If we have fewer than 5 duke.edu results, add some non-duke results
            if len(filtered_results) < 5:
                non_duke_results = [result for result in organic_results 
                                   if "duke.edu" not in result.get("link", "")]
                filtered_results.extend(non_duke_results[:5 - len(filtered_results)])
                
            processed_results = filtered_results
        else:
            processed_results = organic_results
        
        # Extract the most useful information from each result
        for result in processed_results[:8]:  # Limit to top 8 results
            processed_data["organic_results"].append({
                "title": result.get("title", ""),
                "link": result.get("link", ""),
                "snippet": result.get("snippet", ""),
                "source": result.get("source", "")
            })
    
    # Extract knowledge graph information if available
    if "knowledge_graph" in search_results:
        kg = search_results["knowledge_graph"]
        processed_data["knowledge_graph"] = {
            "title": kg.get("title", ""),
            "type": kg.get("type", ""),
            "description": kg.get("description", ""),
            "website": kg.get("website", ""),
            "address": kg.get("address", "")
        }
    
    # Extract related questions if available
    if "related_questions" in search_results:
        for question in search_results["related_questions"][:4]:  # Limit to top 4 questions
            processed_data["related_questions"].append({
                "question": question.get("question", ""),
                "answer": question.get("answer", "")
            })
    
    return processed_data

In [6]:
def get_pratt_info_from_serpapi(query="Duke Pratt School of Engineering", api_key="9339dbe03e129628964af59694c4709f334ee7bf84e7c0c1e335cbc9ea0bbaf6", filter_domain=True):
    """
    Retrieve information about Duke's Pratt School of Engineering using SerpAPI.
    
    Parameters:
        query (str): The search query to send to SerpAPI.
        api_key (str): Your SerpAPI API key.
        filter_domain (bool): If True, prioritize results from duke.edu and pratt.duke.edu.
    
    Returns:
        str: JSON-formatted processed search results about the Pratt School of Engineering.
    """
    # Construct the SerpAPI URL with the query
    encoded_query = quote(query)
    url = f"https://serpapi.com/search.json?q={encoded_query}&engine=google&num=10&api_key={api_key}"
    
    try:
        # Make the request to SerpAPI
        response = requests.get(url)
        response.raise_for_status()
        
        # Parse the JSON response
        search_results = response.json()
        
        # Process and filter the results
        processed_results = process_serpapi_results(search_results, filter_domain)
        
        return json.dumps(processed_results)
        
    except Exception as e:
        return json.dumps({"error": f"Failed to fetch data from SerpAPI: {str(e)}"})

In [7]:
def get_specific_pratt_info(topic="general", subtopic=None, api_key="9339dbe03e129628964af59694c4709f334ee7bf84e7c0c1e335cbc9ea0bbaf6"):
    """
    Retrieve specific information about Duke's Pratt School of Engineering using SerpAPI.
    """
    # Map topics to specific search queries
    topic_queries = {
        "general": "Duke Pratt School of Engineering overview information",
        "academics": "Duke Pratt School of Engineering academic programs degrees majors",
        "admissions": "Duke Pratt School of Engineering admissions requirements application deadlines",
        "ai_meng": "Duke Pratt AI for Product Innovation MEng program curriculum courses",
        "student_life": "Duke Pratt School of Engineering student life experience campus",
        "research": "Duke Pratt School of Engineering research areas labs projects",
        "faculty": "Duke Pratt School of Engineering faculty professors researchers",
        "events": "Duke Pratt School of Engineering events workshops seminars"
    }
    
    # Map subtopics for more specific queries
    subtopic_queries = {
        "academics": {
            "undergraduate": "Duke Pratt School of Engineering undergraduate programs BSE degrees majors",
            "graduate": "Duke Pratt School of Engineering graduate programs masters PhD",
            "courses": "Duke Pratt School of Engineering course offerings classes",
            "requirements": "Duke Pratt School of Engineering degree requirements curriculum"
        },
        "admissions": {
            "undergraduate": "Duke Pratt School of Engineering undergraduate admissions requirements deadlines",
            "graduate": "Duke Pratt School of Engineering graduate admissions requirements deadlines",
            "deadlines": "Duke Pratt School of Engineering application deadlines",
            "requirements": "Duke Pratt School of Engineering application requirements"
        },
        "ai_meng": {
            "curriculum": "Duke Pratt AI for Product Innovation MEng program curriculum courses",
            "admissions": "Duke Pratt AI for Product Innovation MEng program admissions requirements",
            "careers": "Duke Pratt AI for Product Innovation MEng program career outcomes jobs",
            "faculty": "Duke Pratt AI for Product Innovation MEng program faculty instructors"
        }
    }
    
    # Check if the topic is valid
    if topic not in topic_queries:
        return json.dumps({
            "error": f"Topic '{topic}' not found",
            "available_topics": list(topic_queries.keys())
        })
    
    # Construct the query based on topic and subtopic
    if subtopic and topic in subtopic_queries and subtopic in subtopic_queries[topic]:
        query = subtopic_queries[topic][subtopic]
    else:
        query = topic_queries[topic]
    
    # Call the SerpAPI search function
    return get_pratt_info_from_serpapi(query, api_key)

In [9]:
get_pratt_info_from_serpapi('duke pratt')

'{"search_metadata": {"query": "", "total_results": 10200000}, "organic_results": [{"title": "Duke Pratt School of Engineering: A Student Experience That ...", "link": "https://pratt.duke.edu/", "snippet": "Highly Ranked Programs \\u00b7 Biomedical Engineering \\u00b7 Civil Engineering \\u00b7 Computer Engineering \\u00b7 Electrical Engineering \\u00b7 Environmental Engineering \\u00b7 Materials ...", "source": "Duke Pratt School of Engineering"}, {"title": "Duke University (Pratt) - Best Engineering Schools", "link": "https://www.usnews.com/best-graduate-schools/top-engineering-schools/duke-university-02130", "snippet": "Duke University (Pratt) is ranked No. 20 (tie) out of 198 in Best Engineering Schools. Schools were assessed on their performance across a set of widely ...", "source": "U.S. News & World Report"}, {"title": "Pratt School of Engineering", "link": "https://en.wikipedia.org/wiki/Pratt_School_of_Engineering", "snippet": "The Pratt School of Engineering is the engineering

In [10]:
get_specific_pratt_info()

'{"search_metadata": {"query": "", "total_results": 603000}, "organic_results": [{"title": "Duke Pratt School of Engineering: A Student Experience That ...", "link": "https://pratt.duke.edu/", "snippet": "Dive into Duke Engineering campus life, where challenges and opportunities create a unique, world-class student experience.", "source": "Duke Pratt School of Engineering"}, {"title": "Our Story | Duke Pratt School of Engineering", "link": "https://pratt.duke.edu/about/", "snippet": "A professional school only since 1966, Duke Engineering is young, scrappy and hungry. We aren\'t afraid to place big bets on emerging ideas.", "source": "Duke Pratt School of Engineering"}, {"title": "Undergraduate Admissions | Duke Pratt School of Engineering", "link": "https://pratt.duke.edu/admissions/undergrad/", "snippet": "Unleash your potential at Duke Engineering. As an undergraduate, you\'ll tackle real-world problems and ignite your passion at Duke.", "source": "Duke Pratt School of Engineering"}

In [11]:
def get_events_from_duke_api(feed_type: str = "json",
                             future_days: int = 45,
                             groups: list = ['All'],
                             categories: list = ['All'],
                             filter_method_group: bool = True,
                             filter_method_category: bool = True) -> str:
    """
    Fetch events from Duke University's public calendar API with optional filters.

    Parameters:
        feed_type (str): Format of the returned data. Acceptable values include:
                         'rss', 'js', 'ics', 'csv', 'json', 'jsonp'. Defaults to 'json'.
        future_days (int): Number of days into the future for which to fetch events.
                           Defaults to 45.
        groups (list):  The organizer or host groups of the events or the related groups in events. For example,
                        '+DataScience (+DS)' refers to events hosted by the DataScience program.
                        Use 'All' to include events from all groups. 
        categories (list): 
                        The thematic or topical category of the events. For example,
                        'Academic Calendar Dates', 'Alumni/Reunion', or 'Artificial Intelligence'.
                         Use 'All' to include events from all categories.
        filter_method_group (bool): 
            - True: Event must match ALL specified groups (AND).
            - False: Event may match ANY of the specified groups (OR).
        filter_method_category (bool): 
            - True: Event must match ALL specified categories (AND).
            - False: Event may match ANY of the specified categories (OR).

    Returns:
        str: Raw calendar data (e.g., in JSON, XML, or ICS format) or an error message.
    """
    
    # When feed_type is not one of these types, add the simple feed_type parameter.
    feed_type_param = ""
    if feed_type not in ['rss', 'js', 'ics', 'csv']:
        feed_type_param = "feed_type=simple"
    
    feed_type_url = feed_type_param if feed_type_param else ""

    if filter_method_group:
        if 'All' in groups:
            group_url = ""
        else:
            for group in groups:
                group_url+='&gfu[]='+quote(group, safe="")
    else:
        if 'All' in groups:
            group_url = ""
        else:
            group_url = "&gf[]=" + quote(groups[0], safe="")
            for group in groups[1:]:
                group_url += "&gf[]=" + quote(group, safe="")

    if filter_method_category:
        if 'All' in categories:
            category_url = ""
        else:
            for category in categories:
                category_url += '&cfu[]=' + quote(category, safe="")
    else:
        if 'All' in categories:
            category_url = ""
        else:
            for category in categories:
                category_url += "&cf[]=" + quote(category, safe="")

    url = f'https://calendar.duke.edu/events/index.{feed_type}?{category_url}{group_url}&future_days={future_days}&{feed_type_url}'

    response = requests.get(url)

    if response.status_code == 200:
        return response.text
    else:
        return f"Failed to fetch data: {response.status_code}"

In [12]:
get_events_from_duke_api()

'{"events":[{"id":"CAL-8a000483-92c3adf6-0194-a8f4e1ce-00004ef7demobedework@mysite.edu","start_timestamp":"2025-01-29T17:00:00Z","end_timestamp":"2025-04-21T16:00:00Z","summary":"Tavola Italiana Spring 2025","description":"Please use link to sign up to secure your spot, first come, first served!\\nhttps://docs.google.com/document/d/1_kjnPJPnfZC_Cije_vbjhzRPFfqVgtqsxceuZVumUG0/edit?tab=t.0\\n\\n(ATTENZIONE - every single event has its own table, you only need to write your name, course, and instructor\'s name in the table of the event you selected - you do NOT need to write the name of the event anywhere)\\n\\nNB.: If you forget to sign up, you can still attend the event, but if the number of students has reached the max cap, priority will be given to those students who signed up first.","status":"CONFIRMED","sponsor":"Romance Studies","co_sponsors":null,"location":{"address":"different locations"},"contact":{"name":"Pierpaolo Spagnolo","email":"pierpaolo.spagnolo@duke.edu"},"categories

In [13]:
def get_curriculum_with_subject_from_duke_api(subject: str):

    """

    Retrieve curriculum information from Duke University's API by specifying a subject code, allowing you to access brief details about available courses.

    Parameters:
        subject (str): The subject to get curriculumn data for. For example, the subject is 'ARABIC-Arabic'.

    Returns:
        str: Raw curriculum data in JSON format or an error message. If valid result, the response will contain each course's course id and course offer number for further queries.
        The value of course id is the value of 'crse_id' in the response, and the value of course offer number is the value of 'crse_offer_nbr' in the response.
    """

    subject_url = quote(subject, safe="")

    url = f'https://streamer.oit.duke.edu/curriculum/courses/subject/{subject_url}?access_token=19d3636f71c152dd13840724a8a48074'

    response = requests.get(url)

    if response.status_code == 200:
        return response.text
    else:
        return f"Failed to fetch data: {response.status_code}"

In [15]:
get_curriculum_with_subject_from_duke_api('deep learning')

'Failed to fetch data: 500'

In [17]:
def get_detailed_course_information_from_duke_api(course_id: str, course_offer_number: str):

    """
    
    Retrieve curriculum information from Duke University's API by specifying a course ID and course offer number, allowing you to access detailed information about a specific course.

    Parameters:
        course_id (str): The course ID to get curriculum data for. For example, the course ID is 029248' for General African American Studies.
        course_offer_number (str): The course offer number to get curriculum data for. For example, the course offer number is '1' for General African American Studies.

    Returns:
        str: Raw curriculum data in JSON format or an error message.

    """

    url = f'https://streamer.oit.duke.edu/curriculum/courses/crse_id/{course_id}/crse_offer_nbr/{course_offer_number}'

    response = requests.get(url)

    if response.status_code == 200:
        return response.text
    else:
        return f"Failed to fetch data: {response.status_code}"

In [19]:
get_detailed_course_information_from_duke_api('590', '590')

'Failed to fetch data: 401'

In [20]:
def get_people_information_from_duke_api(name: str):

    """
    
    Retrieve people information from Duke University's API by specifying a name, allowing you to access detailed information about a specific person.

    Parameters:
        name (str): The name to get people data for. For example, the name is 'John Doe'.

    Returns:
        str: Raw people data in JSON format or an error message.

    """

    name_url = quote(name, safe="")

    url = f'https://streamer.oit.duke.edu/ldap/people?q={name_url}&access_token=19d3636f71c152dd13840724a8a48074'

    response = requests.get(url)

    if response.status_code == 200:
        return response.text
    else:
        return f"Failed to fetch data: {response.status_code}"