In [None]:
import requests
import pandas as pd
from config import RecGov_API_Key
import time
import json
from flask import Flask, jsonify



Request URL: https://ridb.recreation.gov/api/v1/facilities?limit=50&offset=0&state=AZ&lastupdated=10-01-2018
Request URL: https://ridb.recreation.gov/api/v1/facilities?limit=50&offset=50&state=AZ&lastupdated=10-01-2018
Request URL: https://ridb.recreation.gov/api/v1/facilities?limit=50&offset=100&state=AZ&lastupdated=10-01-2018
Reached maximum number of records 150. Ending.
No data found in API response.
Facilities DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 31 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Enabled                    150 non-null    bool   
 1   FACILITYADDRESS            150 non-null    object 
 2   FacilityAccessibilityText  150 non-null    object 
 3   FacilityAdaAccess          150 non-null    object 
 4   FacilityDescription        150 non-null    object 
 5   FacilityDirections         150 non-null    object 
 6   FacilityEmai

In [None]:


base_url = 'https://ridb.recreation.gov/api/v1/'

# Create a function to fetch data from the Recreation.gov API
def fetch_ridb_data(api_key, endpoint, params=None, max_records=100, timeout=30):
    """Fetches data from the Recreation.gov API for various endpoints, handling pagination and rate limits.

    Requests ``endpoint`` page by page (50 records per page) until
    ``max_records`` records have been collected, the API returns a short page,
    or a request fails. Records gathered before a failure are still returned.

    Args:
        api_key: Your Recreation.gov API key.
        endpoint: The API endpoint to hit (e.g., 'facilities', 'activities').
        params: A dictionary of extra query parameters. The keys 'limit' and
            'offset' are reserved for pagination and are ignored if supplied.
        max_records: The maximum number of records to fetch in total.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of at most ``max_records`` dictionaries representing the data
        from the API (empty if nothing could be fetched).
    """

    headers = {
        'accept': 'application/json',
        'apikey': api_key
    }
    limit = 50               # records requested per page
    rate_limit_delay = 1.1   # seconds to pause between consecutive requests
    full_url = f"{base_url}{endpoint}"

    # 'limit' and 'offset' drive the loop below; letting the caller overwrite
    # them would make every iteration request the same page.
    extra_params = {
        key: value
        for key, value in (params or {}).items()
        if key not in ('limit', 'offset')
    }

    all_data = []
    offset = 0
    while len(all_data) < max_records:
        # Never ask for more than is still needed, so the total cannot overshoot.
        request_size = min(limit, max_records - len(all_data))
        current_params = {
            'limit': request_size,
            'offset': offset,
        }
        current_params.update(extra_params)
        # Create the URL string and print it
        url_string = f"{full_url}?{requests.compat.urlencode(current_params)}"
        print(f"Request URL: {url_string}")

        try:
            response = requests.get(
                full_url, headers=headers, params=current_params, timeout=timeout
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            print(f"Error during API request at offset {offset}: {e}")
            break

        if not isinstance(data, dict) or "RECDATA" not in data:
            print(f"No RECDATA found at offset {offset}. Ending.")
            break

        page = data["RECDATA"] or []
        all_data.extend(page)
        offset += request_size

        # A short page means the endpoint has no further records. Judge this by
        # the records actually received rather than by optional metadata.
        if len(page) < request_size:
            print(f"No more records available at offset {offset}. Ending.")
            break

        if len(all_data) >= max_records:
            print(f"Reached maximum number of records {max_records}. Ending.")
            break

        # Only pause when another request is about to follow.
        time.sleep(rate_limit_delay)

    # Guard against an API that returns more rows than requested.
    return all_data[:max_records]



In [None]:
# Create a function to fetch related data from the Recreation.gov API
def fetch_ridb_related_data(api_key, endpoint, facility_id, params=None, timeout=30):
    """Fetches related data from the Recreation.gov API for a specific facility.

    Issues a single GET to ``facilities/<facility_id>/<endpoint>`` and returns
    the ``RECDATA`` list from the JSON response. Any request or decoding
    failure is reported on stdout and yields an empty list, so callers can
    always iterate over the result.

    Args:
        api_key: Your Recreation.gov API key.
        endpoint: The API endpoint to hit (e.g., 'activities', 'campsites').
        facility_id: The ID of the facility to fetch data for.
        params: A dictionary of query parameters.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of dictionaries representing the data from the API; an empty
        list when the request fails or the response carries no RECDATA.

    Note:
        Only one response page is read. NOTE(review): if the API paginates
        these sub-resources, facilities with many related records may be
        truncated -- confirm against the API documentation.
    """
    headers = {
        'accept': 'application/json',
        'apikey': api_key
    }

    full_url = f"{base_url}facilities/{facility_id}/{endpoint}"

    try:
        response = requests.get(full_url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException.
        print(f"Error during API request: {e}")
        return []

    if isinstance(data, dict):
        # `or []` also normalises an explicit null RECDATA to an empty list.
        return data.get("RECDATA") or []
    return []



In [None]:
# Create a function to create a pandas DataFrame from the extracted API data
def create_dataframe(data, data_key="RECDATA"):
    """Build a pandas DataFrame from a list of API records.

    Args:
        data: The list of dictionaries representing the data.
        data_key: Kept for interface compatibility (default is 'RECDATA');
            this function does not read it.

    Returns:
        A DataFrame built from ``data``. When ``data`` is empty or None, a
        notice is printed and an empty DataFrame is returned instead.
    """
    # Guard clause: nothing to tabulate.
    if not data:
        print("No data found in API response.")
        return pd.DataFrame()

    return pd.DataFrame(data)
    
def process_facilities_data(api_key, facilities_data):
    """Creates multiple DataFrames from the facilities data and related data.

    For every facility that has a 'FacilityID', the related activities,
    campsites and events are fetched with ``fetch_ridb_related_data``. Each
    related record is tagged with the owning 'FacilityID' so the resulting
    tables can be joined back to the facilities table.

    Args:
        api_key: Your Recreation.gov API key.
        facilities_data: The list of dictionaries representing the facilities from the API.

    Returns:
        A tuple containing the facilities_df, activities_df, campsites_df, and events_df.
        All four are empty DataFrames when ``facilities_data`` is empty.
    """
    if not facilities_data:
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    facilities_df = create_dataframe(facilities_data)

    # One accumulator per related endpoint, requested in this order per facility.
    related_endpoints = ("activities", "campsites", "events")
    related_records = {name: [] for name in related_endpoints}

    for facility in facilities_data:
        facility_id = facility.get('FacilityID')
        if not facility_id:
            continue
        for name in related_endpoints:
            for record in fetch_ridb_related_data(api_key, name, facility_id):
                # assign the facility id as a foreign key to be used in tables
                record['FacilityID'] = facility_id
                related_records[name].append(record)

    # Drop the nested placeholder columns. errors="ignore" keeps this from
    # raising KeyError when the payload does not include all of them.
    facilities_df = facilities_df.drop(
        columns=["ACTIVITY", "CAMPSITE", "EVENT"], errors="ignore"
    )

    activities_df = create_dataframe(related_records["activities"])
    campsites_df = create_dataframe(related_records["campsites"])
    events_df = create_dataframe(related_records["events"])
    return facilities_df, activities_df, campsites_df, events_df



In [None]:
# Set up the API request, process the data, and output the DataFrames

if __name__ == "__main__":
    # Example 1: Fetching facilities (filtered by the 'state' and 'lastupdated' query parameters)
    facilities_endpoint = 'facilities'
    facilities_params = dict(state='AZ', lastupdated='10-01-2018')
    facilities_data = fetch_ridb_data(
        RecGov_API_Key, facilities_endpoint, facilities_params, 150
    )

    if facilities_data:
        facilities_df, activities_df, campsites_df, events_df = process_facilities_data(
            RecGov_API_Key, facilities_data
        )

        # (frame, heading printed when populated, message printed when empty)
        report_plan = (
            (facilities_df,
             "Facilities DataFrame:",
             "Failed to create the facilities Dataframe"),
            (activities_df,
             "\nActivities DataFrame:",
             "\nFailed to create the activities Dataframe"),
            (campsites_df,
             "\nCampsites DataFrame:",
             "\nFailed to create the campsites Dataframe"),
            (events_df,
             "\nEvents DataFrame:",
             "\nFailed to create the events Dataframe"),
        )

        # Summarise each table in turn: schema via .info(), then the first rows.
        for report_frame, report_heading, report_failure in report_plan:
            if report_frame.empty:
                print(report_failure)
                continue
            print(report_heading)
            report_frame.info()
            print(report_frame.head())