# National Park Vacation Route Tool

## Data Source

Data was obtained from the National Park Service (https://www.nps.gov/subjects/developer/api-documentation.htm).

## Data Preparation

### Import Libraries

In [9]:
# Import requests for JSON
import requests

# Import libraries required for this analysis
import numpy as np
import pandas as pd

# For displaying plots and maps
import seaborn as sns
import matplotlib.pyplot as plt
from folium import Map, Figure
import folium

# For timing training/prediction
import time

# Display all fields
pd.set_option('display.max_columns', None)

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

### Import Raw Data from NPS API (external script)

In [3]:
# (Moved to fetch_nps_data.py)
"""
# Available fields for each park

id
url
*fullName
parkCode
name
*description
*designation
*latitude
*longitude
latLong (combined "lat:..., long:...")
*activities (array of objects with id and name)
topics
*states
contacts (e.g., phone, email)
entranceFees (array)
entrancePasses
fees
directionsInfo
directionsUrl
operatingHours
addresses
images (photos array)
weatherInfo


API_KEY = "fpyJ9NycrgZX5mK8f0n90c4qXGPcYAsBPwt4BLJk"
url = "https://developer.nps.gov/api/v1/parks"

def fetch_all_parks(api_key):
    all_parks = []
    start = 0
    limit = 50

    while True:
        params = {
            "limit": limit,
            "start": start,
            "api_key": api_key
        }

        response = requests.get(url, params=params)
        response.raise_for_status()  # raise error for bad response
        data = response.json().get("data", [])

        if not data:
            break  # no more data

        all_parks.extend(data)
        start += limit  # go to next page

    return all_parks

# Fetch data
parks_raw = fetch_all_parks(API_KEY)

# Convert to DataFrame
records = []
for park in parks_raw:
    activity_list = park.get('activities', [])
    activity_names = [a.get('name', '') for a in activity_list]
    
    records.append({
        'name': park.get('fullName', ''),
        'latitude': park.get('latitude', ''),
        'longitude': park.get('longitude', ''),
        'designation': park.get('designation', ''),
        'states': park.get('states', ''),
        'description': park.get('description', ''),
        'activities': ', '.join(activity_names)
    })

parks = pd.DataFrame(records)

# Save as CSV
parks.to_csv("../data/nps_parks_with_activities.csv", index=False)
"""
# Run from fetch_nps_data.py
# %run ../scripts/fetch_nps_data.py

'\n# Available fields for each park\n\nid\nurl\n*fullName\nparkCode\nname\n*description\n*designation\n*latitude\n*longitude\nlatLong (combined "lat:..., long:...")\n*activities (array of objects with id and name)\ntopics\n*states\ncontacts (e.g., phone, email)\nentranceFees (array)\nentrancePasses\nfees\ndirectionsInfo\ndirectionsUrl\noperatingHours\naddresses\nimages (photos array)\nweatherInfo\n\n\nAPI_KEY = "fpyJ9NycrgZX5mK8f0n90c4qXGPcYAsBPwt4BLJk"\nurl = "https://developer.nps.gov/api/v1/parks"\n\ndef fetch_all_parks(api_key):\n    all_parks = []\n    start = 0\n    limit = 50\n\n    while True:\n        params = {\n            "limit": limit,\n            "start": start,\n            "api_key": api_key\n        }\n\n        response = requests.get(url, params=params)\n        response.raise_for_status()  # raise error for bad response\n        data = response.json().get("data", [])\n\n        if not data:\n            break  # no more data\n\n        all_parks.extend(data)\n    

### Import Generated CSV to Dataframe

In [4]:
parks = pd.read_csv('../data/nps_parks_with_activities.csv')

### Inspect Data

In [5]:
# Print dataframe head
print('First five rows of dataframe')
display(parks.head())
print()
    
# Print dataframe sample
print('Random five rows of dataframe')
display(parks.sample(5))
print()
    
# Check for missing values
print('Check for Missing Values')
print(parks.isna().sum())
print()

# Check data types
print('Check Data Types')
print(parks.info())
print()

# Check values for each column
print('Describe Dataframe')
print(parks.describe(include = 'all'))
print()
    
# Check for duplicates
print('Count of Duplicated Rows')
print(parks.duplicated().sum())

First five rows of dataframe


Unnamed: 0,name,latitude,longitude,designation,states,description,amenities,activities
0,Abraham Lincoln Birthplace National Historical...,37.585866,-85.673305,National Historical Park,KY,For over a century people from around the worl...,,"Astronomy, Stargazing, Food, Picnicking, Guide..."
1,Acadia National Park,44.409286,-68.247501,National Park,ME,Acadia National Park protects the natural beau...,,"Arts and Culture, Cultural Demonstrations, Ast..."
2,Adams National Historical Park,42.255396,-71.011604,National Historical Park,MA,From the sweet little farm at the foot of Penn...,,"Guided Tours, Self-Guided Tours - Walking, Liv..."
3,African American Civil War Memorial,38.9166,-77.026,,DC,"Over 200,000 African-American soldiers and sai...",,"Guided Tours, Self-Guided Tours - Walking"
4,African Burial Ground National Monument,40.714527,-74.004474,National Monument,NY,The African Burial Ground is the oldest and la...,,"Arts and Culture, Guided Tours, Junior Ranger ..."



Random five rows of dataframe


Unnamed: 0,name,latitude,longitude,designation,states,description,amenities,activities
252,Kennesaw Mountain National Battlefield Park,33.953707,-84.592142,National Battlefield Park,GA,Kennesaw Mountain National Battlefield Park is...,,"Biking, Road Biking, Guided Tours, Self-Guided..."
112,Cuyahoga Valley National Park,41.260939,-81.571167,National Park,OH,Though a short distance from the urban areas o...,,"Arts and Culture, Craft Demonstrations, Cultur..."
144,Fort Bowie National Historic Site,32.149259,-109.451132,National Historic Site,AZ,"For nearly 25 years, Fort Bowie stood at the c...",,"Hiking, Front-Country Hiking, Horse Trekking, ..."
231,Ice Age National Scenic Trail,43.98957,-89.561202,National Scenic Trail,WI,"The Ice Age National Scenic Trail spans 1,200 ...",,"Camping, Backcountry Camping, Fishing, Hands-O..."
408,Statue Of Liberty National Monument,40.6898,-74.0451,National Monument,NY,A gift of friendship from the people of France...,,"Food, Dining, Picnicking, Guided Tours, Self-G..."



Check for Missing Values
name             0
latitude         1
longitude        1
designation     35
states           0
description      0
amenities      474
activities      10
dtype: int64

Check Data Types
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 474 entries, 0 to 473
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         474 non-null    object 
 1   latitude     473 non-null    float64
 2   longitude    473 non-null    float64
 3   designation  439 non-null    object 
 4   states       474 non-null    object 
 5   description  474 non-null    object 
 6   amenities    0 non-null      float64
 7   activities   464 non-null    object 
dtypes: float64(3), object(5)
memory usage: 29.8+ KB
None

Describe Dataframe
                                                     name    latitude  \
count                                                 474  473.000000   
unique                                     

**Observations:**

- There are a few missing values - if we want to include those values on our map, these should be removed. These are latitude, longitude, designation, and activities
- Datatypes are all correct
- Amenities needs to be fixed (API)


- The instructions want a maximum of 9 geographical locations, so we will have to trim a subset of this data to use
- We also have to remove National Parks not accessible by roads or extremely far away (e.g. Alaska, Hawaii)

### Visualization of All National Parks in the US

In [10]:
parks = parks.dropna(subset=['latitude', 'longitude'])

# Example data (replace with your DataFrame)
data = {
    'name': parks['name'],
    'latitude': parks['latitude'],
    'longitude': parks['longitude']
}

df = pd.DataFrame(data)

fig = Figure(width=1200, height=800)
m = Map(location=[39.8283, -98.5795], zoom_start=4)
fig.add_child(m)

for _, row in df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['name']
    ).add_to(m)

fig