# National Park Vacation Route Tool

## 1. Data

### 1.1. Data Sources

- Data was obtained from the National Park Service (https://www.nps.gov/index.htm) using their API (https://www.nps.gov/subjects/developer/api-documentation.htm).
- United States Latitude and Longitude Data was obtained from Kaggle (https://www.kaggle.com/datasets/tennerimaheshwar/us-state-and-territory-latitude-and-longitude-data)

## 2. Import External Libraries and Data

### 2.1. Import Libraries

In [19]:
# JSON requests
import requests
import json

# Environment variables (API keys)
import os

# Data manipulation
import numpy as np
import pandas as pd

# Displaying plots and maps
import seaborn as sns
import matplotlib.pyplot as plt
import folium as fl # change if SWE need

# Dummy and greedy nearest neighbors algorithm requirements
import math
import googlemaps
import random

# Timing the algorithm
import time

# Neatly display dataframes
from tabulate import tabulate

# Display all fields with pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

### 2.2. Inspect Raw Data JSON Response from NPS API

In [20]:
# Inspect full JSON response of the first park to determine fields we need and data structure
# The NPS key is read from the environment so it is never stored in the notebook.
# NOTE(review): a literal key was previously committed in this cell; it should be revoked and reissued.
api_key = os.environ["NPS_API_KEY"]
url = "https://developer.nps.gov/api/v1/parks"

# limit=1 / start=0 asks for a single record, which is enough to study the schema
params = {
    "limit": 1,
    "start": 0,
    "api_key": api_key
}

# A timeout keeps a stalled connection from hanging the kernel indefinitely
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
data = response.json()

# Print the full JSON response of the first park
print(json.dumps(data["data"][0], indent=2))

{
  "id": "77E0D7F0-1942-494A-ACE2-9004D2BDC59E",
  "url": "https://www.nps.gov/abli/index.htm",
  "fullName": "Abraham Lincoln Birthplace National Historical Park",
  "parkCode": "abli",
  "description": "For over a century people from around the world have come to rural Central Kentucky to honor the humble beginnings of our 16th president, Abraham Lincoln. His early life on Kentucky's frontier shaped his character and prepared him to lead the nation through Civil War. Visit our country's first memorial to Lincoln, built with donations from young and old, and the site of his childhood home.",
  "latitude": "37.5858662",
  "longitude": "-85.67330523",
  "latLong": "lat:37.5858662, long:-85.67330523",
  "activities": [
    {
      "id": "13A57703-BB1A-41A2-94B8-53B692EB7238",
      "name": "Astronomy"
    },
    {
      "id": "D37A0003-8317-4F04-8FB0-4CF0A272E195",
      "name": "Stargazing"
    },
    {
      "id": "1DFACD97-1B9C-4F5A-80F2-05593604799E",
      "name": "Food"
    },
   

### 2.3. Get Park Data with National Park Service API

In [21]:
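# Get NPS park data
# The download is done by the standalone script fetch_nps_data.py rather than in this notebook.
# Uncomment the line below to re-run that script from here:
# %run ../scripts/fetch_nps_data.py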

### 2.4. Import Generated Parks CSV and State Coordinates CSVs to Dataframe

In [None]:
# Load both raw inputs: the parks table and the state/territory coordinate table
parks, states = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/raw/parks.csv', '../data/raw/states.csv')
)

### 2.5. Inspect Data (Parks)

In [23]:
# Structural audit of the raw parks table: preview, nulls, dtypes, summary, duplicates, whitespace

# Print dataframe head
print('First five rows of dataframe')
display(parks.head())
print()

# Print dataframe sample
print('Random five rows of dataframe')
display(parks.sample(5))
print()

# Check for missing values (make sure amenities is included)
print('Check for Missing Values')
print(parks.isna().sum())
print()

# Check data types
# DataFrame.info() writes its report itself and returns None, so it is called bare;
# wrapping it in print() appends a stray "None" to the output
print('Check Data Types')
parks.info()
print()

# Check values for each column
print('Describe Dataframe')
print(parks.describe(include = 'all'))
print()

# Check for duplicates
print('Count of Duplicated Rows')
print(parks.duplicated().sum())
print()

# Check for leading/trailing whitespace in strings
cols = ['name', 'designation', 'states', 'description', 'activities']   # string columns
for col in cols:
    if col in parks.columns:
        # Convert to string just in case, then check
        has_ws = parks[col].astype(str).apply(lambda x: x != x.strip())
        count = has_ws.sum()
        if count > 0:
            print(f"Column '{col}' has {count} rows with leading/trailing whitespace.")
        else:
            print(f"There is no leading/trailing whitespace in {col}.")
print()

# Number of unique activities (important for web interface)
# `activities` holds one comma-separated list per park, so nunique() on the column would
# count distinct lists. Split the lists to count the individual activity names instead.
# NOTE(review): assumes no single activity name contains a comma - confirm against the API data.
activity_lists = parks['activities'].dropna().str.split(',')
unique_activities = {
    activity.strip()
    for activity_list in activity_lists
    for activity in activity_list
    if activity.strip()
}
print(f"Unique Activities: {len(unique_activities)}")
print(f"Unique Activity Combinations: {parks['activities'].nunique()}")
print()

# Number of unique amenities (important for web interface)
# Outer double quotes keep the f-string valid on Python versions before 3.12
print(f"Unique Amenities: {parks['amenities'].nunique()}")

First five rows of dataframe


Unnamed: 0,id,name,latitude,longitude,designation,states,description,directionsInfo,directionsUrl,weatherInfo,url,activities,topics,image_url,image_caption,contact_phone,contact_email,physical_address,mailing_address,operating_hours_description,standard_hours,entrance_fee_cost,entrance_fee_title,entrance_fee_description,entrance_pass_cost,entrance_pass_title,entrance_pass_description,amenities
0,77E0D7F0-1942-494A-ACE2-9004D2BDC59E,Abraham Lincoln Birthplace National Historical...,37.585866,-85.673305,National Historical Park,KY,For over a century people from around the worl...,The Birthplace Unit of the park is located app...,http://www.nps.gov/abli/planyourvisit/directio...,There are four distinct seasons in Central Ken...,https://www.nps.gov/abli/index.htm,"Astronomy, Stargazing, Food, Picnicking, Guide...","Birthplace, Presidents, Animals, Birds, Caves,...",https://www.nps.gov/common/uploads/structured_...,"Over 200,000 people a year come to walk up the...",2703583137,ABLI_Administration@nps.gov,"2995 Lincoln Farm Road, Hodgenville, KY 42748","2995 Lincoln Farm Road, Hodgenville, KY 42748",Memorial Building:\nopen 9:00 am - 4:30 pm eas...,"wednesday: 9:00AM - 5:00PM, monday: 9:00AM - 5...",,,,,,,Accessible Rooms
1,6DA17C86-088E-4B4D-B862-7C1BD5CF236B,Acadia National Park,44.409286,-68.247501,National Park,ME,Acadia National Park protects the natural beau...,"From Boston take I-95 north to Augusta, Maine,...",http://www.nps.gov/acad/planyourvisit/directio...,"Located on Mount Desert Island in Maine, Acadi...",https://www.nps.gov/acad/index.htm,"Arts and Culture, Cultural Demonstrations, Ast...","Arts, Painting, Photography, Poetry and Litera...",https://www.nps.gov/common/uploads/structured_...,As the tallest point on the eastern seaboard C...,2072883338,acadia_information@nps.gov,"25 Visitor Center Road, Bar Harbor, ME 04609","PO Box 177, Bar Harbor, ME 04609",Acadia National Park is open year-round. Check...,"wednesday: All Day, monday: All Day, thursday:...",6.0,Timed Entry Reservation - Location,Vehicle reservations are not required for any ...,70.0,Annual Entrance - Park,The Acadia Annual Pass is valid only at Acadia...,Accessible Rooms
2,E4C7784E-66A0-4D44-87D0-3E072F5FEF43,Adams National Historical Park,42.255396,-71.011604,National Historical Park,MA,From the sweet little farm at the foot of Penn...,"Traveling on U.S. Interstate 93, take exit 7 -...",http://www.nps.gov/adam/planyourvisit/directio...,"Be prepared for hot, humid weather. The histor...",https://www.nps.gov/adam/index.htm,"Guided Tours, Self-Guided Tours - Walking, Liv...","American Revolution, Birthplace, Colonization ...",https://www.nps.gov/common/uploads/structured_...,The Birthplaces of John and John Quincy Adams ...,617-770-1175,ADAM_Visitor_Center@nps.gov,"1250 Hancock St., Quincy, MA 02169","135 Adams Street, Quincy, MA 02169",The Visitor Center is open 9:00 am to 5:00 pm ...,"wednesday: 9:00AM - 5:00PM, monday: Closed, th...",15.0,Entrance - Per Person,Entrance into the historic homes at Adams Nati...,45.0,Annual Entrance - Park,Adams National Historical Park has a digital a...,Historical/Interpretive Information/Exhibits
3,1A47416F-DAA3-4137-9F30-14AF86B4E547,African American Civil War Memorial,38.9166,-77.026,,DC,"Over 200,000 African-American soldiers and sai...",The memorial is located at the corner of Vermo...,http://www.nps.gov/afam/planyourvisit/directio...,Washington DC gets to see all four seasons. Hu...,https://www.nps.gov/afam/index.htm,"Guided Tours, Self-Guided Tours - Walking","African American Heritage, Monuments and Memor...",https://www.nps.gov/common/uploads/structured_...,A poignant reminder of our nations past,2024266841,national_mall@nps.gov,"1925 Vermont Avenue Northwest, Washington, DC ...","1100 Ohio Drive SW, Washington, DC 20242",The African American Civil War Memorial is alw...,"wednesday: All Day, monday: All Day, thursday:...",,,,,,,Historical/Interpretive Information/Exhibits
4,E6E1D22A-7A89-47F8-813C-B611059A8CF9,African Burial Ground National Monument,40.714527,-74.004474,National Monument,NY,The African Burial Ground is the oldest and la...,The African Burial Ground National Monument is...,http://www.nps.gov/afbg/planyourvisit/directio...,http://forecast.weather.gov/MapClick.php?CityN...,https://www.nps.gov/afbg/index.htm,"Arts and Culture, Guided Tours, Junior Ranger ...","African American Heritage, Archeology, Burial,...",https://www.nps.gov/common/uploads/structured_...,African Burial Ground Memorial,2122384367,african_burial_ground@nps.gov,"African Burial Ground National Monument, New Y...","African Burial Ground NM, New York, NY 10005",INDOOR VISITOR CENTER AND MUSEUM,"wednesday: 10:00AM - 4:00PM, monday: Closed, t...",,,,,,,



Random five rows of dataframe


Unnamed: 0,id,name,latitude,longitude,designation,states,description,directionsInfo,directionsUrl,weatherInfo,url,activities,topics,image_url,image_caption,contact_phone,contact_email,physical_address,mailing_address,operating_hours_description,standard_hours,entrance_fee_cost,entrance_fee_title,entrance_fee_description,entrance_pass_cost,entrance_pass_title,entrance_pass_description,amenities
307,126A6227-E39F-44F4-A279-B1EA257CBCDA,Muir Woods National Monument,37.89658,-122.580805,National Monument,CA,"Walk among old growth coast redwoods, cooling ...",From San Francisco: Muir Woods is located 11 m...,http://www.nps.gov/muwo/planyourvisit/directio...,The coast redwood forest is cool most times of...,https://www.nps.gov/muwo/index.htm,"Food, Dining, Guided Tours, Hiking, Junior Ran...","Animals, Birds, Fish, Forests and Woodlands, C...",https://www.nps.gov/common/uploads/structured_...,Entrance to Muir Woods National Monument,415-561-2850,goga_muwo_socialmedia@nps.gov,"Muir Woods National Monument, Mill Valley, CA ...","Muir Woods National Monument, Mill Valley, CA ...","Muir Woods is open every day of the year, with...","wednesday: 8:00AM - 5:00PM, monday: 8:00AM - 5...",,,,,,,Food/Drink - Cafeteria
156,7FF57085-3B53-498D-86B5-96F5F1493A06,Fort Point National Historic Site,37.808374,-122.473747,National Historic Site,CA,From its vantage point overlooking the spectac...,Fort Point is located at the south anchorage o...,http://www.nps.gov/fopo/planyourvisit/directio...,Fort Point is typically cold and windy through...,https://www.nps.gov/fopo/index.htm,"Arts and Culture, Theater, Fishing, Living His...","Architecture and Building, African American He...",https://www.nps.gov/common/uploads/structured_...,Fort Point offers stunning views from under th...,415-561-4959,goga_fopo_socialmedia@nps.gov,"Fort Point National Historic Site, San Francis...","Fort Point National Historic Site, San Francis...",The days in which Fort Point is open varies th...,"wednesday: Closed, monday: 10:00AM - 5:00PM, t...",,,,,,,Bicycle - Rack
371,3D026091-7EFB-405D-A8F2-D9109DE9740A,Richmond National Battlefield Park,37.487745,-77.291078,National Battlefield Park,VA,The center of Confederate manufacturing fueled...,Richmond National Battlefield Park has two vis...,http://www.nps.gov/rich/planyourvisit/directio...,"Summer, hot and humid. Fall and Spring, mild. ...",https://www.nps.gov/rich/index.htm,"Guided Tours, Self-Guided Tours - Walking, Hik...","African American Heritage, Burial, Cemetery an...",https://www.nps.gov/common/uploads/structured_...,Earthworks like these at Fort Harrison can be ...,804-226-1981,stephanie_pooler@nps.gov,"3215 E. Broad Street, Richmond, VA 23223","3215 E. Broad Street, Richmond, VA 23223",Visitor center focusing on Civil War medical p...,"wednesday: 9:00AM - 4:30PM, monday: Closed, th...",,,,,,,Parking - Auto
32,E7E4E103-5FA9-45AC-8843-05DE554CDBB7,Belmont-Paul Women's Equality National Monument,38.892315,-77.003819,National Monument,DC,Home to the National Woman's Party for more th...,The Belmont-Paul Women's Equality NM is locate...,http://www.nps.gov/bepa/planyourvisit/directio...,Washington DC gets to see all four seasons. Hu...,https://www.nps.gov/bepa/index.htm,"Guided Tours, Self-Guided Tours - Walking, Jun...","African American Heritage, Hispanic American H...",https://www.nps.gov/common/uploads/structured_...,411th Unit of the National Park Service,(202) 543-2240,bepa_info@nps.gov,"144 Constitution Ave NE, Washington, DC 20002","1100 Ohio Drive SW, Washington, DC 20242",Belmont-Paul Women's Equality National Monumen...,"wednesday: Closed, monday: Closed, thursday: C...",,,,,,,Gifts/Souvenirs/Books
209,BEE3E4AA-D61B-4197-A2A0-567D15734766,Hagerman Fossil Beds National Monument,42.790741,-114.944307,National Monument,ID,"During the Pliocene, this place looked quite d...",The Thousand Springs Visitor Center is located...,http://www.nps.gov/hafo/planyourvisit/directio...,"Hagerman, Idaho, gets 10 inches of rain per ye...",https://www.nps.gov/hafo/index.htm,"Auto and ATV, Scenic Driving, Food, Picnicking...","Native American Heritage, Westward Expansion, ...",https://www.nps.gov/common/uploads/structured_...,The Thousand Springs Visitor Center is the onl...,2089334105,hafo_information@nps.gov,"17970 U.S. Hwy 30, Hagerman, ID 83332","775 East 2830 South, Hagerman, ID 83332",The road leading to the Monument is a public r...,"wednesday: Sunrise to Sunset, monday: Sunrise ...",,,,,,,Gifts/Souvenirs/Books



Check for Missing Values
id                               0
name                             0
latitude                         1
longitude                        1
designation                     35
states                           0
description                      0
directionsInfo                   2
directionsUrl                    0
weatherInfo                      1
url                              0
activities                      10
topics                           8
image_url                        0
image_caption                    0
contact_phone                   16
contact_email                    0
physical_address                 0
mailing_address                  0
operating_hours_description      6
standard_hours                   5
entrance_fee_cost              360
entrance_fee_title             360
entrance_fee_description       360
entrance_pass_cost             379
entrance_pass_title            379
entrance_pass_description      379
amenities                    

### Inspect Data (States)

In [24]:
# Structural audit of the raw states table: preview, nulls, dtypes, summary, duplicates, whitespace

# Print dataframe head
print('First five rows of dataframe')
display(states.head())
print()

# Print dataframe sample
print('Random five rows of dataframe')
display(states.sample(5))
print()

# Check for missing values
print('Check for Missing Values')
print(states.isna().sum())
print()

# Check data types
# DataFrame.info() writes its report itself and returns None, so it is called bare;
# wrapping it in print() appends a stray "None" to the output
print('Check Data Types')
states.info()
print()

# Check values for each column
print('Describe Dataframe')
print(states.describe(include = 'all'))
print()

# Check for duplicates
print('Count of Duplicated Rows')
print(states.duplicated().sum())
print()

# Check for leading/trailing whitespace in strings
cols = ['state&teritory', 'Name']  # string columns
for col in cols:
    if col in states.columns:
        # Convert to string just in case, then check
        has_ws = states[col].astype(str).apply(lambda x: x != x.strip())
        count = has_ws.sum()
        if count > 0:
            print(f"Column '{col}' has {count} rows with leading/trailing whitespace.")
        else:
            print(f"There is no leading/trailing whitespace in {col}.")

First five rows of dataframe


Unnamed: 0,state&teritory,latitude,longitude,Name
0,AK,63.588753,-154.493062,Alaska
1,AL,32.318231,-86.902298,Alabama
2,AR,35.20105,-91.831833,Arkansas
3,AZ,34.048928,-111.093731,Arizona
4,CA,36.778261,-119.417932,California



Random five rows of dataframe


Unnamed: 0,state&teritory,latitude,longitude,Name
44,TX,31.968599,-99.901813,Texas
4,CA,36.778261,-119.417932,California
53,DC,38.942142,-77.025955,District of Columbia
37,OR,43.804133,-120.554201,Oregon
35,OH,40.417287,-82.907123,Ohio



Check for Missing Values
state&teritory    0
latitude          0
longitude         0
Name              0
dtype: int64

Check Data Types
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61 entries, 0 to 60
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   state&teritory  61 non-null     object 
 1   latitude        61 non-null     float64
 2   longitude       61 non-null     float64
 3   Name            61 non-null     object 
dtypes: float64(2), object(2)
memory usage: 2.0+ KB
None

Describe Dataframe
       state&teritory   latitude   longitude    Name
count              61  61.000000   61.000000      61
unique             59        NaN         NaN      61
top                DC        NaN         NaN  Alaska
freq                2        NaN         NaN       1
mean              NaN  34.065078  -71.870531     NaN
std               NaN  17.899831   71.809011     NaN
min               NaN -66.105720 -170.13221

### 2.6. Observations

#### 2.6.1. Parks

- Several columns have missing values: latitude, longitude, designation, directionsInfo, weatherInfo, activities, topics, contact_phone, operating_hours_description, standard_hours, and the entrance fee and entrance pass fields (cost, title, and description). Rows that are missing a field we need on the map (at minimum the coordinates) should be removed.
- Datatypes are all correct
- There is no leading or trailing whitespace to strip
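- The `activities` column stores one comma-separated list per park, so `nunique()` counts distinct lists rather than distinct activities (431 distinct lists across 474 parks). The individual activities overlap heavily between parks and need to be split out and streamlined/categorized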
- The instructions want a maximum of 9 geographical locations, so we will have to trim a subset of this data to use
- We also have to remove National Parks not accessible by roads or extremely far away (AK, HI, PR, AS, and any other island parks)

#### 2.6.2. States

- The header `state&teritory` is a bit sloppy and also misspelled and will be changed to `abbreviation`, as well as making `Name` lowercase
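- There are 61 rows but only 59 unique abbreviations (for example, `DC` appears twice), so a lookup by abbreviation can match more than one row
- Otherwise, everything appears fine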

## 3. Data Processing and Visualization

### 3.1. Clean Data

#### 3.1.1. Parks

- Remove rows with missing coordinates and other important information
- Remove undesired parks (we have 474, probably way too many)
- Remove parks too far for a roadtrip or not accessible by land (AK, HI, PR, AS, and any other island parks)
- Create new column - tuple of latitude and longitude

In [25]:
# Drop rows with missing coordinates
parks = parks.dropna(subset=['latitude', 'longitude'])

# Drop parks outside the road-trip area: Alaska, Hawaii, Puerto Rico, American Samoa,
# the U.S. Virgin Islands (which include St. Croix), Guam, and the Northern Mariana Islands.
# 'SC' is intentionally NOT listed: it is the postal code for South Carolina, not St. Croix,
# and listing it removed mainland parks that are reachable by road.
exclude_states = ['AK', 'HI', 'PR', 'AS', 'VI', 'GU', 'MP']

# Word boundaries make each code match only as a whole entry of the `states` field.
# A park whose `states` value lists an excluded code alongside others is still dropped.
exclude_pattern = r'\b(?:' + '|'.join(exclude_states) + r')\b'
in_excluded_state = parks['states'].str.contains(exclude_pattern, regex=True, na=False)
parks = parks[~in_excluded_state]

# Not accessible by car (Dry Tortugas)
# .copy() detaches the filtered frame so the column assignment below writes to a real
# DataFrame rather than to a slice of the original
parks = parks[parks['name'].str.strip() != 'Dry Tortugas National Park'].copy()

# Create new coordinates column (tuple of latitude and longitude) for each park
parks['coordinates'] = list(zip(parks['latitude'], parks['longitude']))
print(parks['coordinates'].head(5))

# TODO: Drop parks with missing fields that will be in the dashboard



0     (37.5858662, -85.67330523)
1        (44.409286, -68.247501)
2     (42.2553961, -71.01160356)
3             (38.9166, -77.026)
4    (40.71452681, -74.00447358)
Name: coordinates, dtype: object



#### 3.1.2. States

- Update column header labels
- Create new column - tuple of latitude and longitude

In [26]:
# Replace the misspelled 'state&teritory' header and lowercase 'Name'
column_renames = {'state&teritory': 'abbreviation', 'Name': 'name'}
states = states.rename(columns=column_renames)
print(states.columns)
print()

# Pair each row's latitude and longitude into one (lat, lon) tuple
states['coordinates'] = [
    (lat, lon) for lat, lon in zip(states['latitude'], states['longitude'])
]
print(states['coordinates'].head(5))

print(states.head(5))

Index(['abbreviation', 'latitude', 'longitude', 'name'], dtype='object')

0    (63.588753, -154.493062)
1     (32.318231, -86.902298)
2      (35.20105, -91.831833)
3    (34.048928, -111.093731)
4    (36.778261, -119.417932)
Name: coordinates, dtype: object
  abbreviation   latitude   longitude        name               coordinates
0           AK  63.588753 -154.493062      Alaska  (63.588753, -154.493062)
1           AL  32.318231  -86.902298     Alabama   (32.318231, -86.902298)
2           AR  35.201050  -91.831833    Arkansas    (35.20105, -91.831833)
3           AZ  34.048928 -111.093731     Arizona  (34.048928, -111.093731)
4           CA  36.778261 -119.417932  California  (36.778261, -119.417932)


### 3.2. Map of National Parks in the Contiguous United States

In [27]:
# Fixed-size figure holding a map with one marker per park left in `parks`
fig = fl.Figure(width=1200, height=800)
m = fl.Map(location=[39.8283, -98.5795], zoom_start=4)
fig.add_child(m)

# Walk the three needed columns in lockstep; the park name becomes the marker popup
marker_fields = zip(parks['latitude'], parks['longitude'], parks['name'])
for park_lat, park_lon, park_name in marker_fields:
    marker = fl.Marker(location=[park_lat, park_lon], popup=park_name)
    marker.add_to(m)

fig

### 3.3. Select Parks to Include for Routing Algorithm

In [None]:
"""
1. Grand Canyon National Park (Arizona)
🔥 One of the most iconic parks globally

🧭 Over 4.7 million annual visitors

🚩 Known for its immense canyon carved by the Colorado River

2. Zion National Park (Utah)
⛰️ Towering red cliffs and narrow slot canyons

🚶‍♂️ Popular hikes: Angel’s Landing, The Narrows

👥 Over 4.6 million visitors

3. Yosemite National Park (California)
🏞️ Famous for waterfalls, granite cliffs (El Capitan, Half Dome)

🌲 Dense forested valleys

👥 Over 3.5 million visitors annually

4. Yellowstone National Park (Wyoming, Montana, Idaho)
🌋 Home to geysers (Old Faithful), hot springs, and bison

📅 First national park in the world

👥 ~4 million annual visitors

5. Rocky Mountain National Park (Colorado)
🏔️ High alpine terrain and Trail Ridge Road

❄️ Extremely popular for hiking and wildlife

👥 Over 4 million visitors

6. Grand Teton National Park (Wyoming)
🏞️ Jaw-dropping mountain ranges and lakes

📸 Often paired with Yellowstone by travelers

👥 ~3.4 million visitors

7. Sequoia & Kings Canyon National Parks (California)
🌲 Home to giant sequoia trees, including General Sherman

🏞️ Deep canyons and alpine wilderness

👥 Popular for nature lovers and family roadtrips

8. Arches National Park (Utah)
🪨 Over 2,000 natural stone arches

☀️ Popular for photography and short hikes

👥 ~1.5 million visitors

9. Mount Rainier National Park (Washington)
❄️ Dominated by the glaciated peak of Mount Rainier

🌼 Famous wildflower meadows and hiking trails

👥 ~1.6 million visitors
"""

# Drop irrelevant fields
relevant_fields = ['name', 'latitude', 'longitude', 'description', 'url', 'image_url']
parks_master = parks[relevant_fields]

# Trim down to 9 parks x 3 regions - starting with WEST
listofparks_w = ['Grand Canyon National Park',
                 'Zion National Park',
                 'Yosemite National Park',
                 'Yellowstone National Park',
                 'Rocky Mountain National Park',
                 'Grand Teton National Park',
                 'Sequoia & Kings Canyon National Parks',
                 'Arches National Park',
                 'Mount Rainier National Park']
parks_w = parks_master[parks_master['name'].isin(listofparks_w)].reset_index(drop=True)

# isin() silently skips names that do not match exactly, so fail here rather than
# letting the routing cells run on fewer parks than intended
missing_parks_w = sorted(set(listofparks_w) - set(parks_w['name']))
if missing_parks_w:
    raise ValueError(f"Parks not found in the cleaned data: {missing_parks_w}")
print(parks_w)

# Create JSON of parks_w
# The frame being exported is parks_w; `df` is not defined in the cells shown in this notebook.
# TODO: the '_____' path segment is a placeholder - replace it with the real front-end directory.
parks_w.to_json('../_____/contexts/parks_w.json', orient='records', indent=2)

                                    name   latitude   longitude  \
0                   Arches National Park  38.722618 -109.586367   
1             Grand Canyon National Park  36.000117 -112.121516   
2              Grand Teton National Park  43.818536 -110.705467   
3            Mount Rainier National Park  46.860754 -121.704388   
4           Rocky Mountain National Park  40.355692 -105.697288   
5  Sequoia & Kings Canyon National Parks  36.712773 -118.587429   
6              Yellowstone National Park  44.598244 -110.547169   
7                 Yosemite National Park  37.848833 -119.557187   
8                     Zion National Park  37.298393 -113.026514   

                                         description  \
0  Discover a landscape of contrasting colors, la...   
1  Entirely within the state of Arizona, the park...   
2  Soaring over a landscape rich with wildlife, p...   
3  Ascending to 14,410 feet above sea level, Moun...   
4  Rocky Mountain National Park's 415 square mile

Route Algorithm Notes about Variables

- West 9 parks (start with these)
- Central 9 parks
- East 9 parks

- Default home_state is CA

- `states` dataframe with state abbreviations including center coordinates as a tuple - add W/C/E category

- `parks` dataframe with ALL park data including coordinates as a tuple

- `parks_w` dataframe of subset of 9 west selected parks to feature for the custom route
- `parks_c` 
- `parks_e` 

- `parks_w.json` JSON object of parks_w dataframe used by SE to pull fields

- `array_w` np.array containing origins, destinations, travel distance (mi), and travel time (hrs)
- `array_c` 
- `array_e`

- `route_w` list of indices of the order of visiting the 9 parks
- `route_c`
- `route_e`





## 4. Route Algorithm

### 4.1. Baseline Model (Random Chance)

In [41]:
# Create model that randomly chooses next destination

# Create origin dictionary using parks_w: park name -> (latitude, longitude)
keys_w = parks_w['name']

# Pair the coordinate columns row by row instead of re-filtering the frame once per park
values_w = list(zip(parks_w['latitude'], parks_w['longitude']))

origins_w = dict(zip(keys_w, values_w))

def dummy_route(n, start=0, seed=None):
    """
    Build a random visiting order over n parks that begins at a given park.

    Parameters
    ----------
    n : int
        Number of parks; parks are identified by the indices 0 .. n-1.
    start : int, default 0
        Index of the park the route begins at.
    seed : int or None, default None
        When given, the shuffle uses its own random.Random(seed) so the route is
        reproducible. When None, the module-level random generator is used.

    Returns
    -------
    list of int
        `start` followed by every other index exactly once, in random order.

    Raises
    ------
    ValueError
        If `start` is not one of the indices 0 .. n-1.
    """
    if n < 1 or not 0 <= start < n:
        raise ValueError(f'start must be an index in range({n}), got {start}')

    # Every index except the starting park, in ascending order before shuffling
    remaining = [idx for idx in range(n) if idx != start]

    # Shuffle in place with either a private seeded generator or the shared one
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(remaining)

    return [start] + remaining

# Testing the function on the selected parks
# Timing the route calculation with perf_counter, a high-resolution clock meant for
# measuring short durations
start = time.perf_counter()

# Size the route from the data rather than hardcoding the park count
random_route_w = dummy_route(len(parks_w))

end = time.perf_counter()
elapsed = (end - start)

# Six decimals: the call finishes in far less than a second, so whole seconds always print 0
print(f'The randomly generated route is {random_route_w} and took {elapsed:.6f} seconds to calculate.')

The randomly generated route is [0, 4, 6, 2, 1, 8, 7, 3, 5] and took 0 seconds to calculate.


### 4.2. Create Google Maps Travel Data Arrays

These must be created for each set of parks ahead of time to avoid calling the API again (too slow for a dynamic map):
- 9 parks
- Pre-set trips

In [44]:
# Initialize Google Maps API client
# The key is read from the environment so it is never stored in the notebook.
# NOTE(review): a literal key was previously committed in this cell; it should be revoked and reissued.
api_key_g = os.environ['GOOGLE_MAPS_API_KEY']
gmaps = googlemaps.Client(key=api_key_g)

# Example park coordinates for testing
# origins = {'Yellowstone': (44.4280, -110.5885)}      # Yellowstone
# destinations = {'Yosemite': (37.8651, -119.5383)} # Yosemite

# Create origin dictionary using parks_w: park name -> (latitude, longitude)
keys = parks_w['name']
values_w = list(zip(parks_w['latitude'], parks_w['longitude']))

# To compute trips from A to B
# (built from values_w; a bare `values` is not defined anywhere in the cells shown)
origins_w = dict(zip(keys, values_w))

# To compute trips from B to A
destinations_w = origins_w

# Unit conversions for the API's meters and seconds
METERS_PER_MILE = 1609.344
SECONDS_PER_HOUR = 3600

# One row per ordered (origin, destination) pair: [origin, destination, miles, hours]
travel_array_w = np.zeros((len(origins_w) * (len(destinations_w) - 1), 4), dtype = object)

# Loop through all origins; each request asks for every other park as a destination,
# so there is one API call per origin instead of one per pair
i = 0
for origin_name, origin_coords in origins_w.items():
    # Skip the pair where origin = destination (distance 0)
    dest_names = [name for name in destinations_w if name != origin_name]
    dest_coords = [destinations_w[name] for name in dest_names]

    result = gmaps.distance_matrix(origins=[origin_coords], destinations=dest_coords, mode='driving')
    # A single origin was sent, so the response has one row; its elements follow the
    # order of the destinations list
    elements = result['rows'][0]['elements']

    for dest_name, element in zip(dest_names, elements):
        # Elements without a route carry a non-OK status and no distance/duration keys
        if element.get('status') != 'OK':
            raise RuntimeError(
                f"No driving route from {origin_name} to {dest_name}: {element.get('status')}"
            )

        distance_meters = element['distance']['value']
        duration_seconds = element['duration']['value']

        # convert to miles and hours
        distance_miles = np.round((distance_meters / METERS_PER_MILE), 2)
        duration_hours = np.round((duration_seconds / SECONDS_PER_HOUR), 2)

        travel_array_w[i, 0] = origin_name
        travel_array_w[i, 1] = dest_name
        travel_array_w[i, 2] = distance_miles
        travel_array_w[i, 3] = duration_hours

        i += 1

        print(f"{origin_name} → {dest_name} = {distance_miles} mi ({duration_hours} hrs)")

    time.sleep(1)  # Pause to avoid API rate limits

# Alias kept because later cells refer to the array as `travel`
travel = travel_array_w

# Print array
print("\nTravel Matrix:")
print(travel_array_w)

# Save array
np.save('../data/arrays/travel_array_w.npy', travel_array_w)

Arches National Park → Grand Canyon National Park = 1780.64 mi (27.1 hrs)
Arches National Park → Grand Teton National Park = 1151.26 mi (17.73 hrs)
Arches National Park → Mount Rainier National Park = 2243.87 mi (34.74 hrs)
Arches National Park → Rocky Mountain National Park = 1712.32 mi (25.41 hrs)
Arches National Park → Sequoia & Kings Canyon National Parks = 1130.76 mi (17.12 hrs)
Arches National Park → Yellowstone National Park = 2243.87 mi (34.49 hrs)
Arches National Park → Yosemite National Park = 1159.08 mi (17.76 hrs)
Arches National Park → Zion National Park = 1059.83 mi (16.68 hrs)
Grand Canyon National Park → Arches National Park = 1780.6 mi (27.0 hrs)
Grand Canyon National Park → Grand Teton National Park = 2080.83 mi (30.14 hrs)
Grand Canyon National Park → Mount Rainier National Park = 924.12 mi (14.4 hrs)
Grand Canyon National Park → Rocky Mountain National Park = 313.58 mi (5.33 hrs)
Grand Canyon National Park → Sequoia & Kings Canyon National Parks = 2117.7 mi (31.05 h

### 4.3. Create Matrix for Greedy Nearest Neighbors Algorithm

In [46]:
# Narrow parks_w to the name and coordinate columns and renumber the rows from 0
gnn_columns = ['name', 'latitude', 'longitude']
parks_gnn_w = parks_w.loc[:, gnn_columns].reset_index(drop=True)
print(parks_gnn_w)

                                    name   latitude   longitude
0                   Arches National Park  38.722618 -109.586367
1             Grand Canyon National Park  36.000117 -112.121516
2              Grand Teton National Park  43.818536 -110.705467
3            Mount Rainier National Park  46.860754 -121.704388
4           Rocky Mountain National Park  40.355692 -105.697288
5  Sequoia & Kings Canyon National Parks  36.712773 -118.587429
6              Yellowstone National Park  44.598244 -110.547169
7                 Yosemite National Park  37.848833 -119.557187
8                     Zion National Park  37.298393 -113.026514


### 4.4. Define Vacation Starting Point

In [None]:
# Convert to function

# Start at index 0 (first park) if not specified
# start_index = 0 # Default if none specified

# Traveler's home state and the coordinates stored for it in `states`
home_state = 'NY'    # Specified on dashboard (SE)
is_home_state = states['abbreviation'] == home_state
# First matching row wins if the abbreviation appears more than once
home_state_coords = states.loc[is_home_state, 'coordinates'].iloc[0]
print(f'The user\'s home state is {home_state} and their starting coordinates for travel are {home_state_coords}.')



# Function version


def determineStartingPoint(home_state, states_df=None):
    """
    Look up the starting coordinates for a traveler's home state.

    Parameters
    ----------
    home_state : str
        State abbreviation to look up in the 'abbreviation' column.
    states_df : pandas.DataFrame or None, default None
        Table with 'abbreviation' and 'coordinates' columns. When None, the
        notebook-level `states` dataframe is used.

    Returns
    -------
    The 'coordinates' value of the first row whose abbreviation matches.

    Raises
    ------
    ValueError
        If no row has the requested abbreviation.
    """
    lookup = states if states_df is None else states_df
    matches = lookup.loc[lookup['abbreviation'] == home_state, 'coordinates']
    if matches.empty:
        raise ValueError(f'Unknown state abbreviation: {home_state!r}')

    # First match wins when an abbreviation is listed more than once
    return matches.iloc[0]

The user's home state is NY and their starting coordinates for travel are (43.299428, -74.217933).


### 4.5. Greedy Nearest Neighbors Algorithm

In [33]:
# Set up GNN matrix
# Park order follows origins_w, which was built from parks_w['name'], so matrix index k
# refers to the k-th park in that dictionary
park_list = list(origins_w.keys())
park_indices = {name: idx for idx, name in enumerate(park_list)}
n = len(park_list)
gnn_matrix = np.full((n, n), np.inf)  # fill with inf to start

# Load the flat travel array saved by the Google Maps cell instead of relying on a
# variable left over in the kernel. allow_pickle is required because the array was
# saved with dtype=object; only load files this project wrote itself.
travel_w = np.load('../data/arrays/travel_array_w.npy', allow_pickle=True)

# Populate matrix from the flat array; column 2 holds the driving distance in miles.
# Pairs absent from the array (including the diagonal) stay at inf.
for origin, destination, dist_mi, _ in travel_w:
    gnn_matrix[park_indices[origin], park_indices[destination]] = dist_mi

# Greedy Nearest Neighbor function
def greedy_nearest_neighbor(gnn_matrix, start=0):
    """Build a visiting order by repeatedly moving to the closest unvisited park.

    Parameters
    ----------
    gnn_matrix : array-like, shape (n, n)
        gnn_matrix[i][j] is the travel cost from park i to park j. Entries
        equal to inf (or NaN) are treated as "no known connection".
    start : int, default 0
        Index of the park the route starts from.

    Returns
    -------
    list of int
        Park indices in visiting order, beginning with `start`.

    Raises
    ------
    ValueError
        If `gnn_matrix` is not a square 2-D matrix, or if at some step no
        unvisited park is reachable from the current park.
    IndexError
        If `start` is not a valid index into the matrix.
    """
    distances = np.asarray(gnn_matrix, dtype=float)
    if distances.ndim != 2 or distances.shape[0] != distances.shape[1]:
        raise ValueError(
            f'gnn_matrix must be a square 2-D matrix, got shape {distances.shape}.'
        )

    n = distances.shape[0]
    if not -n <= start < n:
        raise IndexError(f'start index {start} is out of range for {n} parks.')

    visited = np.zeros(n, dtype=bool)
    visited[start] = True
    route = [start]  # 0 or state-specific starting index
    current = start

    for _ in range(n - 1):
        row = distances[current]
        # Mask out visited parks and NaN entries so they can never be chosen
        candidates = np.where(visited | np.isnan(row), np.inf, row)
        # argmin returns the first minimum, i.e. ties go to the lowest index
        nearest = int(np.argmin(candidates))

        # An inf minimum means nothing is reachable. Previously this left
        # `nearest` as None and failed later with an unrelated TypeError.
        if candidates[nearest] == np.inf:
            raise ValueError(
                f'No unvisited park is reachable from park index {current}; '
                'the distance matrix has no finite entry for the remaining parks.'
            )

        route.append(nearest)
        visited[nearest] = True
        current = nearest

    return route

# Example usage: compute one greedy route over `gnn_matrix`, starting from the
# park at index 0, and time the calculation.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() follows the wall clock, which can be adjusted.
start = time.perf_counter()

route = greedy_nearest_neighbor(gnn_matrix, start=0)

end = time.perf_counter()
elapsed = end - start

# Report sub-second precision: rounding to whole seconds printed "0 seconds"
print(f'The algorithm generated route is {route} and took {elapsed:.4f} seconds to calculate.')

The algorithm generated route is [0, 8, 4, 1, 6, 3, 5, 7, 2] and took 0 seconds to calculate.


### 4.6. Visualization of Routes

In [43]:
def plot_route_on_map(route_indices, park_coords, park_names,
                      center=(39.8283, -98.5795), zoom_start=4):
    """
    Build a folium map with one marker per stop and a blue line joining the stops.

    route_indices: list of indices in visiting order
    park_coords: list of (lat, lon) tuples, same order as park_names
    park_names: list of park names
    center: (lat, lon) the map is initially centered on; the default is the
        point this notebook uses to center the map on the United States
    zoom_start: initial zoom level passed to folium

    Returns the folium Map, so the caller can add more layers, save or display it.
    """
    # The map is centered on a fixed point rather than on the first stop, so
    # no lookup of route_indices[0] is needed (it used to raise IndexError on
    # an empty route and its result was never used).
    m = fl.Map(location=list(center), zoom_start=zoom_start)

    # Plot a marker per stop and collect the points in visiting order
    route_points = []
    for idx in route_indices:
        lat, lon = park_coords[idx]
        fl.Marker(
            location=[lat, lon],
            popup=park_names[idx],
            tooltip=park_names[idx]
        ).add_to(m)
        route_points.append((lat, lon))

    # Draw lines connecting parks in order of algorithm route (blue).
    # A line needs at least two points, so shorter routes get markers only.
    if len(route_points) >= 2:
        fl.PolyLine(route_points, color='blue', weight=3, opacity=0.5).add_to(m)

    return m

# Names and lat/lon values of the origin parks, taken from `origins_w` in matching order
park_names = list(origins_w.keys())
park_coords = list(origins_w.values())

# Draw the algorithm route and write the resulting map to an HTML file
m = plot_route_on_map(route, park_coords, park_names)
m.save("../routes/route_map.html")
m

# Overlay the random route on the same map as a red line
# Park names along the random route, looked up by index
random_route_park_names = [park_list[stop] for stop in random_route_w]
# Coordinates along the random route, looked up by index
random_route_coords_w = [park_coords[stop] for stop in random_route_w]
fl.PolyLine(
    random_route_coords_w,
    color='red',
    weight=3,
    opacity=0.5,
).add_to(m)

# Wrap the map in a 1200x800 figure and show it as the cell output
fig = fl.Figure(width=1200, height=800)
fig.add_child(m)
fig

### 4.7. Trip Information Tables

In [35]:
# Goal: a readable table of stops with per-leg travel distance and time plus
# overall totals, suitable for a window next to the map on the dashboard.

# Put the flat `travel` records into a DataFrame with named columns
travel_df = pd.DataFrame(
    data=travel,
    columns=['Origin', 'Destination', 'Distance_mi', 'Duration_hr'],
)

# Combine into a function
def build_route_table_with_totals(route, park_list, travel_df):
    """Tabulate every leg of a route, followed by a totals row.

    route: sequence of indices into park_list, in visiting order
    park_list: park names; each route index refers to a position in this list
    travel_df: DataFrame with 'Origin', 'Destination', 'Distance_mi' and
        'Duration_hr' columns

    Returns a DataFrame with columns 'From', 'To', 'Distance (mi)' and
    'Duration (hr)': one row per consecutive pair of stops, then a final
    'Totals' row with both sums rounded to 2 decimals. A leg with no matching
    row in travel_df gets None for both values and adds nothing to the totals.
    """
    legs = []
    distance_sum = 0
    duration_sum = 0

    for step in range(1, len(route)):
        origin = park_list[route[step - 1]]
        destination = park_list[route[step]]

        # Rows of travel_df describing exactly this origin -> destination leg
        is_leg = (travel_df['Origin'] == origin) & (travel_df['Destination'] == destination)
        match = travel_df[is_leg]

        if match.empty:
            distance, duration = None, None
        else:
            # When several rows match, the first one is used
            distance = match['Distance_mi'].values[0]
            duration = match['Duration_hr'].values[0]
            distance_sum += distance
            duration_sum += duration

        legs.append({
            'From': origin,
            'To': destination,
            'Distance (mi)': distance,
            'Duration (hr)': duration,
        })

    # Closing row carrying the accumulated totals
    totals_row = {
        'From': 'Totals',
        'To': '',
        'Distance (mi)': round(distance_sum, 2),
        'Duration (hr)': round(duration_sum, 2),
    }
    legs.append(totals_row)

    return pd.DataFrame(legs)

# Display in an easy to read format
def display_route_table(table, title="Route Summary"):
    """Print `table` as a boxed grid underneath a bold title.

    table: tabular data accepted by tabulate (here, a route DataFrame)
    title: heading printed above the grid, wrapped in ANSI bold escape codes
    """
    # Blank line, then the title between the ANSI "bold on" / "reset" codes
    bold_title = f"\033[1m{title}\033[0m"
    print(f"\n{bold_title}")

    # Column names as headers, box-drawing borders, floats shown with 2 decimals
    rendered = tabulate(table, headers="keys", tablefmt="fancy_grid", floatfmt=".2f")
    print(rendered)

# Build the route table for the dummy (random) route
route_table_random = build_route_table_with_totals(random_route, park_list, travel_df)

# Build the route table for the algorithm route
route_table = build_route_table_with_totals(route, park_list, travel_df)

# Display tables, each under its own title: with the shared default title the
# two outputs could not be told apart.
display_route_table(route_table_random, title="Route Summary: Random Route")
display_route_table(route_table, title="Route Summary: Greedy Nearest Neighbor Route")


[1mRoute Summary[0m
╒════╤════════════════════════════════════════════════╤════════════════════════════════════════════════╤═════════════════╤═════════════════╕
│    │ From                                           │ To                                             │   Distance (mi) │   Duration (hr) │
╞════╪════════════════════════════════════════════════╪════════════════════════════════════════════════╪═════════════════╪═════════════════╡
│  0 │ Keweenaw National Historical Park              │ Hubbell Trading Post National Historic Site    │         1780.64 │           27.10 │
├────┼────────────────────────────────────────────────┼────────────────────────────────────────────────┼─────────────────┼─────────────────┤
│  1 │ Hubbell Trading Post National Historic Site    │ African Burial Ground National Monument        │         2182.30 │           32.20 │
├────┼────────────────────────────────────────────────┼────────────────────────────────────────────────┼─────────────────┼─────────

### 4.8. Other Visualizations

In [36]:
# Bar plot of travel distances, travel times
