# National Park Vacation Route Tool

## 1. Data

### 1.1. Data Sources

- Data was obtained from the National Park Service (https://www.nps.gov/index.htm) using their API (https://www.nps.gov/subjects/developer/api-documentation.htm).
- United States Latitude and Longitude Data was obtained from Kaggle (https://www.kaggle.com/datasets/tennerimaheshwar/us-state-and-territory-latitude-and-longitude-data)

## 2. Import External Libraries and Data

### 2.1. Import Libraries

In [15]:
# JSON requests
import requests
import json

# Data manipulation
import numpy as np
import pandas as pd

# Displaying plots and maps
import seaborn as sns
import matplotlib.pyplot as plt
import folium as fl # change if SWE need

# Dummy and greedy nearest neighbors algorithm requirements
import math
import googlemaps
import random

# Timing the algorithm
import time

# Neatly display dataframes
from tabulate import tabulate

# Display all fields with pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

### 2.2. Inspect Raw Data JSON Response from NPS API

In [16]:
# Inspect full JSON response of the first park to determine fields we need and data structure
api_key = "fpyJ9NycrgZX5mK8f0n90c4qXGPcYAsBPwt4BLJk"
url = "https://developer.nps.gov/api/v1/parks"

params = {
    "limit": 1,
    "start": 0,
    "api_key": api_key
}

response = requests.get(url, params=params)
data = response.json()

# Print the full JSON response of the first park
print(json.dumps(data["data"][0], indent=2))

{
  "id": "77E0D7F0-1942-494A-ACE2-9004D2BDC59E",
  "url": "https://www.nps.gov/abli/index.htm",
  "fullName": "Abraham Lincoln Birthplace National Historical Park",
  "parkCode": "abli",
  "description": "For over a century people from around the world have come to rural Central Kentucky to honor the humble beginnings of our 16th president, Abraham Lincoln. His early life on Kentucky's frontier shaped his character and prepared him to lead the nation through Civil War. Visit our country's first memorial to Lincoln, built with donations from young and old, and the site of his childhood home.",
  "latitude": "37.5858662",
  "longitude": "-85.67330523",
  "latLong": "lat:37.5858662, long:-85.67330523",
  "activities": [
    {
      "id": "13A57703-BB1A-41A2-94B8-53B692EB7238",
      "name": "Astronomy"
    },
    {
      "id": "D37A0003-8317-4F04-8FB0-4CF0A272E195",
      "name": "Stargazing"
    },
    {
      "id": "1DFACD97-1B9C-4F5A-80F2-05593604799E",
      "name": "Food"
    },
   

### 2.3. Get Park Data with National Park Service API

In [17]:
# Get NPS park data
# Run from fetch_nps_data.py
# %run ../scripts/fetch_nps_data.py

### 2.4. Import Generated Parks CSV and State Coordinates CSVs to Dataframe

In [18]:
parks = pd.read_csv('../data/parks.csv')
states = pd.read_csv('../data/states.csv')

### 2.5. Inspect Data (Parks)

In [19]:
# Print dataframe head
print('First five rows of dataframe')
display(parks.head())
print()
    
# Print dataframe sample
print('Random five rows of dataframe')
display(parks.sample(5))
print()
    
# Check for missing values (make sure amenities is included)
print('Check for Missing Values')
print(parks.isna().sum())
print()

# Check data types
print('Check Data Types')
print(parks.info())
print()

# Check values for each column
print('Describe Dataframe')
print(parks.describe(include = 'all'))
print()
    
# Check for duplicates
print('Count of Duplicated Rows')
print(parks.duplicated().sum())
print()

# Check for leading/trailing whitespace in strings
cols = ['name', 'designation', 'states', 'description', 'activities']   # string columns
for col in cols:
    if col in parks.columns:
        # Convert to string just in case, then check
        has_ws = parks[col].astype(str).apply(lambda x: x != x.strip())
        count = has_ws.sum()
        if count > 0:
            print(f"Column '{col}' has {count} rows with leading/trailing whitespace.")
        else: print(f"There is no leading/trailing whitespace in {col}.")  
print()

# Number of unique activities (important for web interface)
print(f'Unique Activities: {parks['activities'].nunique()}')
print()

# Number of unique amenities (important for web interface)
print(f'Unique Amenities: {parks['amenities'].nunique()}')

First five rows of dataframe


Unnamed: 0,id,name,latitude,longitude,designation,states,description,directionsInfo,directionsUrl,weatherInfo,url,activities,topics,image_url,image_caption,contact_phone,contact_email,physical_address,mailing_address,operating_hours_description,standard_hours,entrance_fee_cost,entrance_fee_title,entrance_fee_description,entrance_pass_cost,entrance_pass_title,entrance_pass_description,amenities
0,77E0D7F0-1942-494A-ACE2-9004D2BDC59E,Abraham Lincoln Birthplace National Historical...,37.585866,-85.673305,National Historical Park,KY,For over a century people from around the worl...,The Birthplace Unit of the park is located app...,http://www.nps.gov/abli/planyourvisit/directio...,There are four distinct seasons in Central Ken...,https://www.nps.gov/abli/index.htm,"Astronomy, Stargazing, Food, Picnicking, Guide...","Birthplace, Presidents, Animals, Birds, Caves,...",https://www.nps.gov/common/uploads/structured_...,"Over 200,000 people a year come to walk up the...",2703583137,ABLI_Administration@nps.gov,"2995 Lincoln Farm Road, Hodgenville, KY 42748","2995 Lincoln Farm Road, Hodgenville, KY 42748",Memorial Building:\nopen 9:00 am - 4:30 pm eas...,"wednesday: 9:00AM - 5:00PM, monday: 9:00AM - 5...",,,,,,,Accessible Rooms
1,6DA17C86-088E-4B4D-B862-7C1BD5CF236B,Acadia National Park,44.409286,-68.247501,National Park,ME,Acadia National Park protects the natural beau...,"From Boston take I-95 north to Augusta, Maine,...",http://www.nps.gov/acad/planyourvisit/directio...,"Located on Mount Desert Island in Maine, Acadi...",https://www.nps.gov/acad/index.htm,"Arts and Culture, Cultural Demonstrations, Ast...","Arts, Painting, Photography, Poetry and Litera...",https://www.nps.gov/common/uploads/structured_...,As the tallest point on the eastern seaboard C...,2072883338,acadia_information@nps.gov,"25 Visitor Center Road, Bar Harbor, ME 04609","PO Box 177, Bar Harbor, ME 04609",Acadia National Park is open year-round. Check...,"wednesday: All Day, monday: All Day, thursday:...",6.0,Timed Entry Reservation - Location,Vehicle reservations are not required for any ...,70.0,Annual Entrance - Park,The Acadia Annual Pass is valid only at Acadia...,Accessible Rooms
2,E4C7784E-66A0-4D44-87D0-3E072F5FEF43,Adams National Historical Park,42.255396,-71.011604,National Historical Park,MA,From the sweet little farm at the foot of Penn...,"Traveling on U.S. Interstate 93, take exit 7 -...",http://www.nps.gov/adam/planyourvisit/directio...,"Be prepared for hot, humid weather. The histor...",https://www.nps.gov/adam/index.htm,"Guided Tours, Self-Guided Tours - Walking, Liv...","American Revolution, Birthplace, Colonization ...",https://www.nps.gov/common/uploads/structured_...,The Birthplaces of John and John Quincy Adams ...,617-770-1175,ADAM_Visitor_Center@nps.gov,"1250 Hancock St., Quincy, MA 02169","135 Adams Street, Quincy, MA 02169",The Visitor Center is open 9:00 am to 5:00 pm ...,"wednesday: 9:00AM - 5:00PM, monday: Closed, th...",15.0,Entrance - Per Person,Entrance into the historic homes at Adams Nati...,45.0,Annual Entrance - Park,Adams National Historical Park has a digital a...,Historical/Interpretive Information/Exhibits
3,1A47416F-DAA3-4137-9F30-14AF86B4E547,African American Civil War Memorial,38.9166,-77.026,,DC,"Over 200,000 African-American soldiers and sai...",The memorial is located at the corner of Vermo...,http://www.nps.gov/afam/planyourvisit/directio...,Washington DC gets to see all four seasons. Hu...,https://www.nps.gov/afam/index.htm,"Guided Tours, Self-Guided Tours - Walking","African American Heritage, Monuments and Memor...",https://www.nps.gov/common/uploads/structured_...,A poignant reminder of our nations past,2024266841,national_mall@nps.gov,"1925 Vermont Avenue Northwest, Washington, DC ...","1100 Ohio Drive SW, Washington, DC 20242",The African American Civil War Memorial is alw...,"wednesday: All Day, monday: All Day, thursday:...",,,,,,,Historical/Interpretive Information/Exhibits
4,E6E1D22A-7A89-47F8-813C-B611059A8CF9,African Burial Ground National Monument,40.714527,-74.004474,National Monument,NY,The African Burial Ground is the oldest and la...,The African Burial Ground National Monument is...,http://www.nps.gov/afbg/planyourvisit/directio...,http://forecast.weather.gov/MapClick.php?CityN...,https://www.nps.gov/afbg/index.htm,"Arts and Culture, Guided Tours, Junior Ranger ...","African American Heritage, Archeology, Burial,...",https://www.nps.gov/common/uploads/structured_...,African Burial Ground Memorial,2122384367,african_burial_ground@nps.gov,"African Burial Ground National Monument, New Y...","African Burial Ground NM, New York, NY 10005",INDOOR VISITOR CENTER AND MUSEUM,"wednesday: 10:00AM - 4:00PM, monday: Closed, t...",,,,,,,



Random five rows of dataframe


Unnamed: 0,id,name,latitude,longitude,designation,states,description,directionsInfo,directionsUrl,weatherInfo,url,activities,topics,image_url,image_caption,contact_phone,contact_email,physical_address,mailing_address,operating_hours_description,standard_hours,entrance_fee_cost,entrance_fee_title,entrance_fee_description,entrance_pass_cost,entrance_pass_title,entrance_pass_description,amenities
471,9854D136-AFC0-4966-BB40-FE9323B56A49,Yucca House National Monument,37.247789,-108.686127,National Monument,CO,Yucca House National Monument preserves a larg...,"From Cortez, take Hwy. 491 south approximately...",http://www.nps.gov/yuho/planyourvisit/directio...,Spring and Fall are mild with daytime temperat...,https://www.nps.gov/yuho/index.htm,"Guided Tours, Self-Guided Tours - Walking","Archeology, Native American Heritage, Animals",https://www.nps.gov/common/uploads/structured_...,Cattails define marshy locations watered by sp...,970-529-4465,meve_general_information@nps.gov,"Off County Rd 20.5, Cortez, CO 81321","PO Box 8, Mesa Verde, CO 81330","Open year-round, weather pending. No services ...","wednesday: All Day, monday: All Day, thursday:...",,,,,,,Parking - Auto
278,3725FD28-4693-481E-888C-855484E73800,Lower East Side Tenement Museum National Histo...,40.718768,-73.990012,National Historic Site,NY,The Tenement Museum tells the stories of worki...,"The Lower East Side Tenement Museum, is locate...",https://www.tenement.org/plan-a-visit/#,Check out the 10 day forecast for New York City!,https://www.nps.gov/loea/index.htm,"Arts and Culture, Guided Tours, Living History...","Architecture and Building, African American He...",https://www.nps.gov/common/uploads/structured_...,Inside the Levine Parlor,8779753786,lestm@tenement.org,"103 Orchard Street, New York, NY 10002","103 Orchard Street, New York, NY 10002",Visit the Tenement Museum virtually or in pers...,"wednesday: 10:00AM - 5:30PM, monday: 10:00AM -...",30.0,Entrance - Per Person,The Museum is only accessible by guided teneme...,,,,
344,58BCDB2B-5A91-4892-8470-76D802313884,Pea Ridge National Military Park,36.45438,-94.034684,National Military Park,AR,"On March 7-8, 1862, over 23,000 soldiers fough...","The entrance road is located on Highway 62, 1....",http://www.nps.gov/peri/planyourvisit/directio...,Pea Ridge National Military Park is located in...,https://www.nps.gov/peri/index.htm,"Biking, Road Biking, Guided Tours, Self-Guided...","Military, Infantry and Militia, Artillery, Cav...",https://www.nps.gov/common/uploads/structured_...,Used as a field hospital during the Battle of ...,479-451-8122,PERI_Interpretation@nps.gov,"15930 National Park Drive, Garfield, AR 72732","15930 National Park Drive, Garfield, AR 72732",Pea Ridge National Military Park The battlefie...,"wednesday: 6:00AM - Sunset, monday: 6:00AM - S...",,,,,,,Accessible Rooms
329,4BE453C2-6527-484F-90D6-98057901EBDF,North Country National Scenic Trail,46.334401,-90.816563,National Scenic Trail,"MI,MN,ND,NY,OH,PA,VT,WI",Come to the North Country. Trek the hills and ...,There are numerous trailheads to access the tr...,http://www.nps.gov/noco/planyourvisit/directio...,"With the trail going through more than 4,800 m...",https://www.nps.gov/noco/index.htm,"Biking, Mountain Biking, Camping, Backcountry ...","Colonization and Settlement, Explorers and Exp...",https://www.nps.gov/common/uploads/structured_...,"The trail links scenic, natural, historic, and...",6163197906,NOCO_Administration@nps.gov,"318 East Main Street, Suite K, Lowell, MI 49331","318 East Main Street, Suite K, Lowell, MI 49331",The North Country Trail is generally open at a...,"wednesday: All Day, monday: All Day, thursday:...",,,,,,,
407,F22ED7C4-DDB8-43F8-AAF2-D474864352DA,Star-Spangled Banner National Historic Trail,39.287251,-76.603432,National Historic Trail,"MD,VA,DC",The Star-Spangled Banner National Historic Tra...,The Trail's headquarters is at Fort McHenry Na...,https://www.nps.gov/stsp/planyourvisit/directi...,The Star-Spangled Banner National Historic Tra...,https://www.nps.gov/stsp/index.htm,"Arts and Culture, Cultural Demonstrations, Liv...","African American Heritage, Arts, Painting, Pho...",https://www.nps.gov/common/uploads/structured_...,The PRIDE OF BALTIMORE II is a sailing ambassa...,410.962.4290,Star-Spangled-Trail_info@nps.gov,"2400 East Fort Avenue, Baltimore, MD 21230","2400 East Fort Avenue, Baltimore, MD 21230",The Star-Spangled Banner National Historic Tra...,"wednesday: All Day, monday: All Day, thursday:...",,,,,,,Accessible Rooms



Check for Missing Values
id                               0
name                             0
latitude                         1
longitude                        1
designation                     35
states                           0
description                      0
directionsInfo                   2
directionsUrl                    0
weatherInfo                      1
url                              0
activities                      10
topics                           8
image_url                        0
image_caption                    0
contact_phone                   16
contact_email                    0
physical_address                 0
mailing_address                  0
operating_hours_description      6
standard_hours                   5
entrance_fee_cost              360
entrance_fee_title             360
entrance_fee_description       360
entrance_pass_cost             379
entrance_pass_title            379
entrance_pass_description      379
amenities                    

## Inspect Data (States)

In [20]:
# Print dataframe head
print('First five rows of dataframe')
display(states.head())
print()
    
# Print dataframe sample
print('Random five rows of dataframe')
display(states.sample(5))
print()
    
# Check for missing values
print('Check for Missing Values')
print(states.isna().sum())
print()

# Check data types
print('Check Data Types')
print(states.info())
print()

# Check values for each column
print('Describe Dataframe')
print(states.describe(include = 'all'))
print()
    
# Check for duplicates
print('Count of Duplicated Rows')
print(states.duplicated().sum())
print()

# Check for leading/trailing whitespace in strings
cols = ['state&teritory', 'Name']  # string columns
for col in cols:
    if col in states.columns:
        # Convert to string just in case, then check
        has_ws = states[col].astype(str).apply(lambda x: x != x.strip())
        count = has_ws.sum()
        if count > 0:
            print(f"Column '{col}' has {count} rows with leading/trailing whitespace.")
        else: print(f"There is no leading/trailing whitespace in {col}.")  

First five rows of dataframe


Unnamed: 0,state&teritory,latitude,longitude,Name
0,AK,63.588753,-154.493062,Alaska
1,AL,32.318231,-86.902298,Alabama
2,AR,35.20105,-91.831833,Arkansas
3,AZ,34.048928,-111.093731,Arizona
4,CA,36.778261,-119.417932,California



Random five rows of dataframe


Unnamed: 0,state&teritory,latitude,longitude,Name
21,ME,45.253783,-69.445469,Maine
42,SD,43.969515,-99.901813,South Dakota
33,NV,38.80261,-116.419389,Nevada
13,ID,44.068202,-114.742041,Idaho
58,PW,7.51498,134.58252,Palau



Check for Missing Values
state&teritory    0
latitude          0
longitude         0
Name              0
dtype: int64

Check Data Types
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61 entries, 0 to 60
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   state&teritory  61 non-null     object 
 1   latitude        61 non-null     float64
 2   longitude       61 non-null     float64
 3   Name            61 non-null     object 
dtypes: float64(2), object(2)
memory usage: 2.0+ KB
None

Describe Dataframe
       state&teritory   latitude   longitude    Name
count              61  61.000000   61.000000      61
unique             59        NaN         NaN      61
top                DC        NaN         NaN  Alaska
freq                2        NaN         NaN       1
mean              NaN  34.065078  -71.870531     NaN
std               NaN  17.899831   71.809011     NaN
min               NaN -66.105720 -170.13221

### 2.6. Observations

#### 2.6.1. Parks

- There are a few missing values - if we want to include those values on our map, these should be removed. These are latitude, longitude, designation, directionsInfo, weatherInfo, activities, topics, contact_phone, operating_hours_description, standard_hours, entrance_fee_cost, and entrance_fee_title
- Datatypes are all correct
- There is no leading or trailing whitespace to strip
- Activities between parks have a lot of overlap (431 unique vs. 474 parks!) and need to be streamlined/categorized
- The instructions want a maximum of 9 geographical locations, so we will have to trim a subset of this data to use
- We also have to remove National Parks not accessible by roads or extremely far away (AK, HI, PR, AS, and any other island parks)

#### 2.6.2. States

- The header `state&teritory` is a bit sloppy and also misspelled and will be changed to `abbreviation`, as well as making `Name` lowercase
- Otherwise, everything appears fine

## 3. Data Processing and Visualization

### 3.1. Clean Data

#### 3.1.1. Parks

- Remove rows with missing coordinates and other important information
- Remove undesired parks (we have 474, probably way too many)
- Remove parks too far for a roadtrip or not accessible by land (AK, HI, PR, AS, and any other island parks)
- Create new column - tuple of latitude and longitude

In [21]:
# Drop rows with missing coordinates
parks = parks.dropna(subset=['latitude', 'longitude'])

# Drop parks in Alaska, Hawaii, Puerto Rico, American Samoa, St. Croix, Virgin Islands, and not accessible by car (Dry Tortugas)
exclude_states = ['AK', 'HI', 'PR', 'AS', 'SC', 'VI']
parks = parks[~parks['states'].str.contains('|'.join(exclude_states))]
parks = parks[parks['name'].str.strip() != 'Dry Tortugas National Park']

# Create new coordinates column (tuple of latitude and longitude) for each park
parks['coordinates'] = list(zip(parks['latitude'], parks['longitude']))
print(parks['coordinates'].head(5))

# Drop parks with missing fields that will be in the dashboard



0     (37.5858662, -85.67330523)
1        (44.409286, -68.247501)
2     (42.2553961, -71.01160356)
3             (38.9166, -77.026)
4    (40.71452681, -74.00447358)
Name: coordinates, dtype: object



#### 3.1.2. States

- Update column header labels
- Create new column - tuple of latitude and longitude

In [22]:
states.rename(columns={'state&teritory': 'abbreviation', 'Name': 'name'}, inplace=True)
print(states.columns)
print()

states['coordinates'] = list(zip(states['latitude'], states['longitude']))
print(states['coordinates'].head(5))

print(states.head(5))

Index(['abbreviation', 'latitude', 'longitude', 'name'], dtype='object')

0    (63.588753, -154.493062)
1     (32.318231, -86.902298)
2      (35.20105, -91.831833)
3    (34.048928, -111.093731)
4    (36.778261, -119.417932)
Name: coordinates, dtype: object
  abbreviation   latitude   longitude        name               coordinates
0           AK  63.588753 -154.493062      Alaska  (63.588753, -154.493062)
1           AL  32.318231  -86.902298     Alabama   (32.318231, -86.902298)
2           AR  35.201050  -91.831833    Arkansas    (35.20105, -91.831833)
3           AZ  34.048928 -111.093731     Arizona  (34.048928, -111.093731)
4           CA  36.778261 -119.417932  California  (36.778261, -119.417932)


## Data Visualization

### Map of National Parks in the Contiguous United States

In [23]:
fig = fl.Figure(width=1200, height=800)
m = fl.Map(location=[39.8283, -98.5795], zoom_start=4)
fig.add_child(m)

for _, row in parks.iterrows():
    fl.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['name']
    ).add_to(m)

fig

### 3.2. Select Parks to Include for Routing Algorithm

In [24]:
# Trim down to 9 parks
# In this stage of the project you need to collect data on locations that will be included. 
# Each location should contain longitude and latitude as well as location name for later visualization. 
# We recommend to select at maximum 9 locations to find the optimal route in the reasonable time.

# selected_parks = 

## 4. Route Algorithm

### 4.1. Baseline Model (Random Chance)

In [None]:
# Create model that randomly chooses next destination

# Create origin dictionary using random sample of 9 parks
keys = parks['name'].sample(9, random_state=1)

values = [(parks.loc[parks['name'] == name, 'latitude'].values[0],  # clean this up to take the coordinate tuple directly
           parks.loc[parks['name'] == name, 'longitude'].values[0])
          for name in keys]

origins = dict(zip(keys, values))

def dummy_route(n, start=0):
    """
    Given n parks and a starting location start, creates a random route
    """
    # Create a list of all indices
    indices = list(range(n))
    
    # Remove the starting point and shuffle the rest
    indices.remove(start)
    random.shuffle(indices)
    
    # Add the start back at the beginning
    route = [start] + indices
    
    return route

# Testing the function on 9 parks
# Timing the route calculation
start = time.time()

random_route = dummy_route(9)

end = time.time()
elapsed = (end - start)

print(f'The randomly generated route is {random_route} and took {elapsed:.0f} seconds to calculate.')

The randomly generated route is [0, 6, 7, 1, 5, 8, 2, 4, 3] and took 0 seconds to calculate.


### 4.1. Create Google Maps Travel Data Matrix

In [None]:
# Initialize Google Maps API client
api_key_g = 'AIzaSyBsZE5PsKrO7cQP1vUILx4j9HMCdPK3x_g'
gmaps = googlemaps.Client(key=api_key_g)

# Example park coordinates for testing
# origins = {'Yellowstone': (44.4280, -110.5885)}      # Yellowstone
# destinations = {'Yosemite': (37.8651, -119.5383)} # Yosemite

# Create origin dictionary using random sample of 9 parks
keys = parks['name'].sample(9, random_state=1)

values = [(parks.loc[parks['name'] == name, 'latitude'].values[0],
           parks.loc[parks['name'] == name, 'longitude'].values[0])
          for name in keys]

# To compute trips from A to B
origins = dict(zip(keys, values))

# To compute trips from B to A
destinations = origins

# Initialize distance matrix
travel = np.zeros((len(origins) * (len(destinations) - 1), 4), dtype = object)

# Loop through all dictionary items
i = 0
for origin_name, origin_coords in origins.items():
    for dest_name, dest_coords in destinations.items():
        # Skip pairs where origin = destination (distance 0)
        if origin_name == dest_name:
            continue
        else:
            # Travel data from origin to destination
            result = gmaps.distance_matrix(origins=[origin_coords], destinations=[dest_coords], mode='driving')
            distance_meters = result['rows'][0]['elements'][0]['distance']['value']
            duration_seconds = result['rows'][0]['elements'][0]['duration']['value']

            travel[i, 0] = origin_name
            travel[i, 1] = dest_name
            travel[i, 2] = np.round((distance_meters / 1000), 2)
            travel[i, 3] = np.round((duration_seconds / 3600), 2)

            i += 1

            print(f"{origin_name} → {dest_name} = {np.round((distance_meters/1000),2)} km ({np.round((duration_seconds/3600),2)} hrs)")

            time.sleep(1)  # Pause to avoid API rate limits

# Print matrix
print("\nTravel Matrix:")
print(travel)

Keweenaw National Historical Park → Hubbell Trading Post National Historic Site = 2865.66 km (27.1 hrs)
Keweenaw National Historical Park → George Washington Birthplace National Monument = 1852.78 km (17.73 hrs)
Keweenaw National Historical Park → Muir Woods National Monument = 3611.16 km (34.74 hrs)
Keweenaw National Historical Park → Capitol Reef National Park = 2755.71 km (25.41 hrs)
Keweenaw National Historical Park → Gloria Dei Church National Historic Site = 1819.78 km (17.12 hrs)
Keweenaw National Historical Park → Presidio of San Francisco = 3611.15 km (34.49 hrs)
Keweenaw National Historical Park → African Burial Ground National Monument = 1865.36 km (17.76 hrs)
Keweenaw National Historical Park → Little Bighorn Battlefield National Monument = 1705.63 km (16.68 hrs)
Hubbell Trading Post National Historic Site → Keweenaw National Historical Park = 2865.6 km (27.0 hrs)
Hubbell Trading Post National Historic Site → George Washington Birthplace National Monument = 3348.78 km (30.1

### 4.2. Create Matrix for Greedy Nearest Neighbors Algorithm

In [27]:
# Create new dataframe with just park names and coordinates
parks_gnn = parks[['name', 'latitude', 'longitude']].sample(9, random_state=1).reset_index()
print(parks_gnn)

   index                                            name   latitude  \
0    253               Keweenaw National Historical Park  47.179231   
1    229     Hubbell Trading Post National Historic Site  35.707273   
2    181  George Washington Birthplace National Monument  38.193626   
3    307                    Muir Woods National Monument  37.896580   
4     71                      Capitol Reef National Park  38.282165   
5    190        Gloria Dei Church National Historic Site  39.934350   
6    363                       Presidio of San Francisco  37.797000   
7      4         African Burial Ground National Monument  40.714527   
8    272    Little Bighorn Battlefield National Monument  45.556330   

    longitude  
0  -88.522753  
1 -109.559951  
2  -76.920795  
3 -122.580805  
4 -111.247048  
5  -75.143989  
6 -122.467100  
7  -74.004474  
8 -107.418361  


### 4.3. Define Vacation Starting Point

In [28]:
# Start at index 0 but we may need to add custom starting options
# Choose state you live in to determine first park on trip
start_index = 0 # Default if none specified

# Specify traveler's home state
home_state = 'NY'    # Specified on dashboard (SE)
home_state_coords = states.loc[states['abbreviation'] == home_state, 'coordinates'].values[0]
print(f'The user\'s home state is {home_state} and their starting coordinates for travel are {home_state_coords}.')

The user's home state is NY and their starting coordinates for travel are (43.299428, -74.217933).


### 4.4. Greedy Nearest Neighbors Algorithm

In [128]:
# Set up GNN matrix
park_list = list(origins.keys())
park_indices = {name: idx for idx, name in enumerate(park_list)}
n = len(origins)
gnn_matrix = np.full((n, n), np.inf)  # fill with inf to start

# Populate matrix from flat `travel` array
for row in travel:
    origin, destination, dist_km, _ = row
    i = park_indices[origin]
    j = park_indices[destination]
    gnn_matrix[i][j] = dist_km

# Greedy Nearest Neighbor function
def greedy_nearest_neighbor(gnn_matrix, start=0):
    n = gnn_matrix.shape[0]
    visited = [False] * n
    route = [start] # 0 or state-specific coordinate
    visited[start] = True

    current = start
    for _ in range(n-1):
        # Find nearest unvisited park
        distances = gnn_matrix[current]
        nearest = None
        nearest_dist = float('inf')
        for i in range(n):
            if not visited[i] and distances[i] < nearest_dist:
                nearest = i
                nearest_dist = distances[i]

        route.append(nearest)
        visited[nearest] = True
        current = nearest

    return route

# Example usage:
# Assume travel matrix is your NxN numpy array

# Time the route calculation
start = time.time()

route = greedy_nearest_neighbor(gnn_matrix, start=0)

end = time.time()
elapsed = end - start

print(f'The algorithm generated route is {route} and took {elapsed:.0f} seconds to calculate.')

The algorithm generated route is [0, 8, 4, 1, 6, 3, 5, 7, 2] and took 0 seconds to calculate.


### 4.5. Visualization of Routes

In [30]:
def plot_route_on_map(route_indices, park_coords, park_names):
    """
    route_indices: list of indices in visiting order
    park_coords: list of (lat, lon) tuples, same order as park_names
    park_names: list of park names
    """
    
    # Start map centered on the United States
    start_lat, start_lon = park_coords[route_indices[0]]
    m = fl.Map(location=[39.8283, -98.5795], zoom_start=4)

    # Plot markers and lines
    route_points = []
    for idx in route_indices:
        lat, lon = park_coords[idx]
        fl.Marker(
            location=[lat, lon],
            popup=park_names[idx],
            tooltip=park_names[idx]
        ).add_to(m)
        route_points.append((lat, lon))

    # Draw lines connecting parks in order of algorithm route (blue)
    fl.PolyLine(route_points, color='blue', weight=3, opacity=0.5).add_to(m)
    
    return m

# Run function to generate map
park_coords = list(origins.values())  # your parks lat/lon in order
park_names = list(origins.keys()) # names of origin parks

m = plot_route_on_map(route, park_coords, park_names)
m.save("../routes/route_map.html")  # Save to HTML file
m

# Draw lines connecting parks in order of random route (red)
# Get park names from indices
random_route_park_names = [park_list[i] for i in random_route]
# Get coordinates from indices
random_route_coords = [park_coords[i] for i in random_route]
fl.PolyLine(random_route_coords, color='red', weight=3, opacity=0.5).add_to(m)

fig = fl.Figure(width=1200, height=800)
fig.add_child(m)
fig

### 4.7. Trip Information

In [None]:
# Creating a more logical display table of stops, travel distance and time per stop, and total travel distance and time
# This can be displayed in a window near the map on the dashboard

# Convert travel matrix into df
travel_df = pd.DataFrame(travel, columns=['Origin', 'Destination', 'Distance_km', 'Duration_hr'])

# Combine into a function
def build_route_table_with_totals(route, park_list, travel_df):
    route_info = []
    total_distance = 0
    total_duration = 0

    for i in range(len(route) - 1):
        origin = park_list[route[i]]
        destination = park_list[route[i + 1]]

        # Look up distance and duration
        row = travel_df[(travel_df['Origin'] == origin) & (travel_df['Destination'] == destination)]

        if not row.empty:
            distance = row['Distance_km'].values[0]
            duration = row['Duration_hr'].values[0]
            total_distance += distance
            total_duration += duration
        else:
            distance = None
            duration = None

        route_info.append({
            'From': origin,
            'To': destination,
            'Distance (km)': distance,
            'Duration (hr)': duration
        })

    # Append total row
    route_info.append({
        'From': 'Totals',
        'To': '',
        'Distance (km)': round(total_distance, 2),
        'Duration (hr)': round(total_duration, 2)
    })

    return pd.DataFrame(route_info)

# Display in an easy to read format
def display_route_table(table, title="Route Summary"):
    print(f"\n\033[1m{title}\033[0m")  # Bold title
    print(tabulate(
        table,
        headers="keys",
        tablefmt="fancy_grid",
        floatfmt=".2f"
    ))

# Build the route table using dummy route
route_table_random = build_route_table_with_totals(random_route, park_list, travel_df)

# Build the route table using algorithm route
route_table = build_route_table_with_totals(route, park_list, travel_df)

# Display tables
display_route_table(route_table_random)
display_route_table(route_table)


[1mRoute Summary[0m
╒════╤════════════════════════════════════════════════╤════════════════════════════════════════════════╤═════════════════╤═════════════════╕
│    │ From                                           │ To                                             │   Distance (km) │   Duration (hr) │
╞════╪════════════════════════════════════════════════╪════════════════════════════════════════════════╪═════════════════╪═════════════════╡
│  0 │ Keweenaw National Historical Park              │ African Burial Ground National Monument        │         1865.36 │           17.76 │
├────┼────────────────────────────────────────────────┼────────────────────────────────────────────────┼─────────────────┼─────────────────┤
│  1 │ African Burial Ground National Monument        │ Capitol Reef National Park                     │         3557.91 │           32.70 │
├────┼────────────────────────────────────────────────┼────────────────────────────────────────────────┼─────────────────┼─────────