#### Set up Dependencies and import any keys needed

In [48]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import os
import random 

import gmaps 
import us
from census import Census

# Import API key
from api_keys import g_key
from api_keys import weather_api_key
from api_keys import gas_api_key
from api_keys import eia_key
from api_keys import census_api_key
from api_keys import nps_key
c = Census(census_api_key)


##### set up West States 

In [3]:

West = ['Colorado', 'Wyoming', 'Montana', 'Idaho', 'Washington', 'Oregon', 'Utah', 'Nevada', 'California', 'Alaska', 'Hawaii']



##### Make a call to census data to pull info for all states, put into df, rename columns
##### May not need but put it in a csv just in case

In [5]:

census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E"), {'for': 'state:*'})



#Convert to DataFrame
census_df = pd.DataFrame(census_data)

# Column Reordering
census_df = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "state": "State"})


#census_pd.head()
census_df.to_csv("./output_data/census_data_allstates.csv", encoding="utf-8", index=False)
census_df.head()

Unnamed: 0,Name,Household Income,Population,Median Age,Per Capita Income,Poverty Count,Unemployment Count,State
0,Puerto Rico,19775.0,3468963.0,40.1,12081.0,1543220.0,220597.0,72
1,Alabama,46472.0,4850771.0,38.7,25746.0,849699.0,165391.0,1
2,Alaska,76114.0,738565.0,33.9,35065.0,73380.0,29548.0,2
3,Arizona,53510.0,6809946.0,37.2,27964.0,1128046.0,225911.0,4
4,Arkansas,43813.0,2977944.0,37.9,24426.0,524211.0,85128.0,5


##### This file was provided to us in a separate activity
##### Gives centroids of each state

In [11]:
# Read in the csv containing state centroid coordinates
centroids = pd.read_csv("./Resources/state_centroids.csv")
centroids.head()

Unnamed: 0,State,Latitude,Longitude
0,Alabama,32.7794,-86.8287
1,Alaska,64.0685,-152.2782
2,Arizona,34.2744,-111.6602
3,Arkansas,34.8938,-92.4426
4,California,37.1841,-119.4696


In [12]:
# create a dictionary of states and abbreviations
us_state_abbrev = {
'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA', 'Colorado': 'CO',
'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID',
'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA',
'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS',
'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK',
'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD',
'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'}



##### Merge the centroids and census data to get a lat/lon for each state

In [13]:
# Merge the datasets using the sate columns
census_data = pd.merge(census_pd, centroids, how="left", left_on="Name", right_on="State")

# Save the updated dataframe as a csv
census_data.to_csv("./Resources/state_census_data.csv", encoding="utf-8", index=False)

#df['State'] = df['State'].map(us_state_abbrev).fillna(df['State'])

census_data['State_y'] = census_data['State_y'].map(us_state_abbrev).fillna(census_data['State_y'])
census_data.rename(columns={'State_y':'State Code'})
census_data.head()

Unnamed: 0,Name,Household Income,Population,Median Age,Per Capita Income,Poverty Count,Unemployment Count,State_x,State_y,Latitude,Longitude
0,Puerto Rico,19775.0,3468963.0,40.1,12081.0,1543220.0,220597.0,72,Puerto Rico,18.2001,-66.6645
1,Alabama,46472.0,4850771.0,38.7,25746.0,849699.0,165391.0,1,AL,32.7794,-86.8287
2,Alaska,76114.0,738565.0,33.9,35065.0,73380.0,29548.0,2,AK,64.0685,-152.2782
3,Arizona,53510.0,6809946.0,37.2,27964.0,1128046.0,225911.0,4,AZ,34.2744,-111.6602
4,Arkansas,43813.0,2977944.0,37.9,24426.0,524211.0,85128.0,5,AR,34.8938,-92.4426


In [19]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')

df.head()

Unnamed: 0,name,pop,lat,lon
0,New York,8287238,40.730599,-73.986581
1,Los Angeles,3826423,34.053717,-118.242727
2,Chicago,2705627,41.875555,-87.624421
3,Houston,2129784,29.758938,-95.367697
4,Philadelphia,1539313,39.952335,-75.163789


In [21]:
df = census_data[['Name', 'Population', 'Latitude', 'Longitude']]
df = df.rename(columns={'Name':"name", 'Population':'pop', 'Latitude':'lat', 'Longitude':'lon'})
df.head()


Unnamed: 0,name,pop,lat,lon
0,Puerto Rico,3468963.0,18.2001,-66.6645
1,Alabama,4850771.0,32.7794,-86.8287
2,Alaska,738565.0,64.0685,-152.2782
3,Arizona,6809946.0,34.2744,-111.6602
4,Arkansas,2977944.0,34.8938,-92.4426


In [67]:
import plotly.graph_objects as go
import pandas as pd

df = census_data[['Name', 'Population', 'Latitude', 'Longitude']]
df = df.rename(columns={'Name':"name", 'Population':'pop', 'Latitude':'lat', 'Longitude':'lon'})
df.head()

df['text'] = df['name'] + '<br>Population ' + (df['pop']/1000000).astype(str)+' million'

colors = ["royalblue","crimson","lightseagreen","orange","lightgrey"]
cities = []
scale = 10000

fig = go.Figure()
   
fig = go.Figure(data=go.Scattergeo(
        lon = df['lon'],
        lat = df['lat'],
        text = df['text'],
        mode = 'markers',
        marker_color = df['pop'],
        marker = dict(
            size = df['pop']/scale,
            #color = df['pop'],
            color = colors[3],
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area'),
        name = '{}'.format(df['name'])))
  
    
fig.update_layout(
        title_text = 'US state populations<br>(Census Data)',
        #showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = 'rgb(217, 217, 217)',
        )
    )

fig.show()

##### Pull out State Population Lat Lon

In [81]:
# Create a datframe with just the data we  want for the population quesion
pop_df = census_data[['Name', 'Population', 'Latitude', 'Longitude']].copy()

pop_df['StateCode'] = pop_df['Name'].map(us_state_abbrev).fillna(pop_df['Name'])
pop_df

Unnamed: 0,Name,Population,Latitude,Longitude,StateCode
0,Puerto Rico,3468963.0,18.2001,-66.6645,Puerto Rico
1,Alabama,4850771.0,32.7794,-86.8287,AL
2,Alaska,738565.0,64.0685,-152.2782,AK
3,Arizona,6809946.0,34.2744,-111.6602,AZ
4,Arkansas,2977944.0,34.8938,-92.4426,AR
5,California,38982847.0,37.1841,-119.4696,CA
6,Colorado,5436519.0,38.9972,-105.5478,CO
7,Connecticut,3594478.0,41.6219,-72.7273,CT
8,District of Columbia,672391.0,38.9101,-77.0147,District of Columbia
9,Delaware,943732.0,38.9896,-75.505,DE


### Parks Data

In [56]:
URL = 'https://irmaservices.nps.gov/v2/rest/unit/?unitCodes={*}&format=json'
response = requests.get(URL).json()

#print(response)

len(response)
# months = ['January', 'February', 'March', 'April', 'May', 'June', 'July' 'August', 'September', 'October', 'November', 'December'] 
# for month in range(0,len(response)-1):
#     print(f"{unitcode} National Park had {response[month]['RecreationVisitors']} visitors" +
#           f" in the month of {months[month]} in 2019")


# set up dataframe column names
column_names = ['FullName',
                'UnitCode',
                'UnitName',
                'StateCode']
    
#print(response)
national_parks = []
for park in response:
    if(park['UnitDesignationName'] == 'National Park'):
#         print(f" {park['UnitDesignationName']}:{park['UnitCode']}: {park['FullName']} -" +
#               f" region {park['Region']},{park['StateCodes']} ")
        national_parks.append([park['FullName'], 
                          park['UnitCode'], 
                          park['UnitName'],
                          park['StateCodes']])
#park.keys()
#dict_keys(['UnitCode', 'UnitName', 'FullName', 
# 'UnitDesignationCode', 'UnitDesignationName', 'UnitSubTypeCode', 
#'UnitSubTypeName', 'UnitLifecycle', 'Network', 'NetworkName', 'Region', 'RegionName', 'StateCodes'])
national_parks
np_df = pd.DataFrame(national_parks, columns=column_names)
np_df

Unnamed: 0,FullName,UnitCode,UnitName,StateCode
0,Acadia National Park,ACAD,Acadia,[ME]
1,Arches National Park,ARCH,Arches,[UT]
2,Badlands National Park,BADL,Badlands,[SD]
3,Big Bend National Park,BIBE,Big Bend,[TX]
4,Biscayne National Park,BISC,Biscayne,[FL]
...,...,...,...,...
58,Wind Cave National Park,WICA,Wind Cave,[SD]
59,Wrangell-St. Elias National Park,WRSG,Wrangell-St. Elias,[AK]
60,Yellowstone National Park,YELL,Yellowstone,"[ID, MT, WY]"
61,Yosemite National Park,YOSE,Yosemite,[CA]


### Make a row for eash state in the list 
* census_data = pd.merge(census_pd, centroids, how="left", left_on="Name", right_on="State")


In [57]:
# Make a ROW for each state that a park straddles 

lst_col = 'StateCode'

r = pd.DataFrame({
      col:np.repeat(np_df[col].values, np_df[lst_col].str.len())
      for col in np_df.columns.drop(lst_col)}
    ).assign(**{lst_col:np.concatenate(np_df[lst_col].values)})[np_df.columns]
r

Unnamed: 0,FullName,UnitCode,UnitName,StateCode
0,Acadia National Park,ACAD,Acadia,ME
1,Arches National Park,ARCH,Arches,UT
2,Badlands National Park,BADL,Badlands,SD
3,Big Bend National Park,BIBE,Big Bend,TX
4,Biscayne National Park,BISC,Biscayne,FL
...,...,...,...,...
63,Yellowstone National Park,YELL,Yellowstone,ID
64,Yellowstone National Park,YELL,Yellowstone,MT
65,Yellowstone National Park,YELL,Yellowstone,WY
66,Yosemite National Park,YOSE,Yosemite,CA


##### Map state abbreviation state name and merge - still figuring this one out

In [58]:
r.head(3)

Unnamed: 0,FullName,UnitCode,UnitName,StateCode
0,Acadia National Park,ACAD,Acadia,ME
1,Arches National Park,ARCH,Arches,UT
2,Badlands National Park,BADL,Badlands,SD


In [60]:
# Save the updated dataframe as a csv
np_df.to_csv("./Resources/state_park_data.csv", encoding="utf-8", index=False)
#np_df.head()

In [61]:
#notes some are empty
unitcode = "COSW" #Rocky Mountain National Park (unitcode)
URL = f'https://irmaservices.nps.gov/v3/rest/stats/visitation?unitCodes={unitcode}&startMonth=01&startYear=2019&endMonth=12&endYear=2019&format=json'
print(URL)

https://irmaservices.nps.gov/v3/rest/stats/visitation?unitCodes=COSW&startMonth=01&startYear=2019&endMonth=12&endYear=2019&format=json


#### PRINT the LAT/LON of each park

In [62]:
def convert_lat_lon(latlon):
    latlon = (data["data"][0]["latLong"])

    lat,lon = latlon.split(", ")

    l, lat=lat.split(':')
    l, lon=lon.split(':')

    lat = float(lat)
    lon = float(lon)
    return([lat, lon])
 

In [63]:
### Testing convert_lat_lon function
# ll = convert_lat_lon('lat:37.29839254, long:-113.0265138')
# print(ll[0])
# print(ll[1])

In [64]:
from api_keys import nps_key
parks_key = nps_key #NOTE NEED A KEY
park = 'ACAD'
url = f"https://developer.nps.gov/api/v1/parks?parkCode={park}&api_key={nps_key}"
data = requests.get(url).json()
print(data["data"][0]['states'])

ME


In [65]:
from api_keys import nps_key
parks_key = nps_key #NOTE NEED A KEY
latlon=[]
for park in np_df['UnitCode']:
    url = f"https://developer.nps.gov/api/v1/parks?parkCode={park}&api_key={nps_key}"
    try:
        data = requests.get(url).json()
        # Prepare and execute output
        # print(data["data"][0]["fullName"] + " can be found at " + data["data"][0]["latLong"] + ".")
        ll = convert_lat_lon(data["data"][0]["latLong"])
        state = data["data"][0]['states']
        print(state)
        latlon.append([state, park, data["data"][0]["fullName"], ll[0], ll[1]])
    except:
        print(f"Empty Cell...Skipping {park}")

park_lat_lon_df = pd.DataFrame(latlon, columns = ['State','ParkCode','Park', 'Lat', 'Lon'])
#park_lat_lon_df

ME
UT
SD
TX
FL
CO
UT
UT
UT
NM
CA
SC
Empty Cell...Skipping COSW
OR
OH
Empty Cell...Skipping DENG
CA,NV
FL
FL
Empty Cell...Skipping GAAG
MT
Empty Cell...Skipping GLBG
NV
AZ
Empty Cell...Skipping GRDG
NC,TN
WY
TX
HI
HI
AR
IN
MI
MO
CA
Empty Cell...Skipping KATG
AK
Empty Cell...Skipping KICA
AK
Empty Cell...Skipping LACG
CA
KY
CO
WA
WA
AS
WA
AZ
CA
CA
CO
AZ
Empty Cell...Skipping SEQU
VA
ND
VI
MN
NM
SD
Empty Cell...Skipping WRSG
ID,MT,WY
CA
UT


In [66]:
park_lat_lon_df.head(3)

Unnamed: 0,State,ParkCode,Park,Lat,Lon
0,ME,ACAD,Acadia National Park,44.307775,-68.300633
1,UT,ARCH,Arches National Park,38.722618,-109.586367
2,SD,BADL,Badlands National Park,43.685848,-102.482942


### MAP the PARKs

In [None]:
# Create a map using state centroid coordinates to set markers
marker_locations = park_lat_lon_df[['Lat', 'Lon']]
# Create a marker_layer using the park list to fill the info box
fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations,
    info_box_content=[f"Park: {park}" for park in park_lat_lon_df['Park']])
fig.add_layer(markers)
fig

##### print a list of unit codes - for debugging

In [None]:
for unitcode in np_df['UnitCode']:
    print(f"Unit Code = {unitcode}")

### VISITOR DATA 

In [70]:
column_names = ['UnitCode','Month', 'Visitors']
query_year = "2019"
park_visitor_data = []

months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] 
# for month in range(0, 12):
#         print(f"{months[month]}")
len(months)
for unitcode in np_df['UnitCode']:
    #print(f"Processing Unit Code = {unitcode} for {query_year}...")   
    # construct the query
    URL = f'https://irmaservices.nps.gov/v3/rest/stats/visitation?unitCodes={unitcode}&startMonth=01&startYear={query_year}&endMonth=12&endYear={query_year}&format=json'

    response = requests.get(URL).json()

    for month in range(0,len(response)):
     #     print(f"{month}")
    #     print(f"{unitcode} National Park had {response[month]['RecreationVisitors']} visitors" +
    #           f" in the month of {months[month]} in 2019")
          park_visitor_data.append([unitcode, 
                          month,
                          response[month]['RecreationVisitors']])
          
# # print("Processing complete")    
visitor_df = pd.DataFrame(park_visitor_data, columns=column_names)


In [78]:
visitor_df.head(3)

Unnamed: 0,UnitCode,Month,Visitors
0,ACAD,0,8532
1,ACAD,1,9154
2,ACAD,2,20161


In [79]:
#park_lat_lon_df = pd.DataFrame(latlon, columns = ['State','ParkCode','Park', 'Lat', 'Lon'])
park_lat_lon_df = park_lat_lon_df.rename(columns={'ParkCode':'UnitCode'})
park_lat_lon_df.head(3)


Unnamed: 0,State,UnitCode,Park,Lat,Lon
0,ME,ACAD,Acadia National Park,44.307775,-68.300633
1,UT,ARCH,Arches National Park,38.722618,-109.586367
2,SD,BADL,Badlands National Park,43.685848,-102.482942


In [80]:
# MERGE ATTEMPT 
visitor_by_month_df = visitor_df.merge(park_lat_lon_df, on='UnitCode', how='left')
#df1.merge(df2,how='left', left_on='Column1', right_on='ColumnA')
visitor_by_month_df = visitor_by_month_df.rename(columns={'State':'StateCode'})
visitor_by_month_df.head(2)

Unnamed: 0,UnitCode,Month,Visitors,StateCode,Park,Lat,Lon
0,ACAD,0,8532,ME,Acadia National Park,44.307775,-68.300633
1,ACAD,1,9154,ME,Acadia National Park,44.307775,-68.300633


In [82]:
pop_df.head(3)

Unnamed: 0,Name,Population,Latitude,Longitude,StateCode
0,Puerto Rico,3468963.0,18.2001,-66.6645,Puerto Rico
1,Alabama,4850771.0,32.7794,-86.8287,AL
2,Alaska,738565.0,64.0685,-152.2782,AK


In [83]:
#### ANOTHER MERGE ATTEMPT
visitor_pop_df = visitor_by_month_df.merge(pop_df, on='StateCode', how='left')
visitor_pop_df.head(5)

Unnamed: 0,UnitCode,Month,Visitors,StateCode,Park,Lat,Lon,Name,Population,Latitude,Longitude
0,ACAD,0,8532,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428
1,ACAD,1,9154,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428
2,ACAD,2,20161,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428
3,ACAD,3,68046,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428
4,ACAD,4,226156,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428


In [88]:
by_year = visitor_pop_df.groupby('UnitCode').sum()
by_year = visitor_pop_df.groupby(['UnitCode', 'Park', 'Lat', 'Lon', 'Population'], as_index=False)['Visitors'].sum()
by_year

Unnamed: 0,UnitCode,Park,Lat,Lon,Population,Visitors
0,ACAD,Acadia National Park,44.307775,-68.300633,1330158.0,3437286
1,ARCH,Arches National Park,38.722618,-109.586367,2993941.0,1659702
2,BADL,Badlands National Park,43.685848,-102.482942,855444.0,970998
3,BIBE,Big Bend National Park,29.298178,-103.22979,27419612.0,463832
4,BISC,Biscayne National Park,25.490587,-80.210239,20278447.0,708522
5,BLCA,Black Canyon Of The Gunnison National Park,38.577799,-107.724276,5436519.0,432818
6,BRCA,Bryce Canyon National Park,37.583991,-112.182669,2993941.0,2594904
7,CANY,Canyonlands National Park,38.245558,-109.880162,2993941.0,733996
8,CARE,Capitol Reef National Park,38.282165,-111.247048,2993941.0,1226519
9,CAVE,Carlsbad Caverns National Park,32.140895,-104.552969,2084828.0,440691


In [None]:
# # Save the updated dataframe as a csv
# visitor_df.to_csv("./Resources/state_park_visitor_data.csv", encoding="utf-8", index=False)
# visitor_df.head()  #VALIDATED AGAINST PARK WEBSITE DATA

In [98]:
# PLOT VISITOR DATA
df = by_year[['Park', 'Visitors', 'Lat', 'Lon', 'Population']]
df = df.rename(columns={'Park':"name", 'Population':'pop', 'Lat':'lat', 'Lon':'lon'})
df['Percentage Visitors'] = (df['Visitors']/df['pop'])*100

df['text'] = df['name'] + '<br>National Park visitors ' + (df['Percentage Visitors'].astype(str))+' %'
df.head()

colors = ["royalblue","crimson","lightseagreen","orange","lightgrey"]
cities = []
scale = 1

fig = go.Figure()
   
fig = go.Figure(data=go.Scattergeo(
        lon = df['lon'],
        lat = df['lat'],
        text = df['text'],
        mode = 'markers',
        marker_color = df['pop'],
        marker = dict(
            size = df['Percentage Visitors']/scale,
            #color = df['pop'],
            color = colors[3],
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area'),
           name = '{}'.format(df['name'])))
  
    
fig.update_layout(
        title_text = 'Percentage of State Population that are National Park Visitors ',
        #showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = 'rgb(217, 217, 217)',
        )
    )

fig.show()


### PLOTLY


In [None]:
census_pd.head()

In [None]:
by_year.head()

In [51]:
visitor_pop_df.head()

Unnamed: 0,UnitCode,Month,Visitors,StateCode,Park,Lat,Lon,Name,Population,Latitude,Longitude
0,ACAD,0,8532,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428
1,ACAD,1,9154,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428
2,ACAD,2,20161,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428
3,ACAD,3,68046,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428
4,ACAD,4,226156,ME,Acadia National Park,44.307775,-68.300633,Maine,1330158.0,45.3695,-69.2428


In [63]:
import plotly.express as px

df = visitor_pop_df
fig = px.scatter(df, x="Month", y="Visitors",color="UnitCode", hover_name="Park", size="Visitors")
fig.show()