# Temperature vs. National Park Visitors in the USA

#### This notebook consists of data gathering from the Dark Sky API, data exploration, data cleanup to compare the temperature of various national parks. 

### Prework: Gathering the visitor data for each Park. 

In [89]:
import matplotlib.pyplot as plt
import numpy as np 
import os 
import pandas as pd
import requests 
import json
import time 
import scipy.stats as st
from citipy import citipy
import datetime

output_data_file1 = "Output_Images/TemperaturevsVisitorCount.png"
output_data_file2 = "Output_Images/TerrainMapWeather.png"

#National Park Service API 

API_Key_Park = 'x4r30IhC7l6xEe3DCmQmuiefih8dcjFcPutVzRhh'
base_url = "https://api.nps.gov/api/v1/parks?stateCode="

In [4]:
URL = 'https://irmaservices.nps.gov/v2/rest/unit/?unitCodes={unitcode}&format=json'
response = requests.get(URL).json()

column_names = ['FullName','UnitCode','UnitName',
                'StateCode']

state_Codes = []

national_parks = []
for park in response:
    if(park['UnitDesignationName'] == 'National Park'):
        national_parks.append([park['FullName'], 
                          park['UnitCode'], 
                          park['UnitName'],
                          park['StateCodes']])
        state_Codes.append(park['StateCodes'])
    
national_parks
np_df = pd.DataFrame(national_parks, columns=column_names)

In [14]:
column_names = ['Park Code','Visitors','Park Name']
query_year = "2019"
park_visitor_data = []

for unitcode in np_df['UnitCode']:
    URL = f'https://irmaservices.nps.gov/v3/rest/stats/visitation?unitCodes={unitcode}&startMonth=01&startYear={query_year}&endMonth=12&endYear={query_year}&format=json'
    response = requests.get(URL).json()
    
    for month in range(0,len(response)):
        park_visitor_data.append([unitcode, 
                          response[month]['RecreationVisitors'],
                          response[month]['UnitName']])
                   
visitor_df = pd.DataFrame(park_visitor_data, columns=column_names)
visitor_df1 = visitor_df.groupby('Park Code')
visitor_df_group = visitor_df1.sum()
visitor_df_group1 = visitor_df_group.reset_index()

Unnamed: 0,Park Code,Visitors
0,ACAD,3437286
1,ARCH,1659702
2,BADL,970998
3,BIBE,463832
4,BISC,708522


In [52]:
visitor_df_group1['Park Name'] = visitor_df['Park Name'].unique()
visitor_df_group1.tail()

Unnamed: 0,Park Code,Visitors,Park Name
50,WHSA,608785,White Sands NP
51,WICA,615350,Wind Cave NP
52,YELL,4020288,Yellowstone NP
53,YOSE,4422861,Yosemite NP
54,ZION,4488268,Zion NP


### Prework: Gather the latitude and longitude data

In [18]:
key = 'x4r30IhC7l6xEe3DCmQmuiefih8dcjFcPutVzRhh'
code = []
for x in range(len(np_df['UnitCode'])):
    try: 
        url = f"https://developer.nps.gov/api/v1/parks?parkCode={np_df['UnitCode'][x]}&api_key={key}"
        data = requests.get(url).json()
        latlon = (data['data'][0]['latLong'])
        lat,lon = latlon.split(",")
        l, lat=lat.split(':')
        l, lon=lon.split(':')
        lat=float(lat)
        lon=float(lon)
        code.append([np_df['UnitCode'][x],
                      lat, 
                      lon])
    except: 
        print('')














In [40]:
newcolumns = ['Park Code','Latitude','Longitude']
latlong_df = pd.DataFrame(code,columns = newcolumns)
latlong_df.head()

Unnamed: 0,Park Code,Latitude,Longitude
0,ACAD,44.307775,-68.300633
1,ARCH,38.722618,-109.586367
2,BADL,43.685848,-102.482942
3,BIBE,29.298178,-103.22979
4,BISC,25.490587,-80.210239


## Dark Sky API 

### Gathering the Data 

Dark Sky is a source of data similar to open weather map that allows the extraction of historical weather data. However, they only allow 1000 calls per day for one account. For this reason, please signup for an account and insert your key in the variable below. The link to sign up is below. 

https://darksky.net/dev

Insert the start date and the end date of interest into the variables in the %M/%D/%Y format. The default start date is 01/01/2019 and the default end date is 12/31/2019. 

In [84]:
lats = latlong_df['Latitude']
lngs = latlong_df['Longitude']
try: 
    KEY = '074beff881b5dd000396a924c1e98612'
except: 
    print('Make sure you entered a correct Key from Dark Sky')

In [33]:
Start_Date = '01/01/2019'
End_Date = '12/31/2019'

Start_Date_Stamp = round(time.mktime(datetime.datetime.strptime(Start_Date, "%m/%d/%Y").timetuple()))
End_Date_Stamp = round(time.mktime(datetime.datetime.strptime(End_Date, "%m/%d/%Y").timetuple()))

time = np.arange(Start_Date_Stamp, End_Date_Stamp, 2670000).tolist()

In [34]:
temp = []
for i in range(len(lats)): 
        for t in time:
            url = f'https://api.darksky.net/forecast/{KEY}/{lats[i]},{lngs[i]},{t}'
            RESPONSE = requests.get(url).json()
            temp.append([RESPONSE['currently']['time'],latlong_df['State'][i],RESPONSE['currently']['temperature']])

#### Converting the UNIX timestamp to regular date 

In [36]:
from datetime import datetime
temp_df = pd.DataFrame(temp, columns = ['Unit Time','Park Code','Temperature'])
temp_df

for time in range(len(temp_df['Unit Time'])):
    temp_df['Unit Time'][time] = datetime.fromtimestamp(temp_df['Unit Time'][time]).strftime('%m-%Y')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


#### Finding the visitor number for each of the parks found in the DataSky dataset

In [53]:
Visitor=[]
ParkNames = []
for i in range(len(visitor_df)):
    for j in range(len(latlong_df)):
        if visitor_df['Park Code'][i] == latlong_df['Park Code'][j]: 
            Visitor.append(visitor_df['Visitors'][i])   
            ParkNames.append(visitor_df['Park Name'][i])

In [59]:
ParkNames = pd.DataFrame(ParkNames,columns = ['Park Name'])

temp_df['Visitor Count'] = Visitor
temp_df.head()

temp_df['Size_Temp'] = ""

for i in range(len(temp_df['Temperature'])): 
    if temp_df['Temperature'][i] <= 0: 
        temp_df['Size_Temp'][i] = .01 
    else: 
        temp_df['Size_Temp'][i] = temp_df['Temperature'][i]
        
temp_df['Size_Temp'] = temp_df['Size_Temp'].astype(float)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



#### Averaging the temperature through the specified dates to find the average temperature for each park

In [60]:
avg_temp = temp_df.groupby('Park Code')
avg_temp_group = avg_temp.mean()
avg_temp_group = avg_temp_group.reset_index()
avg_temp_group['Lat'] =latlong_df['Latitude']
avg_temp_group['Lng'] =latlong_df['Longitude']
avg_temp_group['Park Name'] = ParkNames['Park Name'].unique()
avg_temp_group.tail()

Unnamed: 0,Park Code,Temperature,Visitor Count,Size_Temp,Lat,Lng,Park Name
48,WHSA,65.7625,50732.083333,65.7625,32.779079,-106.333346,White Sands NP
49,WICA,45.161667,51279.166667,45.161667,43.580124,-103.439471,Wind Cave NP
50,YELL,32.131667,335024.0,33.26,44.598244,-110.547169,Yellowstone NP
51,YOSE,41.29,368571.75,41.29,37.848833,-119.557187,Yosemite NP
52,ZION,51.0925,374022.333333,51.0925,37.298393,-113.026514,Zion NP


#### Plot 1: Temperature vs. Visitor Count for each national park throughout the month. 

In [90]:
import plotly.express as px
fig = px.scatter(temp_df, x="Temperature", y="Visitor Count", color="Park Code",size='Visitor Count', hover_data=["Unit Time","Park Code"] )
#fig.update_layout(title='Vistor Count vs. Number of National Parks For Every National Park')
fig.write_image(output_data_file1)
fig.show()

#### Plot 2: Terrain Map of the US outlining the location of the parks with a colorscale of a temperature and a size of a visitor count. 

In [91]:
import plotly.graph_objects as go
import plotly.express as px

# df['text'] = df['name'] + '<br>National Park visitors ' + (df['Percentage Visitors'].astype(str))+' %'
# df.head()


scale = 1000

fig = go.Figure()
   
fig = px.scatter_mapbox(avg_temp_group, lat="Lat", lon="Lng", hover_name="Park Name", 
                        hover_data=["Temperature", "Visitor Count"],
                        zoom=3, height=300, 
                        size = avg_temp_group['Visitor Count']/scale,            
                        color = 'Temperature', 
                        color_continuous_scale='bluered')
fig.update_layout(
    mapbox_style="white-bg",
    mapbox_layers=[
        {
            "below": 'traces',
            "sourcetype": "raster",
            "source": [
                "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"
            ]
        }])

fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})

fig.update_layout(
        title_text = 'Terrain Map showing each National Park',
        geo = dict(
            scope = 'usa',
            landcolor = 'rgb(217, 217, 217)',
        ))

fig.write_image(output_data_file2)
fig.show()