# National Park ETL
----



In [1]:
import pandas as pd
import numpy as np
import os
from api_keys import np_api_key
import requests
import json
import pymongo
import calendar
from pprint import pprint


In [2]:
# Path of the 2019 visits file; it is loaded only to obtain the "Unit Code"
# column that is attached to the annual data further down.
visits_2019 = "Data/2019_Visits.csv"

# Load the csv and lower-case the unit codes in one chained expression
visits_2019_df = pd.read_csv(visits_2019).assign(
    **{"Unit Code": lambda frame: frame["Unit Code"].str.lower()}
)
visits_2019_df

Unnamed: 0,Park,Unit Code,Park Type,Region,State,Year,Recreation Visits
0,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,238226
1,Acadia NP,acad,National Park,Northeast,ME,2019,3437286
2,Adams NHP,adam,National Historical Park,Northeast,MA,2019,187400
3,African Burial Ground NM,afbg,National Monument,Northeast,NY,2019,47427
4,Agate Fossil Beds NM,agfo,National Monument,Midwest,NE,2019,16657
...,...,...,...,...,...,...,...
374,Wupatki NM,wupa,National Monument,Intermountain,AZ,2019,187059
375,Yellowstone NP,yell,National Park,Intermountain,WY,2019,4020288
376,Yosemite NP,yose,National Park,Pacific West,CA,2019,4422861
377,Yukon-Charley Rivers NPRES,yuch,National Preserve,Alaska,AK,2019,1114


In [3]:
# Path of the "Last Decade Data" csv
annual_file = "Data/AnnualVisits.csv"

# Load the csv into a dataframe
annual_visits_df = pd.read_csv(filepath_or_buffer=annual_file)
annual_visits_df

Unnamed: 0,Park Name,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,Average
0,Abraham Lincoln Birthplace NHP,177122.0,163568.0,169515.0,163928.0,241264.0,148605.0,252495.0,269580,260964,238226,208527
1,Acadia NP,2504208.0,2374645.0,2431052.0,2254922.0,2563129.0,2811184.0,3303393.0,3509271,3537575,3437286,2872667
2,Adams NHP,73339.0,219975.0,336031.0,285501.0,178468.0,183632.0,199301.0,255562,121007,187400,204022
3,African Burial Ground NM,117113.0,108585.0,77624.0,93234.0,54861.0,56348.0,46526.0,43182,45035,47427,68994
4,Agate Fossil Beds NM,12509.0,11617.0,11824.0,10504.0,12440.0,13264.0,15555.0,32038,16238,16657,15265
...,...,...,...,...,...,...,...,...,...,...,...,...
374,Wupatki NM,221083.0,216165.0,201365.0,190878.0,195962.0,212828.0,223172.0,236454,205122,187059,209009
375,Yellowstone NP,3640185.0,3394326.0,3447729.0,3188030.0,3513484.0,4097710.0,4257177.0,4116524,4115000,4020288,3779045
376,Yosemite NP,3901408.0,3951393.0,3853404.0,3691191.0,3882642.0,4150217.0,5028868.0,4336890,4009436,4422861,4122831
377,Yukon-Charley Rivers NPRES,6211.0,1718.0,1393.0,3914.0,2329.0,1133.0,1146.0,952,1272,1114,2118


In [4]:
# count the missing values of every column
annual_visits_df.isna().sum()

Park Name     0
2010         16
2011         12
2012         12
2013          9
2014          9
2015          7
2016          3
2017          0
2018          0
2019          0
Average       0
dtype: int64

In [5]:
# turn cells that are empty or whitespace-only strings into NaN so that they
# are treated like every other missing value
clean_annual_visits_df = annual_visits_df.replace(
    to_replace=r'^\s*$',
    value=np.nan,
    regex=True,
)

In [6]:
# fill every NaN with 0
clean_annual_visits_df = clean_annual_visits_df.fillna(value=0)

In [7]:
# confirm that no missing values are left after the fill
clean_annual_visits_df.isna().sum()

Park Name    0
2010         0
2011         0
2012         0
2013         0
2014         0
2015         0
2016         0
2017         0
2018         0
2019         0
Average      0
dtype: int64

In [8]:
# Attach the 2019 columns (including "Unit Code") to annual_visits_df.
# NOTE(review): the frames are glued side by side on their row index, so this
# presumably relies on both csv files listing the parks in the same order —
# confirm, or join on the park name instead.
annual_visits = pd.concat(
    objs=[clean_annual_visits_df, visits_2019_df],
    axis="columns",
)
annual_visits

Unnamed: 0,Park Name,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,Average,Park,Unit Code,Park Type,Region,State,Year,Recreation Visits
0,Abraham Lincoln Birthplace NHP,177122.0,163568.0,169515.0,163928.0,241264.0,148605.0,252495.0,269580,260964,238226,208527,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,238226
1,Acadia NP,2504208.0,2374645.0,2431052.0,2254922.0,2563129.0,2811184.0,3303393.0,3509271,3537575,3437286,2872667,Acadia NP,acad,National Park,Northeast,ME,2019,3437286
2,Adams NHP,73339.0,219975.0,336031.0,285501.0,178468.0,183632.0,199301.0,255562,121007,187400,204022,Adams NHP,adam,National Historical Park,Northeast,MA,2019,187400
3,African Burial Ground NM,117113.0,108585.0,77624.0,93234.0,54861.0,56348.0,46526.0,43182,45035,47427,68994,African Burial Ground NM,afbg,National Monument,Northeast,NY,2019,47427
4,Agate Fossil Beds NM,12509.0,11617.0,11824.0,10504.0,12440.0,13264.0,15555.0,32038,16238,16657,15265,Agate Fossil Beds NM,agfo,National Monument,Midwest,NE,2019,16657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374,Wupatki NM,221083.0,216165.0,201365.0,190878.0,195962.0,212828.0,223172.0,236454,205122,187059,209009,Wupatki NM,wupa,National Monument,Intermountain,AZ,2019,187059
375,Yellowstone NP,3640185.0,3394326.0,3447729.0,3188030.0,3513484.0,4097710.0,4257177.0,4116524,4115000,4020288,3779045,Yellowstone NP,yell,National Park,Intermountain,WY,2019,4020288
376,Yosemite NP,3901408.0,3951393.0,3853404.0,3691191.0,3882642.0,4150217.0,5028868.0,4336890,4009436,4422861,4122831,Yosemite NP,yose,National Park,Pacific West,CA,2019,4422861
377,Yukon-Charley Rivers NPRES,6211.0,1718.0,1393.0,3914.0,2329.0,1133.0,1146.0,952,1272,1114,2118,Yukon-Charley Rivers NPRES,yuch,National Preserve,Alaska,AK,2019,1114


In [9]:
# keep the descriptive columns, one column per year and the average
year_columns = [str(year) for year in range(2010, 2020)]
decade_columns = ["Park Name", "Unit Code", "Region", "State", "Park Type", *year_columns, "Average"]
visits_last_decade = annual_visits[decade_columns]
visits_last_decade


Unnamed: 0,Park Name,Unit Code,Region,State,Park Type,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,Average
0,Abraham Lincoln Birthplace NHP,abli,Southeast,KY,National Historical Park,177122.0,163568.0,169515.0,163928.0,241264.0,148605.0,252495.0,269580,260964,238226,208527
1,Acadia NP,acad,Northeast,ME,National Park,2504208.0,2374645.0,2431052.0,2254922.0,2563129.0,2811184.0,3303393.0,3509271,3537575,3437286,2872667
2,Adams NHP,adam,Northeast,MA,National Historical Park,73339.0,219975.0,336031.0,285501.0,178468.0,183632.0,199301.0,255562,121007,187400,204022
3,African Burial Ground NM,afbg,Northeast,NY,National Monument,117113.0,108585.0,77624.0,93234.0,54861.0,56348.0,46526.0,43182,45035,47427,68994
4,Agate Fossil Beds NM,agfo,Midwest,NE,National Monument,12509.0,11617.0,11824.0,10504.0,12440.0,13264.0,15555.0,32038,16238,16657,15265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374,Wupatki NM,wupa,Intermountain,AZ,National Monument,221083.0,216165.0,201365.0,190878.0,195962.0,212828.0,223172.0,236454,205122,187059,209009
375,Yellowstone NP,yell,Intermountain,WY,National Park,3640185.0,3394326.0,3447729.0,3188030.0,3513484.0,4097710.0,4257177.0,4116524,4115000,4020288,3779045
376,Yosemite NP,yose,Pacific West,CA,National Park,3901408.0,3951393.0,3853404.0,3691191.0,3882642.0,4150217.0,5028868.0,4336890,4009436,4422861,4122831
377,Yukon-Charley Rivers NPRES,yuch,Alaska,AK,National Preserve,6211.0,1718.0,1393.0,3914.0,2329.0,1133.0,1146.0,952,1272,1114,2118


In [10]:
# rename the columns that are used as keys later on to snake_case
visits_column_names = {
    "Park Name": "park_name",
    "Unit Code": "park_code",
    "Average": "average",
}
renamed_visits_last_decade = visits_last_decade.rename(columns=visits_column_names)
renamed_visits_last_decade

Unnamed: 0,park_name,park_code,Region,State,Park Type,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,average
0,Abraham Lincoln Birthplace NHP,abli,Southeast,KY,National Historical Park,177122.0,163568.0,169515.0,163928.0,241264.0,148605.0,252495.0,269580,260964,238226,208527
1,Acadia NP,acad,Northeast,ME,National Park,2504208.0,2374645.0,2431052.0,2254922.0,2563129.0,2811184.0,3303393.0,3509271,3537575,3437286,2872667
2,Adams NHP,adam,Northeast,MA,National Historical Park,73339.0,219975.0,336031.0,285501.0,178468.0,183632.0,199301.0,255562,121007,187400,204022
3,African Burial Ground NM,afbg,Northeast,NY,National Monument,117113.0,108585.0,77624.0,93234.0,54861.0,56348.0,46526.0,43182,45035,47427,68994
4,Agate Fossil Beds NM,agfo,Midwest,NE,National Monument,12509.0,11617.0,11824.0,10504.0,12440.0,13264.0,15555.0,32038,16238,16657,15265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374,Wupatki NM,wupa,Intermountain,AZ,National Monument,221083.0,216165.0,201365.0,190878.0,195962.0,212828.0,223172.0,236454,205122,187059,209009
375,Yellowstone NP,yell,Intermountain,WY,National Park,3640185.0,3394326.0,3447729.0,3188030.0,3513484.0,4097710.0,4257177.0,4116524,4115000,4020288,3779045
376,Yosemite NP,yose,Pacific West,CA,National Park,3901408.0,3951393.0,3853404.0,3691191.0,3882642.0,4150217.0,5028868.0,4336890,4009436,4422861,4122831
377,Yukon-Charley Rivers NPRES,yuch,Alaska,AK,National Preserve,6211.0,1718.0,1393.0,3914.0,2329.0,1133.0,1146.0,952,1272,1114,2118


In [11]:
# Collect the park codes in a plain list; they are matched against the
# "parkCode" field of the API records to look up the original park names.
park_code_list = renamed_visits_last_decade["park_code"].tolist()
park_code_list

['abli',
 'acad',
 'adam',
 'afbg',
 'agfo',
 'alfl',
 'alpo',
 'amis',
 'ande',
 'anjo',
 'ania',
 'anti',
 'apis',
 'apco',
 'arch',
 'arpo',
 'arho',
 'asis',
 'azru',
 'badl',
 'band',
 'bepa',
 'beol',
 'bela',
 'bibe',
 'bicy',
 'biho',
 'biso',
 'bith',
 'bica',
 'bisc',
 'blca',
 'blri',
 'blue',
 'bowa',
 'boaf',
 'bost',
 'brvb',
 'brca',
 'buis',
 'buff',
 'cabr',
 'cana',
 'cari',
 'cach',
 'cany',
 'caco',
 'caha',
 'cakr',
 'calo',
 'care',
 'cavo',
 'carl',
 'cave',
 'cawo',
 'cagr',
 'casa',
 'cacl',
 'cato',
 'cebr',
 'cech',
 'chcu',
 'cham',
 'chis',
 'chpi',
 'chyo',
 'chat',
 'choh',
 'chch',
 'chic',
 'chir',
 'chri',
 'ciro',
 'clba',
 'colo',
 'colm',
 'cong',
 'coro',
 'cowp',
 'crla',
 'crmo',
 'cuga',
 'cuis',
 'cure',
 'cuva',
 'daav',
 'deso',
 'deva',
 'dewa',
 'dena',
 'depo',
 'deto',
 'dino',
 'drto',
 'edal',
 'efmo',
 'eise',
 'elma',
 'elmo',
 'elro',
 'euon',
 'ever',
 'feha',
 'fiis',
 'fila',
 'flni',
 'flfo',
 'foth',
 'fobo',
 'foca',
 'foda',
 

In [12]:
# API call to retrieve all available parks, one page per request
park_base_url = "https://developer.nps.gov/api/v1/parks?"

parks=[]

counter = 0
while True:

    park_url= f"{park_base_url}&api_key={np_api_key}&start={counter}"
    # Log the offset only: printing park_url would store the API key in the
    # saved notebook output.
    print(f"{park_base_url}&start={counter}")

    # A timeout keeps the cell from hanging forever on a stalled connection
    response = requests.get(park_url, timeout=30)
    # Surface HTTP errors (bad key, rate limit, ...) here instead of failing
    # later on a missing "data" key.
    response.raise_for_status()
    response_json = response.json()
    park_data = response_json["data"]

    # An empty page means the offset is past the last park. Because the offset
    # only advances when records come back, stopping here also guarantees the
    # loop terminates (it replaces the hard-coded 450 cut-off).
    if not park_data:
        break

    parks.extend(park_data)
    counter += len(park_data)

    # NOTE(review): the NPS API is expected to report the overall record count
    # as "total"; when present it saves the final empty request. Without it the
    # empty-page check above ends the loop.
    total = int(response_json.get("total", 0))
    if total and counter >= total:
        break
parks

https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=0
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=50
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=100
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=150
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=200
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=250
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=300
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=350
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=400
https://developer.nps.gov/api/v1/parks?&api_key=JGGsSc8rimZ25uQ9m77wysJ2onYUjFW3Q7aVD1uf&start=450


[{'contacts': {'phoneNumbers': [{'phoneNumber': '2024266841',
     'description': '',
     'extension': '',
     'type': 'Voice'}],
   'emailAddresses': [{'description': '',
     'emailAddress': 'national_mall@nps.gov'}]},
  'states': 'DC',
  'longitude': '-77.025977',
  'activities': [],
  'entranceFees': [{'cost': '0.0000',
    'description': 'No Entrance Fee to enter park site.',
    'title': 'No Entrance Fee'}],
  'directionsInfo': 'The memorial is located at the corner of Vermont Avenue, 10th St, and U Street NW, near the U Street/African-American Civil War Memorial/Cardozo Metro Station.',
  'entrancePasses': [{'cost': '0.0000',
    'description': 'No Passes',
    'title': 'No Passes'}],
  'directionsUrl': 'http://www.nps.gov/afam/planyourvisit/directions.htm',
  'url': 'https://www.nps.gov/afam/index.htm',
  'weatherInfo': 'Washington DC gets to see all four seasons. Humidity will make the temps feel hotter in summer and colder in winter.\n\nSpring (March - May) Temp: Average hi

In [13]:
# Map the API's full park name to its park code, for every API record whose
# code appears in "park_code_list".
# A set gives constant-time membership checks, so each API record is examined
# once instead of being compared against every code in the list.
park_code_set = set(park_code_list)
park_names = {
    park["fullName"]: park["parkCode"]
    for park in parks
    if park["parkCode"] in park_code_set
}
park_names
    
    

{"Belmont-Paul Women's Equality National Monument": 'bepa',
 'Big Cypress National Preserve': 'bicy',
 'Bryce Canyon National Park': 'brca',
 'Cape Cod National Seashore': 'caco',
 'Charles Pinckney National Historic Site': 'chpi',
 'Cuyahoga Valley National Park': 'cuva',
 'Dry Tortugas National Park': 'drto',
 'Effigy Mounds National Monument': 'efmo',
 'El Malpais National Monument': 'elma',
 'First Ladies National Historic Site': 'fila',
 'Fort Bowie National Historic Site': 'fobo',
 'Fort Davis National Historic Site': 'foda',
 'Fort Stanwix National Monument': 'fost',
 'Frederick Douglass National Historic Site': 'frdo',
 'Fredericksburg & Spotsylvania National Military Park': 'frsp',
 'Friendship Hill National Historic Site': 'frhi',
 'Gates Of The Arctic National Park & Preserve': 'gaar',
 'Gauley River National Recreation Area': 'gari',
 'Golden Gate National Recreation Area': 'goga',
 'Governors Island National Monument': 'gois',
 'Kenai Fjords National Park': 'kefj',
 'Knife

In [14]:
# Build a dataframe from park_names: the API names become the index and the
# park codes end up in a single column labelled 0.
np_original_names = pd.DataFrame.from_dict(data=park_names, orient="index")
np_original_names

Unnamed: 0,0
Belmont-Paul Women's Equality National Monument,bepa
Big Cypress National Preserve,bicy
Bryce Canyon National Park,brca
Cape Cod National Seashore,caco
Charles Pinckney National Historic Site,chpi
...,...
Stones River National Battlefield,stri
Walnut Canyon National Monument,waca
Wright Brothers National Memorial,wrbr
Wupatki National Monument,wupa


In [15]:
# order the rows by park code (column 0 holds the codes)
sort_np_original_names = np_original_names.sort_values(by=0)

In [16]:
# Move the park names out of the index into a regular column and give both
# columns descriptive labels.
original_column_names = {0: "park_code", "index": "park_name"}
renamed_np_original = (
    sort_np_original_names
    .reset_index()
    .rename(columns=original_column_names)
)
renamed_np_original

Unnamed: 0,park_name,park_code
0,Abraham Lincoln Birthplace National Historical...,abli
1,Acadia National Park,acad
2,Adams National Historical Park,adam
3,African Burial Ground National Monument,afbg
4,Agate Fossil Beds National Monument,agfo
...,...,...
362,World War II Memorial,wwii
363,Yellowstone National Park,yell
364,Yosemite National Park,yose
365,Yukon - Charley Rivers National Preserve,yuch


In [17]:
# Join the last-decade visits with the original API names on the park code.
# The join is an inner one, so parks without a matching code in
# renamed_np_original are not carried over.
last_dec_merge = renamed_visits_last_decade.merge(
    renamed_np_original,
    how="inner",
    on="park_code",
)
last_dec_merge

Unnamed: 0,park_name_x,park_code,Region,State,Park Type,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,average,park_name_y
0,Abraham Lincoln Birthplace NHP,abli,Southeast,KY,National Historical Park,177122.0,163568.0,169515.0,163928.0,241264.0,148605.0,252495.0,269580,260964,238226,208527,Abraham Lincoln Birthplace National Historical...
1,Acadia NP,acad,Northeast,ME,National Park,2504208.0,2374645.0,2431052.0,2254922.0,2563129.0,2811184.0,3303393.0,3509271,3537575,3437286,2872667,Acadia National Park
2,Adams NHP,adam,Northeast,MA,National Historical Park,73339.0,219975.0,336031.0,285501.0,178468.0,183632.0,199301.0,255562,121007,187400,204022,Adams National Historical Park
3,African Burial Ground NM,afbg,Northeast,NY,National Monument,117113.0,108585.0,77624.0,93234.0,54861.0,56348.0,46526.0,43182,45035,47427,68994,African Burial Ground National Monument
4,Agate Fossil Beds NM,agfo,Midwest,NE,National Monument,12509.0,11617.0,11824.0,10504.0,12440.0,13264.0,15555.0,32038,16238,16657,15265,Agate Fossil Beds National Monument
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,Wupatki NM,wupa,Intermountain,AZ,National Monument,221083.0,216165.0,201365.0,190878.0,195962.0,212828.0,223172.0,236454,205122,187059,209009,Wupatki National Monument
363,Yellowstone NP,yell,Intermountain,WY,National Park,3640185.0,3394326.0,3447729.0,3188030.0,3513484.0,4097710.0,4257177.0,4116524,4115000,4020288,3779045,Yellowstone National Park
364,Yosemite NP,yose,Pacific West,CA,National Park,3901408.0,3951393.0,3853404.0,3691191.0,3882642.0,4150217.0,5028868.0,4336890,4009436,4422861,4122831,Yosemite National Park
365,Yukon-Charley Rivers NPRES,yuch,Alaska,AK,National Preserve,6211.0,1718.0,1393.0,3914.0,2329.0,1133.0,1146.0,952,1272,1114,2118,Yukon - Charley Rivers National Preserve


In [18]:
# Make the API name (park_name_y) the official "park_name" and switch the
# remaining descriptive columns to snake_case.
merged_column_names = {
    "park_name_y": "park_name",
    "Region": "region",
    "Park Type": "park_type",
    "State": "state",
}
last_deca_visits = last_dec_merge.rename(columns=merged_column_names)
last_deca_visits.head()

Unnamed: 0,park_name_x,park_code,region,state,park_type,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,average,park_name
0,Abraham Lincoln Birthplace NHP,abli,Southeast,KY,National Historical Park,177122.0,163568.0,169515.0,163928.0,241264.0,148605.0,252495.0,269580,260964,238226,208527,Abraham Lincoln Birthplace National Historical...
1,Acadia NP,acad,Northeast,ME,National Park,2504208.0,2374645.0,2431052.0,2254922.0,2563129.0,2811184.0,3303393.0,3509271,3537575,3437286,2872667,Acadia National Park
2,Adams NHP,adam,Northeast,MA,National Historical Park,73339.0,219975.0,336031.0,285501.0,178468.0,183632.0,199301.0,255562,121007,187400,204022,Adams National Historical Park
3,African Burial Ground NM,afbg,Northeast,NY,National Monument,117113.0,108585.0,77624.0,93234.0,54861.0,56348.0,46526.0,43182,45035,47427,68994,African Burial Ground National Monument
4,Agate Fossil Beds NM,agfo,Midwest,NE,National Monument,12509.0,11617.0,11824.0,10504.0,12440.0,13264.0,15555.0,32038,16238,16657,15265,Agate Fossil Beds National Monument


In [19]:
# keep only the essential columns (this drops the abbreviated park_name_x)
essential_columns = [
    "park_name", "park_code", "region", "state", "park_type",
    *[str(year) for year in range(2010, 2020)],
    "average",
]
last_deca_visits = last_deca_visits[essential_columns]
last_deca_visits

Unnamed: 0,park_name,park_code,region,state,park_type,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,average
0,Abraham Lincoln Birthplace National Historical...,abli,Southeast,KY,National Historical Park,177122.0,163568.0,169515.0,163928.0,241264.0,148605.0,252495.0,269580,260964,238226,208527
1,Acadia National Park,acad,Northeast,ME,National Park,2504208.0,2374645.0,2431052.0,2254922.0,2563129.0,2811184.0,3303393.0,3509271,3537575,3437286,2872667
2,Adams National Historical Park,adam,Northeast,MA,National Historical Park,73339.0,219975.0,336031.0,285501.0,178468.0,183632.0,199301.0,255562,121007,187400,204022
3,African Burial Ground National Monument,afbg,Northeast,NY,National Monument,117113.0,108585.0,77624.0,93234.0,54861.0,56348.0,46526.0,43182,45035,47427,68994
4,Agate Fossil Beds National Monument,agfo,Midwest,NE,National Monument,12509.0,11617.0,11824.0,10504.0,12440.0,13264.0,15555.0,32038,16238,16657,15265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,Wupatki National Monument,wupa,Intermountain,AZ,National Monument,221083.0,216165.0,201365.0,190878.0,195962.0,212828.0,223172.0,236454,205122,187059,209009
363,Yellowstone National Park,yell,Intermountain,WY,National Park,3640185.0,3394326.0,3447729.0,3188030.0,3513484.0,4097710.0,4257177.0,4116524,4115000,4020288,3779045
364,Yosemite National Park,yose,Pacific West,CA,National Park,3901408.0,3951393.0,3853404.0,3691191.0,3882642.0,4150217.0,5028868.0,4336890,4009436,4422861,4122831
365,Yukon - Charley Rivers National Preserve,yuch,Alaska,AK,National Preserve,6211.0,1718.0,1393.0,3914.0,2329.0,1133.0,1146.0,952,1272,1114,2118


In [20]:
# Work on an independent copy of the selected columns: last_deca_visits is a
# column slice of the merged frame, and assigning new column values to a plain
# alias of it can raise SettingWithCopyWarning and would also change
# last_deca_visits itself.
last_deca_visits_df = last_deca_visits.copy()
last_deca_visits_df

Unnamed: 0,park_name,park_code,region,state,park_type,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,average
0,Abraham Lincoln Birthplace National Historical...,abli,Southeast,KY,National Historical Park,177122.0,163568.0,169515.0,163928.0,241264.0,148605.0,252495.0,269580,260964,238226,208527
1,Acadia National Park,acad,Northeast,ME,National Park,2504208.0,2374645.0,2431052.0,2254922.0,2563129.0,2811184.0,3303393.0,3509271,3537575,3437286,2872667
2,Adams National Historical Park,adam,Northeast,MA,National Historical Park,73339.0,219975.0,336031.0,285501.0,178468.0,183632.0,199301.0,255562,121007,187400,204022
3,African Burial Ground National Monument,afbg,Northeast,NY,National Monument,117113.0,108585.0,77624.0,93234.0,54861.0,56348.0,46526.0,43182,45035,47427,68994
4,Agate Fossil Beds National Monument,agfo,Midwest,NE,National Monument,12509.0,11617.0,11824.0,10504.0,12440.0,13264.0,15555.0,32038,16238,16657,15265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,Wupatki National Monument,wupa,Intermountain,AZ,National Monument,221083.0,216165.0,201365.0,190878.0,195962.0,212828.0,223172.0,236454,205122,187059,209009
363,Yellowstone National Park,yell,Intermountain,WY,National Park,3640185.0,3394326.0,3447729.0,3188030.0,3513484.0,4097710.0,4257177.0,4116524,4115000,4020288,3779045
364,Yosemite National Park,yose,Pacific West,CA,National Park,3901408.0,3951393.0,3853404.0,3691191.0,3882642.0,4150217.0,5028868.0,4336890,4009436,4422861,4122831
365,Yukon - Charley Rivers National Preserve,yuch,Alaska,AK,National Preserve,6211.0,1718.0,1393.0,3914.0,2329.0,1133.0,1146.0,952,1272,1114,2118


In [21]:
# inspect the dtype and the non-null count of every column
last_deca_visits_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 367 entries, 0 to 366
Data columns (total 16 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   park_name  367 non-null    object 
 1   park_code  367 non-null    object 
 2   region     367 non-null    object 
 3   state      356 non-null    object 
 4   park_type  367 non-null    object 
 5   2010       367 non-null    float64
 6   2011       367 non-null    float64
 7   2012       367 non-null    float64
 8   2013       367 non-null    float64
 9   2014       367 non-null    float64
 10  2015       367 non-null    float64
 11  2016       367 non-null    float64
 12  2017       367 non-null    int64  
 13  2018       367 non-null    int64  
 14  2019       367 non-null    int64  
 15  average    367 non-null    int64  
dtypes: float64(7), int64(4), object(5)
memory usage: 48.7+ KB


In [22]:
# Convert the year columns that were read as float to int64 so that every
# year column has the same integer type.
# astype with a per-column mapping returns a new frame, which avoids assigning
# into what may be a slice of another dataframe (SettingWithCopyWarning).
float_year_columns = ["2010","2011","2012","2013","2014","2015","2016"]
last_deca_visits_df = last_deca_visits_df.astype(
    {column: np.int64 for column in float_year_columns}
)
last_deca_visits_df

Unnamed: 0,park_name,park_code,region,state,park_type,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,average
0,Abraham Lincoln Birthplace National Historical...,abli,Southeast,KY,National Historical Park,177122,163568,169515,163928,241264,148605,252495,269580,260964,238226,208527
1,Acadia National Park,acad,Northeast,ME,National Park,2504208,2374645,2431052,2254922,2563129,2811184,3303393,3509271,3537575,3437286,2872667
2,Adams National Historical Park,adam,Northeast,MA,National Historical Park,73339,219975,336031,285501,178468,183632,199301,255562,121007,187400,204022
3,African Burial Ground National Monument,afbg,Northeast,NY,National Monument,117113,108585,77624,93234,54861,56348,46526,43182,45035,47427,68994
4,Agate Fossil Beds National Monument,agfo,Midwest,NE,National Monument,12509,11617,11824,10504,12440,13264,15555,32038,16238,16657,15265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,Wupatki National Monument,wupa,Intermountain,AZ,National Monument,221083,216165,201365,190878,195962,212828,223172,236454,205122,187059,209009
363,Yellowstone National Park,yell,Intermountain,WY,National Park,3640185,3394326,3447729,3188030,3513484,4097710,4257177,4116524,4115000,4020288,3779045
364,Yosemite National Park,yose,Pacific West,CA,National Park,3901408,3951393,3853404,3691191,3882642,4150217,5028868,4336890,4009436,4422861,4122831
365,Yukon - Charley Rivers National Preserve,yuch,Alaska,AK,National Preserve,6211,1718,1393,3914,2329,1133,1146,952,1272,1114,2118


In [23]:
# Reshape from wide to long: every column that is not an identifier (i.e. each
# year column) becomes one row with its label in "year" and its value in
# "visitors".
identifier_columns = ["park_name","park_code","region","state","park_type","average"]
last_10_year_records = pd.melt(
    last_deca_visits_df,
    id_vars=identifier_columns,
    var_name="year",
    value_name="visitors",
)
last_10_year_records


Unnamed: 0,park_name,park_code,region,state,park_type,average,year,visitors
0,Abraham Lincoln Birthplace National Historical...,abli,Southeast,KY,National Historical Park,208527,2010,177122
1,Acadia National Park,acad,Northeast,ME,National Park,2872667,2010,2504208
2,Adams National Historical Park,adam,Northeast,MA,National Historical Park,204022,2010,73339
3,African Burial Ground National Monument,afbg,Northeast,NY,National Monument,68994,2010,117113
4,Agate Fossil Beds National Monument,agfo,Midwest,NE,National Monument,15265,2010,12509
...,...,...,...,...,...,...,...,...
3665,Wupatki National Monument,wupa,Intermountain,AZ,National Monument,209009,2019,187059
3666,Yellowstone National Park,yell,Intermountain,WY,National Park,3779045,2019,4020288
3667,Yosemite National Park,yose,Pacific West,CA,National Park,4122831,2019,4422861
3668,Yukon - Charley Rivers National Preserve,yuch,Alaska,AK,National Preserve,2118,2019,1114


In [24]:
# Collapse the long records back to one row per park, storing the yearly
# visitors as a {year: visitors} dictionary (the shape wanted in the json).
def _visits_by_year(park_rows):
    """Return a {year: visitors} dict built from the rows of one park."""
    return dict(zip(park_rows['year'], park_rows['visitors']))


# NOTE(review): "state" is not part of the grouping keys; presumably because
# it contains missing values and groupby drops rows with a missing key by
# default — confirm.
grouping_keys = ['park_name','park_code','region','average']
grouped_last_10_year_records = last_10_year_records.groupby(grouping_keys).apply(_visits_by_year)
grouped_last_10_year_records_df = grouped_last_10_year_records.to_frame()
grouped_last_10_year_records_df


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0
park_name,park_code,region,average,Unnamed: 4_level_1
Abraham Lincoln Birthplace National Historical Park,abli,Southeast,208527,"{'2010': 177122, '2011': 163568, '2012': 16951..."
Acadia National Park,acad,Northeast,2872667,"{'2010': 2504208, '2011': 2374645, '2012': 243..."
Adams National Historical Park,adam,Northeast,204022,"{'2010': 73339, '2011': 219975, '2012': 336031..."
African Burial Ground National Monument,afbg,Northeast,68994,"{'2010': 117113, '2011': 108585, '2012': 77624..."
Agate Fossil Beds National Monument,agfo,Midwest,15265,"{'2010': 12509, '2011': 11617, '2012': 11824, ..."
...,...,...,...,...
Wupatki National Monument,wupa,Intermountain,209009,"{'2010': 221083, '2011': 216165, '2012': 20136..."
Yellowstone National Park,yell,Intermountain,3779045,"{'2010': 3640185, '2011': 3394326, '2012': 344..."
Yosemite National Park,yose,Pacific West,4122831,"{'2010': 3901408, '2011': 3951393, '2012': 385..."
Yukon - Charley Rivers National Preserve,yuch,Alaska,2118,"{'2010': 6211, '2011': 1718, '2012': 1393, '20..."


In [25]:
# turn the four grouping levels of the index back into regular columns
index_levels = ["park_name","park_code","region","average"]
last_decade_records_df = grouped_last_10_year_records_df.reset_index(level=index_levels)
last_decade_records_df

Unnamed: 0,park_name,park_code,region,average,0
0,Abraham Lincoln Birthplace National Historical...,abli,Southeast,208527,"{'2010': 177122, '2011': 163568, '2012': 16951..."
1,Acadia National Park,acad,Northeast,2872667,"{'2010': 2504208, '2011': 2374645, '2012': 243..."
2,Adams National Historical Park,adam,Northeast,204022,"{'2010': 73339, '2011': 219975, '2012': 336031..."
3,African Burial Ground National Monument,afbg,Northeast,68994,"{'2010': 117113, '2011': 108585, '2012': 77624..."
4,Agate Fossil Beds National Monument,agfo,Midwest,15265,"{'2010': 12509, '2011': 11617, '2012': 11824, ..."
...,...,...,...,...,...
362,Wupatki National Monument,wupa,Intermountain,209009,"{'2010': 221083, '2011': 216165, '2012': 20136..."
363,Yellowstone National Park,yell,Intermountain,3779045,"{'2010': 3640185, '2011': 3394326, '2012': 344..."
364,Yosemite National Park,yose,Pacific West,4122831,"{'2010': 3901408, '2011': 3951393, '2012': 385..."
365,Yukon - Charley Rivers National Preserve,yuch,Alaska,2118,"{'2010': 6211, '2011': 1718, '2012': 1393, '20..."


In [26]:
# lookup table: state of every park code
park_by_state = last_deca_visits_df.loc[:, ['state','park_code']]
park_by_state


Unnamed: 0,state,park_code
0,KY,abli
1,ME,acad
2,MA,adam
3,NY,afbg
4,NE,agfo
...,...,...
362,AZ,wupa
363,WY,yell
364,CA,yose
365,AK,yuch


In [27]:
# add the state of each park to last_decade_records_df via the park code
merge_last_10_year_records = park_by_state.merge(
    last_decade_records_df,
    how="inner",
    on="park_code",
)
merge_last_10_year_records

Unnamed: 0,state,park_code,park_name,region,average,0
0,KY,abli,Abraham Lincoln Birthplace National Historical...,Southeast,208527,"{'2010': 177122, '2011': 163568, '2012': 16951..."
1,ME,acad,Acadia National Park,Northeast,2872667,"{'2010': 2504208, '2011': 2374645, '2012': 243..."
2,MA,adam,Adams National Historical Park,Northeast,204022,"{'2010': 73339, '2011': 219975, '2012': 336031..."
3,NY,afbg,African Burial Ground National Monument,Northeast,68994,"{'2010': 117113, '2011': 108585, '2012': 77624..."
4,NE,agfo,Agate Fossil Beds National Monument,Midwest,15265,"{'2010': 12509, '2011': 11617, '2012': 11824, ..."
...,...,...,...,...,...,...
362,AZ,wupa,Wupatki National Monument,Intermountain,209009,"{'2010': 221083, '2011': 216165, '2012': 20136..."
363,WY,yell,Yellowstone National Park,Intermountain,3779045,"{'2010': 3640185, '2011': 3394326, '2012': 344..."
364,CA,yose,Yosemite National Park,Pacific West,4122831,"{'2010': 3901408, '2011': 3951393, '2012': 385..."
365,AK,yuch,Yukon - Charley Rivers National Preserve,Alaska,2118,"{'2010': 6211, '2011': 1718, '2012': 1393, '20..."


In [28]:
# give the column of per-year dictionaries (labelled 0 so far) a proper name
years_column_name = {0: "years"}
renamed_last_10_year_records_df = merge_last_10_year_records.rename(columns=years_column_name)
renamed_last_10_year_records_df

Unnamed: 0,state,park_code,park_name,region,average,years
0,KY,abli,Abraham Lincoln Birthplace National Historical...,Southeast,208527,"{'2010': 177122, '2011': 163568, '2012': 16951..."
1,ME,acad,Acadia National Park,Northeast,2872667,"{'2010': 2504208, '2011': 2374645, '2012': 243..."
2,MA,adam,Adams National Historical Park,Northeast,204022,"{'2010': 73339, '2011': 219975, '2012': 336031..."
3,NY,afbg,African Burial Ground National Monument,Northeast,68994,"{'2010': 117113, '2011': 108585, '2012': 77624..."
4,NE,agfo,Agate Fossil Beds National Monument,Midwest,15265,"{'2010': 12509, '2011': 11617, '2012': 11824, ..."
...,...,...,...,...,...,...
362,AZ,wupa,Wupatki National Monument,Intermountain,209009,"{'2010': 221083, '2011': 216165, '2012': 20136..."
363,WY,yell,Yellowstone National Park,Intermountain,3779045,"{'2010': 3640185, '2011': 3394326, '2012': 344..."
364,CA,yose,Yosemite National Park,Pacific West,4122831,"{'2010': 3901408, '2011': 3951393, '2012': 385..."
365,AK,yuch,Yukon - Charley Rivers National Preserve,Alaska,2118,"{'2010': 6211, '2011': 1718, '2012': 1393, '20..."


In [29]:
# final column selection and order for the exported records
final_columns = ["park_name","park_code","state","region","years","average"]
renamed_last_10_year_records_df = renamed_last_10_year_records_df.loc[:, final_columns]
renamed_last_10_year_records_df

Unnamed: 0,park_name,park_code,state,region,years,average
0,Abraham Lincoln Birthplace National Historical...,abli,KY,Southeast,"{'2010': 177122, '2011': 163568, '2012': 16951...",208527
1,Acadia National Park,acad,ME,Northeast,"{'2010': 2504208, '2011': 2374645, '2012': 243...",2872667
2,Adams National Historical Park,adam,MA,Northeast,"{'2010': 73339, '2011': 219975, '2012': 336031...",204022
3,African Burial Ground National Monument,afbg,NY,Northeast,"{'2010': 117113, '2011': 108585, '2012': 77624...",68994
4,Agate Fossil Beds National Monument,agfo,NE,Midwest,"{'2010': 12509, '2011': 11617, '2012': 11824, ...",15265
...,...,...,...,...,...,...
362,Wupatki National Monument,wupa,AZ,Intermountain,"{'2010': 221083, '2011': 216165, '2012': 20136...",209009
363,Yellowstone National Park,yell,WY,Intermountain,"{'2010': 3640185, '2011': 3394326, '2012': 344...",3779045
364,Yosemite National Park,yose,CA,Pacific West,"{'2010': 3901408, '2011': 3951393, '2012': 385...",4122831
365,Yukon - Charley Rivers National Preserve,yuch,AK,Alaska,"{'2010': 6211, '2011': 1718, '2012': 1393, '20...",2118


In [30]:
# generating json file (one entry per row, keyed by the row index)
# Create the output folder first so the export does not fail when it is missing
os.makedirs("Output", exist_ok=True)
renamed_last_10_year_records_df.to_json("Output/visits_last_decade.json", orient = 'index')

In [31]:
# create a database connection function
def get_db(conn='mongodb://localhost:27017', db_name='NationalParksDB'):
    """Return a handle to a MongoDB database.

    Parameters
    ----------
    conn : str, optional
        MongoDB connection URI. Defaults to the local server
        ``mongodb://localhost:27017``.
    db_name : str, optional
        Name of the database to return. Defaults to ``NationalParksDB``.

    Returns
    -------
    pymongo.database.Database
        Database handle obtained from a newly created ``MongoClient``.
    """
    client = pymongo.MongoClient(conn)

    # Item access is equivalent to attribute access (client.NationalParksDB)
    # and allows the database name to be passed in.
    return client[db_name]

In [32]:
# Connect and take a handle on the last_year_decade_visits collection.
db = get_db()
last_year_decade_visits = db.last_year_decade_visits

# Drop whatever the collection currently holds so that re-running the
# notebook does not leave duplicate documents behind.
last_year_decade_visits.drop()

In [33]:
# Read the exported decade records back in as plain Python objects.
with open("Output/visits_last_decade.json") as f:
    file_data = json.load(f)

# The file is index-keyed ({row label: record}); only the records are stored.
decade_documents = list(file_data.values())

# One bulk insert instead of one round-trip per park. insert_many rejects an
# empty list, so skip the call when there is nothing to write.
if decade_documents:
    last_year_decade_visits.insert_many(decade_documents)

## Monthly data for 2019

In [34]:
# Location of the 2019 month-by-month visits file.
monthly_2019 = "Data/Monthly_2019_Visits.csv"

# Load the CSV and lower-case the unit codes in the same step.
monthly_visits_2019_df = pd.read_csv(monthly_2019).assign(
    **{"Unit Code": lambda frame: frame["Unit Code"].str.lower()}
)
monthly_visits_2019_df

Unnamed: 0,Park,Unit Code,Park Type,Region,State,Year,Month,Recreation Visits
0,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,1,1939
1,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,2,6534
2,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,3,14566
3,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,4,16496
4,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,5,24646
...,...,...,...,...,...,...,...,...
4543,Zion NP,zion,National Park,Intermountain,UT,2019,8,535322
4544,Zion NP,zion,National Park,Intermountain,UT,2019,9,497443
4545,Zion NP,zion,National Park,Intermountain,UT,2019,10,429604
4546,Zion NP,zion,National Park,Intermountain,UT,2019,11,230180


In [35]:
# transform monthly_visits_2019_df: replace month numbers with abbreviated names.
# The column is overwritten in place, so on a second run of this cell it already
# holds strings; those are passed through untouched instead of being used to
# index calendar.month_abbr (which would raise TypeError).
monthly_visits_2019_df['Month'] = monthly_visits_2019_df['Month'].apply(
    lambda month: month if isinstance(month, str) else calendar.month_abbr[month]
)
monthly_visits_2019_df

Unnamed: 0,Park,Unit Code,Park Type,Region,State,Year,Month,Recreation Visits
0,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Jan,1939
1,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Feb,6534
2,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Mar,14566
3,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Apr,16496
4,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,May,24646
...,...,...,...,...,...,...,...,...
4543,Zion NP,zion,National Park,Intermountain,UT,2019,Aug,535322
4544,Zion NP,zion,National Park,Intermountain,UT,2019,Sep,497443
4545,Zion NP,zion,National Park,Intermountain,UT,2019,Oct,429604
4546,Zion NP,zion,National Park,Intermountain,UT,2019,Nov,230180


In [36]:
# Switch the CSV headers to snake_case field names ("Year" is not renamed here).
renamed_monthly_visits_2019 = monthly_visits_2019_df.rename(
    columns={
        "Park": "park_name",
        "Unit Code": "park_code",
        "Park Type": "park_type",
        "Region": "region",
        "State": "state",
        "Month": "month",
        "Recreation Visits": "visits",
    }
)
renamed_monthly_visits_2019

Unnamed: 0,park_name,park_code,park_type,region,state,Year,month,visits
0,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Jan,1939
1,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Feb,6534
2,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Mar,14566
3,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Apr,16496
4,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,May,24646
...,...,...,...,...,...,...,...,...
4543,Zion NP,zion,National Park,Intermountain,UT,2019,Aug,535322
4544,Zion NP,zion,National Park,Intermountain,UT,2019,Sep,497443
4545,Zion NP,zion,National Park,Intermountain,UT,2019,Oct,429604
4546,Zion NP,zion,National Park,Intermountain,UT,2019,Nov,230180


In [37]:
# Join the monthly rows with renamed_np_original on the shared park_code.
# Both frames have a park_name column, so the result carries park_name_x
# (from the monthly data) and park_name_y (from renamed_np_original).
merge_months_visits_2019 = renamed_monthly_visits_2019.merge(
    renamed_np_original,
    on="park_code",
)
merge_months_visits_2019

Unnamed: 0,park_name_x,park_code,park_type,region,state,Year,month,visits,park_name_y
0,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Jan,1939,Abraham Lincoln Birthplace National Historical...
1,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Feb,6534,Abraham Lincoln Birthplace National Historical...
2,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Mar,14566,Abraham Lincoln Birthplace National Historical...
3,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,Apr,16496,Abraham Lincoln Birthplace National Historical...
4,Abraham Lincoln Birthplace NHP,abli,National Historical Park,Southeast,KY,2019,May,24646,Abraham Lincoln Birthplace National Historical...
...,...,...,...,...,...,...,...,...,...
4399,Zion NP,zion,National Park,Intermountain,UT,2019,Aug,535322,Zion National Park
4400,Zion NP,zion,National Park,Intermountain,UT,2019,Sep,497443,Zion National Park
4401,Zion NP,zion,National Park,Intermountain,UT,2019,Oct,429604,Zion National Park
4402,Zion NP,zion,National Park,Intermountain,UT,2019,Nov,230180,Zion National Park


In [38]:
# Promote park_name_y to park_name and bring "Year" in line with the other field names.
renamed_months_visits_2019 = merge_months_visits_2019.rename(
    columns={
        "park_name_y": "park_name",
        "Year": "year",
    }
)

# Keep the final fields only; park_name_x is left behind.
bymonths_visits_2019 = renamed_months_visits_2019[
    [
        "park_name",
        "park_code",
        "park_type",
        "region",
        "state",
        "year",
        'month',
        "visits",
    ]
]
bymonths_visits_2019.head()

Unnamed: 0,park_name,park_code,park_type,region,state,year,month,visits
0,Abraham Lincoln Birthplace National Historical...,abli,National Historical Park,Southeast,KY,2019,Jan,1939
1,Abraham Lincoln Birthplace National Historical...,abli,National Historical Park,Southeast,KY,2019,Feb,6534
2,Abraham Lincoln Birthplace National Historical...,abli,National Historical Park,Southeast,KY,2019,Mar,14566
3,Abraham Lincoln Birthplace National Historical...,abli,National Historical Park,Southeast,KY,2019,Apr,16496
4,Abraham Lincoln Birthplace National Historical...,abli,National Historical Park,Southeast,KY,2019,May,24646


In [39]:
# Collapse each park's monthly rows into a single {month: visits} dict.
monthly_visitors_2019 = (
    bymonths_visits_2019
    .groupby(['park_name', 'park_code', 'region'])
    .apply(lambda park_rows: dict(zip(park_rows['month'], park_rows['visits'])))
)

# Turn the resulting Series into a one-column frame (the column is labelled 0).
bymonth_visitors_2019 = monthly_visitors_2019.to_frame()
bymonth_visitors_2019

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
park_name,park_code,region,Unnamed: 3_level_1
Abraham Lincoln Birthplace National Historical Park,abli,Southeast,"{'Jan': 1939, 'Feb': 6534, 'Mar': 14566, 'Apr'..."
Acadia National Park,acad,Northeast,"{'Jan': 8532, 'Feb': 9154, 'Mar': 20161, 'Apr'..."
Adams National Historical Park,adam,Northeast,"{'Jan': 27, 'Feb': 179, 'Mar': 253, 'Apr': 537..."
African Burial Ground National Monument,afbg,Northeast,"{'Jan': 96, 'Feb': 1571, 'Mar': 1395, 'Apr': 5..."
Agate Fossil Beds National Monument,agfo,Midwest,"{'Jan': 0, 'Feb': 94, 'Mar': 401, 'Apr': 391, ..."
...,...,...,...
Wupatki National Monument,wupa,Intermountain,"{'Jan': 724, 'Feb': 4243, 'Mar': 17447, 'Apr':..."
Yellowstone National Park,yell,Intermountain,"{'Jan': 33896, 'Feb': 31650, 'Mar': 28695, 'Ap..."
Yosemite National Park,yose,Pacific West,"{'Jan': 116746, 'Feb': 111665, 'Mar': 173610, ..."
Yukon - Charley Rivers National Preserve,yuch,Alaska,"{'Jan': 6, 'Feb': 72, 'Mar': 46, 'Apr': 37, 'M..."


In [40]:
# Move the group keys out of the index into ordinary columns, then give the
# unnamed dict column (labelled 0) its field name.
bymonth_visitors_2019_df = (
    bymonth_visitors_2019
    .reset_index(["park_name", "park_code", "region"])
    .rename(columns={0: "month"})
)
bymonth_visitors_2019_df

Unnamed: 0,park_name,park_code,region,month
0,Abraham Lincoln Birthplace National Historical...,abli,Southeast,"{'Jan': 1939, 'Feb': 6534, 'Mar': 14566, 'Apr'..."
1,Acadia National Park,acad,Northeast,"{'Jan': 8532, 'Feb': 9154, 'Mar': 20161, 'Apr'..."
2,Adams National Historical Park,adam,Northeast,"{'Jan': 27, 'Feb': 179, 'Mar': 253, 'Apr': 537..."
3,African Burial Ground National Monument,afbg,Northeast,"{'Jan': 96, 'Feb': 1571, 'Mar': 1395, 'Apr': 5..."
4,Agate Fossil Beds National Monument,agfo,Midwest,"{'Jan': 0, 'Feb': 94, 'Mar': 401, 'Apr': 391, ..."
...,...,...,...,...
362,Wupatki National Monument,wupa,Intermountain,"{'Jan': 724, 'Feb': 4243, 'Mar': 17447, 'Apr':..."
363,Yellowstone National Park,yell,Intermountain,"{'Jan': 33896, 'Feb': 31650, 'Mar': 28695, 'Ap..."
364,Yosemite National Park,yose,Pacific West,"{'Jan': 116746, 'Feb': 111665, 'Mar': 173610, ..."
365,Yukon - Charley Rivers National Preserve,yuch,Alaska,"{'Jan': 6, 'Feb': 72, 'Mar': 46, 'Apr': 37, 'M..."


In [41]:
# Bring in the state for each park by joining park_by_state on park_code.
merge_bymonth_visitors_2019 = park_by_state.merge(
    bymonth_visitors_2019_df,
    on="park_code",
)
merge_bymonth_visitors_2019

Unnamed: 0,state,park_code,park_name,region,month
0,KY,abli,Abraham Lincoln Birthplace National Historical...,Southeast,"{'Jan': 1939, 'Feb': 6534, 'Mar': 14566, 'Apr'..."
1,ME,acad,Acadia National Park,Northeast,"{'Jan': 8532, 'Feb': 9154, 'Mar': 20161, 'Apr'..."
2,MA,adam,Adams National Historical Park,Northeast,"{'Jan': 27, 'Feb': 179, 'Mar': 253, 'Apr': 537..."
3,NY,afbg,African Burial Ground National Monument,Northeast,"{'Jan': 96, 'Feb': 1571, 'Mar': 1395, 'Apr': 5..."
4,NE,agfo,Agate Fossil Beds National Monument,Midwest,"{'Jan': 0, 'Feb': 94, 'Mar': 401, 'Apr': 391, ..."
...,...,...,...,...,...
362,AZ,wupa,Wupatki National Monument,Intermountain,"{'Jan': 724, 'Feb': 4243, 'Mar': 17447, 'Apr':..."
363,WY,yell,Yellowstone National Park,Intermountain,"{'Jan': 33896, 'Feb': 31650, 'Mar': 28695, 'Ap..."
364,CA,yose,Yosemite National Park,Pacific West,"{'Jan': 116746, 'Feb': 111665, 'Mar': 173610, ..."
365,AK,yuch,Yukon - Charley Rivers National Preserve,Alaska,"{'Jan': 6, 'Feb': 72, 'Mar': 46, 'Apr': 37, 'M..."


In [42]:
# Put the fields into their final output order.
merge_bymonth_visitors_2019 = merge_bymonth_visitors_2019[
    [
        "park_name",
        "park_code",
        "state",
        "region",
        "month",
    ]
]
merge_bymonth_visitors_2019

Unnamed: 0,park_name,park_code,state,region,month
0,Abraham Lincoln Birthplace National Historical...,abli,KY,Southeast,"{'Jan': 1939, 'Feb': 6534, 'Mar': 14566, 'Apr'..."
1,Acadia National Park,acad,ME,Northeast,"{'Jan': 8532, 'Feb': 9154, 'Mar': 20161, 'Apr'..."
2,Adams National Historical Park,adam,MA,Northeast,"{'Jan': 27, 'Feb': 179, 'Mar': 253, 'Apr': 537..."
3,African Burial Ground National Monument,afbg,NY,Northeast,"{'Jan': 96, 'Feb': 1571, 'Mar': 1395, 'Apr': 5..."
4,Agate Fossil Beds National Monument,agfo,NE,Midwest,"{'Jan': 0, 'Feb': 94, 'Mar': 401, 'Apr': 391, ..."
...,...,...,...,...,...
362,Wupatki National Monument,wupa,AZ,Intermountain,"{'Jan': 724, 'Feb': 4243, 'Mar': 17447, 'Apr':..."
363,Yellowstone National Park,yell,WY,Intermountain,"{'Jan': 33896, 'Feb': 31650, 'Mar': 28695, 'Ap..."
364,Yosemite National Park,yose,CA,Pacific West,"{'Jan': 116746, 'Feb': 111665, 'Mar': 173610, ..."
365,Yukon - Charley Rivers National Preserve,yuch,AK,Alaska,"{'Jan': 6, 'Feb': 72, 'Mar': 46, 'Apr': 37, 'M..."


In [43]:
# Export the monthly records as JSON; orient="index" keys each record by its row label.
merge_bymonth_visitors_2019.to_json(
    path_or_buf="Output/bymonth_visits_2019.json",
    orient="index",
)

In [44]:
# Connect and take a handle on the monthly_visits_2019 collection.
db = get_db()
monthly_visits_2019 = db.monthly_visits_2019

# Drop whatever the collection currently holds so that re-running the
# notebook does not leave duplicate documents behind.
monthly_visits_2019.drop()

In [45]:
# insert to database
# Read the exported monthly records back in as plain Python objects.
with open("Output/bymonth_visits_2019.json") as g:
    month_file_data = json.load(g)

# The file is index-keyed ({row label: record}); only the records are stored.
monthly_documents = list(month_file_data.values())

# One bulk insert instead of one round-trip per park. insert_many rejects an
# empty list, so skip the call when there is nothing to write.
if monthly_documents:
    monthly_visits_2019.insert_many(monthly_documents)