In [1]:
import pandas as pd
import os
import requests
import datetime as dt
from config import noaa_token as token

In [2]:
#variables
base = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/'
header = {'token':token,
          'Content-Type':'application/json'}

state_ids = {'PA':'FIPS:42',
            'OH':'FIPS:39',
            'MI':'FIPS:26',
            'IL':'FIPS:17',
            'WI':'FIPS:55',
            'MN':'FIPS:27',
            'IA':'FIPS:18',
            'NY':'FIPS:36'}

In [3]:
#get count
def get_c(json):
    return json['metadata']['resultset']['count']

def gather(endpt,load):
    initial = requests.get(base+endpt,headers=header,params=load).json()
    count = get_c(initial)
    json = initial['results']
    offset = 1000
    while offset < count:
        load['offset'] = offset
        json += requests.get(base+endpt,headers=header,params=load).json()['results']
        offset += 1000
    return pd.DataFrame(json)

In [4]:
stations = gather('stations',{'locationid':'CLIM:102','limit':'1000'})

In [5]:
stations[['First_ID','Last_ID']] = stations['id'].str.split(':',expand=True)
stations[['City','State Country']] = stations['name'].str.split(',',expand=True)
stations_no_dups = stations.set_index('id').drop_duplicates()
stations_no_dups

Unnamed: 0_level_0,elevation,mindate,maxdate,latitude,name,datacoverage,elevationUnit,longitude,First_ID,Last_ID,City,State Country
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
COOP:110203,228.6,1941-04-01,2008-06-01,42.48110,"ANTIOCH, IL US",0.9542,METERS,-88.09940,COOP,110203,ANTIOCH,IL US
COOP:113455,164.0,1935-07-01,2015-11-01,40.88210,"GLADSTONE DAM 18, IL US",0.9741,METERS,-91.02340,COOP,113455,GLADSTONE DAM 18,IL US
COOP:114355,167.6,1935-07-01,2015-11-01,41.42550,"ILLINOIS CITY DAM 16, IL US",0.9772,METERS,-91.00940,COOP,114355,ILLINOIS CITY DAM 16,IL US
COOP:114655,167.6,1931-01-01,2009-09-01,41.09944,"KEITHSBURG, IL US",0.9598,METERS,-90.93944,COOP,114655,KEITHSBURG,IL US
COOP:117391,173.1,1985-06-01,2015-11-01,41.51800,"ROCK ISLAND LOCK AND DAM 15, IL US",0.9479,METERS,-90.56470,COOP,117391,ROCK ISLAND LOCK AND DAM 15,IL US
...,...,...,...,...,...,...,...,...,...,...,...,...
WBAN:94992,185.9,2005-01-01,2020-01-09,47.74722,"GRAND MARAIS, MN US",1.0000,METERS,-90.34444,WBAN,94992,GRAND MARAIS,MN US
WBAN:94994,204.8,2005-01-01,2020-01-09,43.15611,"BOSCOBEL AIRPORT, WI US",1.0000,METERS,-90.67750,WBAN,94994,BOSCOBEL AIRPORT,WI US
WBAN:94997,417.6,2005-01-01,2020-01-09,46.44667,"WADENA MUNICIPAL AIRPORT, MN US",1.0000,METERS,-95.21167,WBAN,94997,WADENA MUNICIPAL AIRPORT,MN US
WBAN:94998,393.8,2005-01-01,2020-01-09,41.70000,"AUDUBON MUNICIPAL AIRPORT, IA US",1.0000,METERS,-94.91667,WBAN,94998,AUDUBON MUNICIPAL AIRPORT,IA US


In [6]:
stations_no_dups['State Country'].value_counts()

 MI US    1825
 MN US    1771
 WI US    1401
 IA US    1312
 IL US      16
 NE US       8
 ND US       3
 SD US       2
 MO US       2
 IN US       2
 KS US       1
 OH US       1
Name: State Country, dtype: int64

In [7]:
coords_clim = stations_no_dups[['elevation','latitude','longitude']]
coords_clim.describe()

Unnamed: 0,elevation,latitude,longitude
count,6225.0,6344.0,6344.0
mean,297.099454,44.055435,-90.308764
std,79.517464,1.846201,3.970951
min,106.7,38.69889,-98.44167
25%,237.7,42.6333,-93.448208
50%,286.5,43.87245,-91.24485
75%,347.2,45.30478,-87.068609
max,1345.4,49.31833,-81.86


In [8]:
state_data = pd.DataFrame()
for state,id in state_ids.items():
    data = gather('stations',{'locationid':id,'limit':'1000'})
    print(len(data))
    data['State'] = state
    state_data = state_data.append(data,ignore_index=True)

2031
1570
1833
2387
1431
1792
2030
2313


In [9]:
len(state_data)

15387

In [10]:
state_data = state_data.set_index('id')
state_data

Unnamed: 0_level_0,elevation,mindate,maxdate,latitude,name,datacoverage,elevationUnit,longitude,State
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
COOP:281211,3.7,1931-01-01,1978-06-01,40.08333,"BURLINGTON, PA US",0.9650,METERS,-74.86667,PA
COOP:284635,20.7,1931-01-01,2015-11-01,40.35958,"LAMBERTVILLE, NJ US",0.9755,METERS,-74.94461,PA
COOP:284736,121.9,1962-04-01,1970-08-01,41.25000,"LAYTON 2, PA US",0.9701,METERS,-74.86667,PA
COOP:301413,250.5,1939-01-01,2015-11-01,42.00250,"CHEMUNG, NY US",0.9968,METERS,-76.63840,PA
COOP:306779,,1948-05-01,1948-12-01,41.36667,"PORT JERVIS BRIDGE, PA US",0.1242,,-74.70000,PA
...,...,...,...,...,...,...,...,...,...
WBAN:94745,111.9,1949-01-01,2020-01-09,41.06236,"WESTCHESTER CO AIRPORT, NY US",0.9300,METERS,-73.70463,NY
WBAN:94761,335.0,2005-08-04,2020-01-09,42.48333,"ITHACA TOMPKINS CNTY, NY US",1.0000,METERS,-76.46667,NY
WBAN:94789,3.4,1948-07-01,2020-01-09,40.63915,"JFK INTERNATIONAL AIRPORT, NY US",1.0000,METERS,-73.76401,NY
WBAN:94790,96.9,1949-05-01,2020-01-09,43.98867,"WATERTOWN AIRPORT, NY US",1.0000,METERS,-76.02623,NY


In [12]:
coords_st = state_data[['elevation','latitude','longitude']]
coords_st.describe()

Unnamed: 0,elevation,latitude,longitude
count,15226.0,15387.0,15387.0
mean,264.848489,42.135379,-84.610624
std,114.839497,2.266795,6.082993
min,0.0,35.34472,-98.41306
25%,195.1,40.432965,-88.86083
50%,248.7,41.78333,-85.690869
75%,316.4,43.49955,-79.18333
max,1482.9,49.31833,-71.1375
