In [1]:
import pandas as pd
import os
import requests
import datetime as dt
from config import noaa_token as token

In [2]:
# Shared request settings: API root URL and the headers sent with every call.
base = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/'
header = {
    'token': token,
    'Content-Type': 'application/json',
}

# States of interest. Every id starts out as an empty placeholder string and
# is looked up from the API in a later cell.
state_ids = dict.fromkeys(
    (
        'Illinois',
        'Indiana',
        'Michigan',
        'Minnesota',
        'New York',
        'Ohio',
        'Pennsylvania',
        'Wisconsin',
    ),
    '',
)

In [3]:
#get count
def get_c(json):
    """Return the ``count`` entry nested under ``metadata -> resultset``.

    ``json`` is an already-decoded response mapping; a missing key raises
    ``KeyError``.
    """
    resultset = json['metadata']['resultset']
    return resultset['count']

def gather(endpt,load):
    """Fetch every result page for an endpoint and return them as one DataFrame.

    Parameters
    ----------
    endpt : str
        Path appended to the module-level ``base`` URL (e.g. ``'stations'``).
    load : dict
        Query parameters for the request. The dict is copied, so the caller's
        object never gains an ``'offset'`` key. Its ``'limit'`` entry (int or
        numeric string) is used as the page size.

    Returns
    -------
    pandas.DataFrame
        One row per item found under the ``'results'`` key of the responses;
        an empty frame when the first response has no ``'results'`` key.

    Raises
    ------
    ValueError
        If ``'limit'`` is smaller than 1 (the paging loop could never advance).
    requests.HTTPError
        If any page comes back with an HTTP error status.
    """
    params = dict(load)  # private copy: paging must not leak 'offset' into the caller's dict

    # Step by the requested page size instead of a fixed 1000, so a smaller
    # 'limit' does not skip records between pages. NOAA CDO v2 documents 25
    # as the default limit when none is sent.
    page_size = int(params.get('limit', 25))
    if page_size < 1:
        raise ValueError("'limit' must be at least 1, got %r" % (params.get('limit'),))

    def fetch(query):
        """Issue one GET and return the decoded JSON body, failing on HTTP errors."""
        response = requests.get(base+endpt,headers=header,params=query)
        response.raise_for_status()
        return response.json()

    initial = fetch(params)
    if 'results' not in initial:
        # presumably the API answers with an empty object when nothing matches — TODO confirm
        return pd.DataFrame()

    count = get_c(initial)
    rows = list(initial['results'])

    # NOTE(review): offsets advance as page_size, 2*page_size, ... exactly like the
    # original 1000, 2000, ... stepping. Confirm against the API docs whether the
    # offset is 0- or 1-based; a 1-based offset would repeat one row per page boundary.
    offset = page_size
    while offset < count:
        params['offset'] = offset
        rows += fetch(params).get('results', [])
        offset += page_size
    return pd.DataFrame(rows)

In [13]:
#Return only rows in daterange
#The cutoffs default to the originally hardcoded dates and can be overridden per call
def time_range(df, start='1972-12-19', end='2016-05-05'):
    """Keep the rows whose date span covers the whole ``start``..``end`` window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'mindate'`` and ``'maxdate'`` columns.
    start : optional
        A row is kept only if its ``mindate`` is on or before this value.
        Defaults to ``'1972-12-19'``.
    end : optional
        A row is kept only if its ``maxdate`` is on or after this value.
        Defaults to ``'2016-05-05'``.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df``, in their original order.
    """
    # One combined mask selects the same rows as filtering on mindate first
    # and on maxdate second.
    covers_window = (df['mindate'] <= start) & (df['maxdate'] >= end)
    return df[covers_window]

# def coords_range(df):
#     df_lat = df[df['latitude']]
#     df_lng = 
#     df_ele = 
#     return df_ele

In [4]:
# Request the locations in category 'ST' (at most 100 rows), then fail right
# here on an HTTP error instead of letting a later cell trip over a response
# that has no 'results' key.
location_ids = requests.get(base+'locations',headers=header,params={'locationcategoryid':'ST','limit':100})
location_ids.raise_for_status()
location_ids.status_code

200

In [5]:
# Tabulate the 'results' list of the locations response, one row per entry,
# and preview the first five rows.
all_states = pd.DataFrame(data=location_ids.json()['results'])
all_states.head(n=5)

Unnamed: 0,mindate,maxdate,name,datacoverage,id
0,1888-02-01,2020-01-09,Alabama,1,FIPS:01
1,1893-09-01,2020-01-09,Alaska,1,FIPS:02
2,1867-08-01,2020-01-09,Arizona,1,FIPS:04
3,1871-07-01,2020-01-09,Arkansas,1,FIPS:05
4,1850-10-01,2020-01-09,California,1,FIPS:06


In [6]:
# Replace each placeholder with the 'id' of the first all_states row whose
# 'name' equals the state; a name with no matching row raises IndexError.
for state in list(state_ids):
    state_ids[state] = all_states.loc[all_states['name'] == state, 'id'].iloc[0]
state_ids

{'Illinois': 'FIPS:17',
 'Indiana': 'FIPS:18',
 'Michigan': 'FIPS:26',
 'Minnesota': 'FIPS:27',
 'New York': 'FIPS:36',
 'Ohio': 'FIPS:39',
 'Pennsylvania': 'FIPS:42',
 'Wisconsin': 'FIPS:55'}

In [7]:
# Download the station list for every state and stack the results into one frame.
# The per-state frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies all previously gathered rows on every iteration.
frames = []
for state, location_id in state_ids.items():  # 'location_id' avoids shadowing the builtin id()
    data = gather('stations',{'locationid':location_id,'limit':'1000'})
    print(state + ': ' + str(len(data)))
    data['location_id_state'] = state  # record which state's query returned each row
    frames.append(data)
# pd.concat raises on an empty list, so fall back to an empty frame in that case.
state_data = pd.concat(frames,ignore_index=True) if frames else pd.DataFrame()
print('Total: '+str(len(state_data)))

Illinois: 2387
Indiana: 2030
Michigan: 1833
Minnesota: 1792
New York: 2313
Ohio: 1570
Pennsylvania: 2031
Wisconsin: 1431
Total: 15387


In [8]:
# Index the stations by their 'id' column. The guard keeps this cell re-runnable:
# once 'id' has become the index it is no longer a column, and a second
# set_index('id') would raise KeyError.
if 'id' in state_data.columns:
    state_data = state_data.set_index('id')
state_data.head()

Unnamed: 0_level_0,elevation,mindate,maxdate,latitude,name,datacoverage,elevationUnit,longitude,location_id_state
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
COOP:110050,131.1,1990-10-01,2015-11-01,38.5411,"ALBERS 1 W, IL US",0.9967,METERS,-89.6289,Illinois
COOP:110055,161.5,1942-06-01,2006-04-01,38.3777,"ALBION, IL US",0.9518,METERS,-88.0569,Illinois
COOP:110072,222.5,1931-01-01,2015-11-01,41.1977,"ALEDO, IL US",0.9951,METERS,-90.7447,Illinois
COOP:110082,207.3,1948-07-01,2015-11-01,41.0579,"ALEXIS 1 SW, IL US",0.0964,METERS,-90.5654,Illinois
COOP:110137,132.6,1943-09-01,2015-11-01,38.86702,"ALTON MELVIN PRICE LOCK AND DAM, IL US",0.9389,METERS,-90.14886,Illinois


In [9]:
# Parse both date columns into pandas datetimes, overwriting them in place.
for date_col in ('mindate', 'maxdate'):
    state_data[date_col] = pd.to_datetime(state_data[date_col])

In [10]:
# First keep stations whose records begin on or before 1972-12-19 ...
state_min = state_data.loc[state_data['mindate'] <= '1972-12-19']
# ... then, of those, the ones whose records run through 2016-05-05 or later.
state_time = state_min.loc[state_min['maxdate'] >= '2016-05-05']
len(state_time)

1057

In [12]:
# Discard stations that have no elevation value.
state_nonull = state_time.dropna(subset=['elevation'])

In [15]:
# NOTE(review): state_nonull comes from state_time, which the cells above already
# filtered with the same two cutoff dates, so this call looks like it should
# leave the rows unchanged — confirm before relying on it as a separate step.
state_intime = time_range(df=state_nonull)

In [16]:
# Preview the first five stations that passed the filters.
state_intime.head(n=5)

Unnamed: 0_level_0,elevation,mindate,maxdate,latitude,name,datacoverage,elevationUnit,longitude,location_id_state
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
GHCND:USC00110072,222.5,1901-01-01,2020-01-08,41.1977,"ALEDO, IL US",1.0,METERS,-90.7447,Illinois
GHCND:USC00110082,207.3,1948-07-01,2020-01-04,41.0579,"ALEXIS 1 SW, IL US",0.9995,METERS,-90.5654,Illinois
GHCND:USC00110137,132.6,1892-12-01,2020-01-08,38.86702,"ALTON MELVIN PRICE LOCK AND DAM, IL US",1.0,METERS,-90.14886,Illinois
GHCND:USC00110330,207.3,1948-07-01,2020-01-08,40.2333,"AUGUSTA, IL US",1.0,METERS,-90.9471,Illinois
GHCND:USC00110338,201.2,1893-01-01,2020-01-08,41.78038,"AURORA, IL US",1.0,METERS,-88.30925,Illinois


In [None]:
# Summary statistics for the three positional columns across all gathered
# stations (state_data, i.e. before the date and elevation filters).
coords_st = state_data.loc[:, ['elevation', 'latitude', 'longitude']]
coords_st.describe()