In [47]:
import pandas as pd
import os
import requests
import datetime as dt
from config import noaa_token as token

In [53]:
# Request configuration shared by the cells below.

# Root URL; endpoint names are appended to it when a request is made.
base = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/'

# Headers sent with every request; the token is imported from the local config module.
header = {
    'token': token,
    'Content-Type': 'application/json',
}

# Single-key parameter dictionaries.
dataset = dict(datasetid='GHCND')
location = dict(locationid='CLIM:102')
datatypes = dict(datatypeid='ID')

# Query parameters passed to gather(): the location above plus a page limit.
payload = {**location, 'limit': '1000'}

# Substrings a datatype name must contain to be selected ...
query_list = [
    'precipitation',
    'snowfall',
    'ice',
    'water',
]
# ... and substrings that exclude a datatype name from the selection.
avoid_list = [
    'average',
    'percentile',
    'probability',
    'multiday',
    'mean',
]

In [49]:
def get_c(json):
    """Return the ``count`` value stored under ``metadata -> resultset``.

    ``json`` is a decoded response body (a nested dictionary). A ``KeyError``
    propagates if any of the three keys is missing.
    """
    resultset = json['metadata']['resultset']
    return resultset['count']

def gather(endpt, load):
    """Download every page of an endpoint and return the rows as a DataFrame.

    Parameters
    ----------
    endpt : str
        Endpoint name; it is appended to the module-level ``base`` URL.
    load : dict
        Query parameters for the request. The dictionary is copied, so the
        caller's object is never modified. Its ``limit`` entry is used as the
        page size; when absent, a limit of 1000 is sent.

    Returns
    -------
    pandas.DataFrame
        Built from the concatenated ``results`` lists of all pages.

    Raises
    ------
    KeyError
        If a response lacks the ``metadata`` or ``results`` keys.
    """
    # Work on a private copy: writing 'offset' into the caller's dict would
    # leak pagination state into the next call that reuses the same payload.
    params = dict(load)
    # Step through the result set by the same page size the request asks for.
    page_size = int(params.setdefault('limit', 1000))

    first_page = requests.get(base + endpt, headers=header, params=params).json()
    total = get_c(first_page)
    records = list(first_page['results'])

    # NOTE(review): offsets advance in whole pages (page_size, 2*page_size, ...)
    # as before. Confirm against the API docs whether offset is 0- or 1-based;
    # if it is 1-based, each page boundary repeats one record.
    for offset in range(page_size, total, page_size):
        params['offset'] = offset
        page = requests.get(base + endpt, headers=header, params=params).json()
        records.extend(page['results'])

    return pd.DataFrame(records)

def recent(df, month='2019-12'):
    """Return the rows of ``df`` whose ``maxdate`` string starts with ``month``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column named ``maxdate``.
    month : str, optional
        Prefix to match against ``maxdate``; defaults to ``'2019-12'``
        (December 2019).

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels.
    """
    # na=False treats a missing maxdate as "not recent"; without it the mask
    # holds NA values and cannot be used for boolean indexing.
    is_recent = df['maxdate'].str.startswith(month, na=False)
    return df[is_recent]

In [50]:
# Fetch the 'datatypes' endpoint, then lower-case the names so the
# substring filters further down can match without regard to case.
dt_df = gather('datatypes', payload)
dt_df = dt_df.assign(name=dt_df['name'].str.lower())
dt_df

Unnamed: 0,mindate,maxdate,name,datacoverage,id
0,1994-03-19,1996-05-28,average cloudiness midnight to midnight from 3...,1.00,ACMC
1,1965-01-01,2005-12-31,average cloudiness midnight to midnight from m...,1.00,ACMH
2,1994-02-01,1996-05-28,average cloudiness sunrise to sunset from 30-s...,1.00,ACSC
3,1965-01-01,2005-12-31,average cloudiness sunrise to sunset from manu...,1.00,ACSH
4,1991-06-05,2019-12-20,base data,0.95,ALL
...,...,...,...,...,...
1126,1996-07-08,2005-09-09,"ash, dust, sand, or other blowing obstruction",1.00,WV07
1127,1996-10-21,2002-02-04,snow or ice crystals,1.00,WV18
1128,1996-07-01,2005-12-31,rain or snow shower,1.00,WV20
1129,2010-01-01,2010-12-31,long-term average year-to-date precipitation t...,1.00,YTD-PRCP-NORMAL


In [51]:
# Display the datatypes that recent() keeps, i.e. those whose maxdate starts with '2019-12'.
recent(dt_df)

Unnamed: 0,mindate,maxdate,name,datacoverage,id
4,1991-06-05,2019-12-20,base data,0.95,ALL
234,2011-03-08,2019-12-18,digital accumulation array(one hour precipitat...,0.95,DAA
236,1832-05-11,2019-12-18,number of days included in the multiday precip...,1.00,DAPR
239,1994-05-20,2019-12-18,digital hybrid scan reflectivity,0.95,DHR
357,2011-03-08,2019-12-18,one hour precipitation difference = daa-n1p,0.95,DOD
...,...,...,...,...,...
1107,1851-04-13,2019-12-15,hail (may include small hail),1.00,WT05
1108,1890-03-28,2019-12-15,glaze or rime,1.00,WT06
1110,1851-04-11,2019-12-15,smoke or haze,1.00,WT08
1111,1852-02-12,2019-12-14,blowing or drifting snow,1.00,WT09


In [26]:
# Fetch the 'stations' endpoint with the same payload dictionary used for the datatypes.
stations_df = gather('stations',payload)
stations_df

Unnamed: 0,elevation,mindate,maxdate,latitude,name,datacoverage,id,elevationUnit,longitude
0,228.6,1941-04-01,2008-06-01,42.48110,"ANTIOCH, IL US",0.9542,COOP:110203,METERS,-88.09940
1,164.0,1935-07-01,2015-11-01,40.88210,"GLADSTONE DAM 18, IL US",0.9741,COOP:113455,METERS,-91.02340
2,167.6,1935-07-01,2015-11-01,41.42550,"ILLINOIS CITY DAM 16, IL US",0.9772,COOP:114355,METERS,-91.00940
3,167.6,1931-01-01,2009-09-01,41.09944,"KEITHSBURG, IL US",0.9598,COOP:114655,METERS,-90.93944
4,173.1,1985-06-01,2015-11-01,41.51800,"ROCK ISLAND LOCK AND DAM 15, IL US",0.9479,COOP:117391,METERS,-90.56470
...,...,...,...,...,...,...,...,...,...
6340,185.9,2005-01-01,2019-12-19,47.74722,"GRAND MARAIS, MN US",1.0000,WBAN:94992,METERS,-90.34444
6341,204.8,2005-01-01,2019-12-19,43.15611,"BOSCOBEL AIRPORT, WI US",1.0000,WBAN:94994,METERS,-90.67750
6342,417.6,2005-01-01,2019-12-19,46.44667,"WADENA MUNICIPAL AIRPORT, MN US",1.0000,WBAN:94997,METERS,-95.21167
6343,393.8,2005-01-01,2019-12-19,41.70000,"AUDUBON MUNICIPAL AIRPORT, IA US",1.0000,WBAN:94998,METERS,-94.91667


In [41]:
# Display the stations that recent() keeps, i.e. those whose maxdate starts with '2019-12'.
recent(stations_df)

Unnamed: 0,elevation,mindate,maxdate,latitude,name,datacoverage,id,elevationUnit,longitude
1611,335.0,1978-12-01,2019-12-16,48.633300,"BARWICK, MN US",1.0000,GHCND:CA006020559,METERS,-93.966700
1615,383.4,1998-06-17,2019-12-17,43.267291,"WAUKON 0.5 ESE, IA US",0.9988,GHCND:US1IAAL0004,METERS,-91.466922
1616,341.4,2018-05-26,2019-12-17,43.401739,"LANSING 4.1 NW, IA US",0.9442,GHCND:US1IAAL0005,METERS,-91.285070
1617,329.2,2018-09-19,2019-12-18,43.140900,"POSTVILLE 5.5 NE, IA US",1.0000,GHCND:US1IAAL0006,METERS,-91.492600
1621,295.4,2018-03-23,2019-12-18,40.734480,"CENTERVILLE 0.3 NE, IA US",0.7830,GHCND:US1IAAP0007,METERS,-92.870150
...,...,...,...,...,...,...,...,...,...
6340,185.9,2005-01-01,2019-12-19,47.747220,"GRAND MARAIS, MN US",1.0000,WBAN:94992,METERS,-90.344440
6341,204.8,2005-01-01,2019-12-19,43.156110,"BOSCOBEL AIRPORT, WI US",1.0000,WBAN:94994,METERS,-90.677500
6342,417.6,2005-01-01,2019-12-19,46.446670,"WADENA MUNICIPAL AIRPORT, MN US",1.0000,WBAN:94997,METERS,-95.211670
6343,393.8,2005-01-01,2019-12-19,41.700000,"AUDUBON MUNICIPAL AIRPORT, IA US",1.0000,WBAN:94998,METERS,-94.916670


In [54]:
# Select the datatypes whose name contains a term from query_list and
# none of the terms in avoid_list.
names = dt_df['name']

# Rows to exclude: the name contains at least one avoid term. Matching is
# literal (regex=False), and a missing name never matches (na=False).
avoid_mask = pd.Series(False, index=dt_df.index)
for a in avoid_list:
    avoid_mask |= names.str.contains(a, regex=False, na=False)

# One subset per query term, in query_list order. A row whose name contains
# several query terms appears once per term, as in the original loop.
matches = [
    dt_df[names.str.contains(q, regex=False, na=False) & ~avoid_mask]
    for q in query_list
]

# DataFrame.append was removed in pandas 2.0; concatenate all subsets in one
# call instead of growing a frame inside the loop.
v_df = pd.concat(matches) if matches else pd.DataFrame()
v_df.reset_index()

Unnamed: 0,index,mindate,maxdate,name,datacoverage,id
0,234,2011-03-08,2019-12-18,digital accumulation array(one hour precipitat...,0.95,DAA
1,357,2011-03-08,2019-12-18,one hour precipitation difference = daa-n1p,0.95,DOD
2,358,1994-06-17,2019-12-18,digital precipitation array,0.95,DPA
3,359,1888-12-01,2015-11-01,departure from normal monthly precipitation.,1.0,DPNP
4,361,2011-03-08,2019-12-18,instantaneous precipitation rate,0.95,DPR
5,362,2011-03-08,2019-12-18,storm total precipitation difference = dta-dsp,0.95,DSD
6,363,1994-06-17,2019-12-18,digital storm total precipitation,0.95,DSP
7,364,2011-03-08,2019-12-18,digital storm total accumulation(storm total p...,0.95,DTA
8,939,1994-06-17,2019-12-18,one hour precipitation total,0.95,N1P
9,956,1994-06-17,2019-12-18,three hour precipitation total,0.95,N3P
