In [1]:
import pandas as pd
import os
import requests
import datetime as dt
from config import noaa_token as token

In [2]:
#variables
base = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/'
header = {'token':token,
          'Content-Type':'application/json'}

dataset = {'datasetid':'GHCND'}
location = {'locationid':'CLIM:102'}
datatypes = {'datatypeid':'ID'}

payload = {'locationid':'CLIM:102',
          'limit':'1000'}

query_list = ['precipitation','snow','ice','water']
avoid_list = ['average','percentile','probability','multiday','mean']

In [3]:
#get count
def get_c(json):
    return json['metadata']['resultset']['count']

#take end point as a string and payload as a dictionary
def gather(endpt,load):
    initial = requests.get(base+endpt,headers=header,params=load).json()
    count = get_c(initial)
    json = initial['results']
    offset = 1000
    while offset < count:
        load['offset'] = offset
        json += requests.get(base+endpt,headers=header,params=load).json()['results']
        offset += 1000
    return pd.DataFrame(json)

#returns subset of df where maxdate is in December 2019
def recent(df):
    return df[df['maxdate'].str.startswith('2019-12')]

In [4]:
dt_df = gather('datatypes',payload)
dt_df['name'] = dt_df['name'].str.lower()
dt_df

Unnamed: 0,mindate,maxdate,name,datacoverage,id
0,1994-03-19,1996-05-28,average cloudiness midnight to midnight from 3...,1.00,ACMC
1,1965-01-01,2005-12-31,average cloudiness midnight to midnight from m...,1.00,ACMH
2,1994-02-01,1996-05-28,average cloudiness sunrise to sunset from 30-s...,1.00,ACSC
3,1965-01-01,2005-12-31,average cloudiness sunrise to sunset from manu...,1.00,ACSH
4,1991-06-05,2020-01-06,base data,0.95,ALL
...,...,...,...,...,...
1126,1996-07-08,2005-09-09,"ash, dust, sand, or other blowing obstruction",1.00,WV07
1127,1996-10-21,2002-02-04,snow or ice crystals,1.00,WV18
1128,1996-07-01,2005-12-31,rain or snow shower,1.00,WV20
1129,2010-01-01,2010-12-31,long-term average year-to-date precipitation t...,1.00,YTD-PRCP-NORMAL


In [5]:
recent(dt_df)

Unnamed: 0,mindate,maxdate,name,datacoverage,id
989,1948-01-01,2019-12-30,peak gust time,1.0,PGTM
1103,1851-05-19,2019-12-30,"fog, ice fog, or freezing fog (may include hea...",1.0,WT01
1104,1900-12-23,2019-12-30,heavy fog or heaving freezing fog (not always ...,1.0,WT02
1105,1851-04-02,2019-12-30,thunder,1.0,WT03
1106,1852-02-20,2019-12-30,"ice pellets, sleet, snow pellets, or small hail""",1.0,WT04
1107,1851-04-13,2019-12-28,hail (may include small hail),1.0,WT05
1108,1890-03-28,2019-12-30,glaze or rime,1.0,WT06
1110,1851-04-11,2019-12-30,smoke or haze,1.0,WT08
1111,1852-02-12,2019-12-30,blowing or drifting snow,1.0,WT09
1112,1886-03-20,2019-12-09,"tornado, waterspout, or funnel cloud""",1.0,WT10


In [6]:
stations_df = gather('stations',payload)
stations_df.head()

Unnamed: 0,elevation,mindate,maxdate,latitude,name,datacoverage,id,elevationUnit,longitude
0,272.8,1963-04-01,2014-01-01,44.9907,"GOLDEN VALLEY, MN US",0.0225,COOP:213202,METERS,-93.3989
1,437.7,1921-07-01,1990-10-01,47.73333,"GONVICK 2 W, MN US",0.8834,COOP:213206,METERS,-95.51667
2,186.5,1931-01-01,2015-11-01,47.7518,"GRAND MARAIS, MN US",0.9814,COOP:213282,METERS,-90.3282
3,411.5,1931-01-01,2015-11-01,43.7047,"GRAND MEADOW, MN US",0.9932,COOP:213290,METERS,-92.5644
4,222.5,1950-07-01,2015-11-01,47.9711,"GRAND PORTAGE RANGER STATION, MN US",0.6586,COOP:213296,METERS,-89.6908


In [8]:
stations_unique = stations_df['name'].unique()
stations_df[stations_df['id'] in stations_unique]

  


ValueError: Lengths must match to compare

In [9]:
stations = stations_df.set_index('id').drop_duplicates()
stations

Unnamed: 0_level_0,elevation,mindate,maxdate,latitude,name,datacoverage,elevationUnit,longitude
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
COOP:213202,272.8,1963-04-01,2014-01-01,44.99070,"GOLDEN VALLEY, MN US",0.0225,METERS,-93.39890
COOP:213206,437.7,1921-07-01,1990-10-01,47.73333,"GONVICK 2 W, MN US",0.8834,METERS,-95.51667
COOP:213282,186.5,1931-01-01,2015-11-01,47.75180,"GRAND MARAIS, MN US",0.9814,METERS,-90.32820
COOP:213290,411.5,1931-01-01,2015-11-01,43.70470,"GRAND MEADOW, MN US",0.9932,METERS,-92.56440
COOP:213296,222.5,1950-07-01,2015-11-01,47.97110,"GRAND PORTAGE RANGER STATION, MN US",0.6586,METERS,-89.69080
...,...,...,...,...,...,...,...,...
WBAN:94992,185.9,2005-01-01,2020-01-07,47.74722,"GRAND MARAIS, MN US",1.0000,METERS,-90.34444
WBAN:94994,204.8,2005-01-01,2020-01-07,43.15611,"BOSCOBEL AIRPORT, WI US",1.0000,METERS,-90.67750
WBAN:94997,417.6,2005-01-01,2020-01-07,46.44667,"WADENA MUNICIPAL AIRPORT, MN US",1.0000,METERS,-95.21167
WBAN:94998,393.8,2005-01-01,2020-01-07,41.70000,"AUDUBON MUNICIPAL AIRPORT, IA US",1.0000,METERS,-94.91667


In [10]:
stations_df['name'].value_counts()

NETT LAKE, MN US                          9
MANKATO, MN US                            9
REDWOOD FALLS MUNICIPAL AIRPORT, MN US    6
GRANTSBURG, WI US                         6
ST. PAUL DOWNTOWN AIRPORT, MN US          6
                                         ..
LITTLE FALLS 3.0 N, MN US                 1
DULUTH 17.6 NNW, MN US                    1
DULUTH SKY HARBOR AIRPORT, MN US          1
MARQUETTE 0.4 ESE, MI US                  1
WYOMING 1.7 NW, MI US                     1
Name: name, Length: 4579, dtype: int64

In [11]:
mn_s = stations_df[stations_df['name'].str.endswith('MN US') 
                   & stations_df['id'].str.startswith('GHCND') 
                   & stations_df['maxdate'].str.startswith('2020')]

In [12]:
stations_df[stations_df['id'].str.endswith('WBAN:94992')]

Unnamed: 0,elevation,mindate,maxdate,latitude,name,datacoverage,id,elevationUnit,longitude
6340,185.9,2005-01-01,2020-01-07,47.74722,"GRAND MARAIS, MN US",1.0,WBAN:94992,METERS,-90.34444


In [13]:
superior1 = stations[stations['elevation']>183]
superior2 = superior1[superior1['elevation']<185]
superior2

Unnamed: 0_level_0,elevation,mindate,maxdate,latitude,name,datacoverage,elevationUnit,longitude
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GHCND:US1MIBN0001,184.1,2008-07-01,2017-05-21,41.792007,"NEW BUFFALO 0.4 WNW, MI US",0.9337,METERS,-86.751916
GHCND:US1MIBY0006,183.2,2012-11-01,2018-10-06,43.602489,"BAY CITY 2.2 WNW, MI US",1.0,METERS,-83.929367
GHCND:US1MICP0011,183.2,2014-07-01,2019-09-25,46.392849,"SAULT STE. MARIE 7.8 SE, MI US",0.5544,METERS,-84.270003
GHCND:US1MIMB0003,184.1,2008-07-01,2010-06-11,42.613428,"STERLING HEIGHTS 3.4 NE, MI US",1.0,METERS,-82.982202
GHCND:US1MIMB0010,184.1,2008-08-01,2013-06-13,42.591073,"STERLING HEIGHTS 2.3 ENE, MI US",0.3731,METERS,-82.988245
GHCND:US1MIMB0017,184.7,2009-05-01,2010-05-12,42.478659,"EASTPOINTE 1.0 NNE, MI US",0.998,METERS,-82.941585
GHCND:US1MIMB0023,184.7,2010-09-01,2013-04-19,42.573168,"CLINTON 1.0 S, MI US",0.6256,METERS,-82.917344
GHCND:US1MIMB0054,183.5,2015-12-16,2020-01-06,42.635189,"UTICA 2.9 E, MI US",0.9252,METERS,-82.965289
GHCND:US1MIMG0001,184.1,2008-07-01,2009-06-02,43.207627,"MUSKEGON 2.5 WSW, MI US",0.9173,METERS,-86.296977
GHCND:US1MIMG0010,183.5,2010-03-01,2012-10-28,43.16025,"NORTON SHORES 0.9 W, MI US",0.8108,METERS,-86.271083


In [14]:
stations_df[stations_df['id'].str.endswith('94992')]

Unnamed: 0,elevation,mindate,maxdate,latitude,name,datacoverage,id,elevationUnit,longitude
6016,185.9,1998-08-01,2019-12-30,47.74722,"GRAND MARAIS, MN US",1.0,GHCND:USW00094992,METERS,-90.34444
6340,185.9,2005-01-01,2020-01-07,47.74722,"GRAND MARAIS, MN US",1.0,WBAN:94992,METERS,-90.34444


In [15]:
v_df = pd.DataFrame()
for q in query_list:
    e_df = dt_df[dt_df['name'].str.contains(q)]
    for a in avoid_list:
        e_df = e_df[~(e_df['name'].str.contains(a))]
    v_df = v_df.append(e_df)
v_df.reset_index().sort_values('datacoverage',ascending=False)

Unnamed: 0,index,mindate,maxdate,name,datacoverage,id
18,1101,1952-07-01,2020-01-06,water equivalent of snow on the ground,1.0,WESD
27,1106,1852-02-20,2019-12-30,"ice pellets, sleet, snow pellets, or small hail""",1.0,WT04
21,1111,1852-02-12,2019-12-30,blowing or drifting snow,1.0,WT09
22,1120,1851-04-13,2019-04-13,"snow, snow pellets, snow grains, or ice crystals",1.0,WT18
23,1127,1996-10-21,2002-02-04,snow or ice crystals,1.0,WV18
24,1128,1996-07-01,2005-12-31,rain or snow shower,1.0,WV20
25,1091,1926-03-17,2019-11-28,thickness of ice on water,1.0,THIC
26,1103,1851-05-19,2019-12-30,"fog, ice fog, or freezing fog (may include hea...",1.0,WT01
28,1120,1851-04-13,2019-04-13,"snow, snow pellets, snow grains, or ice crystals",1.0,WT18
19,1102,1998-06-01,2020-01-06,water equivalent of snowfall,1.0,WESF


In [18]:
v_df.reset_index().sort_values('maxdate',ascending=False)

Unnamed: 0,index,mindate,maxdate,name,datacoverage,id
18,1101,1952-07-01,2020-01-06,water equivalent of snow on the ground,1.0,WESD
34,1102,1998-06-01,2020-01-06,water equivalent of snowfall,1.0,WESF
33,1101,1952-07-01,2020-01-06,water equivalent of snow on the ground,1.0,WESD
19,1102,1998-06-01,2020-01-06,water equivalent of snowfall,1.0,WESF
17,1012,1857-01-18,2020-01-06,snow depth,1.0,SNWD
10,985,1994-06-17,2020-01-05,storm total precipitation,0.95,NTP
1,357,2011-03-08,2020-01-05,one hour precipitation difference = daa-n1p,0.95,DOD
15,1074,1994-05-20,2020-01-05,supplemental precipitation data,0.95,SPD
12,990,2011-03-08,2020-01-05,storm total accumulation(storm total precipita...,0.95,PTA
11,988,2011-03-08,2020-01-05,one hour accumulation(one hour precipitation t...,0.95,OHA
