In [None]:
import pandas as pd
import os
import requests
import datetime as dt
from config import noaa_token as token

In [None]:
#variables
base = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/'
header = {'token':token,
          'Content-Type':'application/json'}

dataset = {'datasetid':'GHCND'}
location = {'locationid':'CLIM:102'}
datatypes = {'datatypeid':'ID'}

payload = {'locationid':'CLIM:102',
          'limit':'1000'}

query_list = ['precipitation','snow','ice','water']
avoid_list = ['average','percentile','probability','multiday','mean']

In [None]:
#get count
def get_c(json):
    return json['metadata']['resultset']['count']

#take end point as a string and payload as a dictionary
def gather(endpt,load):
    initial = requests.get(base+endpt,headers=header,params=load).json()
    count = get_c(initial)
    json = initial['results']
    offset = 1000
    while offset < count:
        load['offset'] = offset
        json += requests.get(base+endpt,headers=header,params=load).json()['results']
        offset += 1000
    return pd.DataFrame(json)

#returns subset of df where maxdate is in December 2019
def recent(df):
    return df[df['maxdate'].str.startswith('2019-12')]

In [None]:
dt_df = gather('datatypes',payload)
dt_df['name'] = dt_df['name'].str.lower()
dt_df

In [None]:
recent(dt_df)

In [6]:
stations_df = gather('stations',payload)
stations_df.head()

Unnamed: 0,elevation,mindate,maxdate,latitude,name,datacoverage,id,elevationUnit,longitude
0,272.8,1963-04-01,2014-01-01,44.9907,"GOLDEN VALLEY, MN US",0.0225,COOP:213202,METERS,-93.3989
1,437.7,1921-07-01,1990-10-01,47.73333,"GONVICK 2 W, MN US",0.8834,COOP:213206,METERS,-95.51667
2,186.5,1931-01-01,2015-11-01,47.7518,"GRAND MARAIS, MN US",0.9814,COOP:213282,METERS,-90.3282
3,411.5,1931-01-01,2015-11-01,43.7047,"GRAND MEADOW, MN US",0.9932,COOP:213290,METERS,-92.5644
4,222.5,1950-07-01,2015-11-01,47.9711,"GRAND PORTAGE RANGER STATION, MN US",0.6586,COOP:213296,METERS,-89.6908


In [7]:
stations_unique = stations_df['name'].unique()
stations_df[stations_df['id'] in stations_unique]

  stations_df[stations_df['id'] in stations_unique]


ValueError: Lengths must match to compare

In [9]:
stations = stations_df.set_index('id').drop_duplicates()
stations

Unnamed: 0_level_0,elevation,mindate,maxdate,latitude,name,datacoverage,elevationUnit,longitude
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
COOP:213202,272.8,1963-04-01,2014-01-01,44.99070,"GOLDEN VALLEY, MN US",0.0225,METERS,-93.39890
COOP:213206,437.7,1921-07-01,1990-10-01,47.73333,"GONVICK 2 W, MN US",0.8834,METERS,-95.51667
COOP:213282,186.5,1931-01-01,2015-11-01,47.75180,"GRAND MARAIS, MN US",0.9814,METERS,-90.32820
COOP:213290,411.5,1931-01-01,2015-11-01,43.70470,"GRAND MEADOW, MN US",0.9932,METERS,-92.56440
COOP:213296,222.5,1950-07-01,2015-11-01,47.97110,"GRAND PORTAGE RANGER STATION, MN US",0.6586,METERS,-89.69080
...,...,...,...,...,...,...,...,...
WBAN:94992,185.9,2005-01-01,2020-01-07,47.74722,"GRAND MARAIS, MN US",1.0000,METERS,-90.34444
WBAN:94994,204.8,2005-01-01,2020-01-07,43.15611,"BOSCOBEL AIRPORT, WI US",1.0000,METERS,-90.67750
WBAN:94997,417.6,2005-01-01,2020-01-07,46.44667,"WADENA MUNICIPAL AIRPORT, MN US",1.0000,METERS,-95.21167
WBAN:94998,393.8,2005-01-01,2020-01-07,41.70000,"AUDUBON MUNICIPAL AIRPORT, IA US",1.0000,METERS,-94.91667


In [None]:
stations_df['name'].value_counts()

In [None]:
mn_s = stations_df[stations_df['name'].str.endswith('MN US') 
                   & stations_df['id'].str.startswith('GHCND') 
                   & stations_df['maxdate'].str.startswith('2020')]

In [None]:
stations_df[stations_df['id'].str.endswith('WBAN:94992')]

In [None]:
superior1 = stations[stations['elevation']>183]
superior2 = superior1[superior1['elevation']<185]
superior2

In [None]:
stations_df[stations_df['id'].str.endswith('94992')]

In [None]:
v_df = pd.DataFrame()
for q in query_list:
    e_df = dt_df[dt_df['name'].str.contains(q)]
    for a in avoid_list:
        e_df = e_df[~(e_df['name'].str.contains(a))]
    v_df = v_df.append(e_df)
v_df.reset_index().sort_values('datacoverage',ascending=False)

In [11]:
dt_df["mindate"]=pd.to_datetime(dt_df["mindate"])
dt_df

Unnamed: 0,mindate,maxdate,name,datacoverage,id
0,1994-03-19,1996-05-28,average cloudiness midnight to midnight from 3...,1.00,ACMC
1,1965-01-01,2005-12-31,average cloudiness midnight to midnight from m...,1.00,ACMH
2,1994-02-01,1996-05-28,average cloudiness sunrise to sunset from 30-s...,1.00,ACSC
3,1965-01-01,2005-12-31,average cloudiness sunrise to sunset from manu...,1.00,ACSH
4,1991-06-05,2020-01-06,base data,0.95,ALL
...,...,...,...,...,...
1126,1996-07-08,2005-09-09,"ash, dust, sand, or other blowing obstruction",1.00,WV07
1127,1996-10-21,2002-02-04,snow or ice crystals,1.00,WV18
1128,1996-07-01,2005-12-31,rain or snow shower,1.00,WV20
1129,2010-01-01,2010-12-31,long-term average year-to-date precipitation t...,1.00,YTD-PRCP-NORMAL
