In [11]:
from urllib import request

import json
import os
import random

In [2]:
# Read the Mapbox token from the environment instead of hard-coding it.
# Raises KeyError when MAPBOX_ACCESS_TOKEN is not set.
# NOTE(review): none of the visible cells use this token — confirm it is still needed.
MAPBOX_ACCESS_TOKEN = os.environ['MAPBOX_ACCESS_TOKEN']

# The 51 two-letter codes (DC included) that are queried one at a time.
STATE_CODES = (
    'AL AK AZ AR CA CO CT DC DE FL GA '
    'HI ID IL IN IA KS KY LA ME MD '
    'MA MI MN MS MO MT NE NV NH NJ '
    'NM NY NC ND OH OK OR PA RI SC '
    'SD TN TX UT VT VA WA WV WI WY'
).split()

# File the collected site records are dumped to as JSON.
filepath = 'sites.json'

In [3]:
# Site records bucketed by status key; each key starts with its own empty list.
sites_dict = {
    status: []
    for status in ('unchecked', 'water_found', 'water_not_found')
}

In [4]:
def get_sites_for_state(state_code):
    """Query the waterservices.usgs.gov NWIS `iv` endpoint for one state.

    Args:
        state_code: Value substituted into the `stateCd` query parameter
            (callers in this notebook pass two-letter codes).

    Returns:
        Whatever the decoded JSON response holds under
        ['value']['timeSeries'].

    Raises:
        urllib.error.URLError: Propagated from `urlopen` when the request fails.
        ValueError: Propagated from `json.loads` when the body is not valid JSON.
        KeyError: When the decoded body lacks 'value' or 'timeSeries'.
    """
    url = 'http://waterservices.usgs.gov/nwis/iv/?format=json&stateCd={}'.format(state_code)

    # Use the response as a context manager so the HTTP connection is closed
    # as soon as the body has been read, even if decoding raises.
    with request.urlopen(url) as response:
        payload = json.loads(response.read())

    return payload['value']['timeSeries']


def parse_site_data(site):
    """Extract the identifying fields from one time-series record.

    Args:
        site: Mapping with a 'sourceInfo' entry that holds 'siteCode',
            'siteName' and 'geoLocation' -> 'geogLocation'.

    Returns:
        A `(site_code, name, lon, lat)` tuple, where `site_code` is the
        'value' of the first 'siteCode' entry.

    Raises:
        KeyError / IndexError: When an expected key or the first site-code
            entry is missing.
    """
    source = site['sourceInfo']
    code = source['siteCode'][0]['value']
    coords = source['geoLocation']['geogLocation']

    return code, source['siteName'], coords['longitude'], coords['latitude']

In [5]:
# Start from an empty list every time this cell runs. `unique_sites` is reset
# below, so without this a second run would append another full copy of every
# site to sites_dict['unchecked'].
sites_dict['unchecked'] = []

total_sites = 0
unique_sites = set()  # site codes already recorded, across all states

for state_code in STATE_CODES:
    sites = get_sites_for_state(state_code)
    num_sites_for_state = 0

    for site in sites:
        site_code, name, lon, lat = parse_site_data(site)

        # Keep only the first record seen for each site code.
        if site_code in unique_sites:
            continue

        unique_sites.add(site_code)
        num_sites_for_state += 1

        sites_dict['unchecked'].append({
            'site_code': site_code,
            'name': name,
            'lon': lon,
            'lat': lat
        })

    total_sites += num_sites_for_state
    print('Data recorded for {} {} sites.'.format(num_sites_for_state, state_code))

print('{} total sites.'.format(total_sites))

Data recorded for 224 AL sites.
Data recorded for 269 AK sites.
Data recorded for 425 AZ sites.
Data recorded for 329 AR sites.
Data recorded for 1147 CA sites.
Data recorded for 754 CO sites.
Data recorded for 105 CT sites.
Data recorded for 7 DC sites.
Data recorded for 64 DE sites.
Data recorded for 1298 FL sites.
Data recorded for 578 GA sites.
Data recorded for 280 HI sites.
Data recorded for 452 ID sites.
Data recorded for 513 IL sites.
Data recorded for 483 IN sites.
Data recorded for 307 IA sites.
Data recorded for 303 KS sites.
Data recorded for 269 KY sites.
Data recorded for 321 LA sites.
Data recorded for 153 ME sites.
Data recorded for 263 MD sites.
Data recorded for 224 MA sites.
Data recorded for 350 MI sites.
Data recorded for 312 MN sites.
Data recorded for 244 MS sites.
Data recorded for 533 MO sites.
Data recorded for 362 MT sites.
Data recorded for 307 NE sites.
Data recorded for 472 NV sites.
Data recorded for 79 NH sites.
Data recorded for 473 NJ sites.
Data recor

In [12]:
# Replace the unchecked list with a randomly ordered copy of itself
# (sampling every element without replacement is a full shuffle).
sites_dict['unchecked'] = random.sample(
    sites_dict['unchecked'],
    k=len(sites_dict['unchecked']),
)

In [13]:
# Show only the first eight records per status; echoing the whole dict would
# embed every collected site record in the notebook output.
{status: records[:8] for status, records in sites_dict.items()}

{'unchecked': [{'site_code': '1025125356',
   'name': 'WREN WASH AT YUCCA MTN, NTS, NV',
   'lon': -116.455,
   'lat': 36.8597222},
  {'site_code': '400832074082101',
   'name': '250429-- Allaire State Park C Obs',
   'lon': -74.14236318,
   'lat': 40.142892},
  {'site_code': '07378500',
   'name': 'Amite River near Denham Springs, LA',
   'lon': -90.99038,
   'lat': 30.464079},
  {'site_code': '02226180',
   'name': 'BRUNSWICK RIVER AT ST. SIMONS ISLAND, GA',
   'lon': -81.3964856,
   'lat': 31.13356523},
  {'site_code': '15294100',
   'name': 'DESHKA R NR WILLOW AK',
   'lon': -150.3391832,
   'lat': 61.7675212},
  {'site_code': '09423000',
   'name': 'COLORADO RIVER BELOW DAVIS DAM, AZ-NV',
   'lon': -114.5721876,
   'lat': 35.19166556},
  {'site_code': '09486510',
   'name': 'SANTA CRUZ RIVER NR. RILLITO, AZ.',
   'lon': -111.1528808,
   'lat': 32.40507247},
  {'site_code': '12175000',
   'name': 'ROSS RESERVOIR NEAR NEWHALEM, WA',
   'lon': -121.0684535,
   'lat': 48.73262959},
  

In [7]:
# Number of unique sites collected. Index directly so a missing key raises a
# clear KeyError instead of a TypeError from len(None).
len(sites_dict['unchecked'])

20467

In [8]:
# Persist the collected sites as JSON. Write-only mode is enough because the
# file is never read back through this handle, and the explicit encoding keeps
# the result independent of the platform's default locale.
with open(filepath, 'w', encoding='utf-8') as fp:
    json.dump(sites_dict, fp)