In [2]:
!head output.csv

,circle_coordinates,circle_name,closest_coordinates,closest_station
0,"(41.869061, -71.032434)",Taunton-Middleboro,"(41.87556, -71.02111)","TAUNTON MUNICIPAL AIRPORT, MA US"
1,"(42.3833, -71.1667)",Belmont,"(42.38333, -71.11667)","CAMBRIDGE, MA US"
2,"(42.3833, -71.1667)",Belmont,"(42.38333, -71.11667)","CAMBRIDGE, MA US"
3,"(40.9333, -73.1167)",L.I.: Setauket,"(40.9256, -73.0997)","SETAUKET EAST SETAUKET 0.3 SSE, NY US"
4,"(40.1, -75.5833)",Merlin,"(40.08333, -75.55)","DEVAULT 1 W, PA US"
5,"(41.5, -90.5167)",Moline,"(41.4885, -90.5084)","MOLINE 1.0 WNW, IL US"
6,"(41.869061, -71.032434)",Taunton-Middleboro,"(41.87556, -71.02111)","TAUNTON MUNICIPAL AIRPORT, MA US"
7,"(39.966731, -75.600001)",West Chester,"(39.9708, -75.635)","WEST CHESTER 2 NW, PA US"
8,"(42.3833, -71.1667)",Belmont,"(42.38333, -71.11667)","CAMBRIDGE, MA US"


In [3]:
import os
import sys
import csv
import collections
import json

In [4]:
# Count how many rows of output.csv name each station in their
# `closest_station` column, and how many data rows were read in total.
unique_closest_station = {}  # closest_station value -> number of rows carrying it
total_records = 0            # number of data rows read from output.csv

# newline="" hands newline handling to the csv module, as its documentation
# requires for file objects passed to a reader.
with open("output.csv", newline="") as csvfile:
    reader = csv.DictReader(csvfile, delimiter=",")
    for row in reader:
        total_records += 1
        closest = row["closest_station"]
        unique_closest_station[closest] = unique_closest_station.get(closest, 0) + 1

In [7]:
# Number of distinct `closest_station` values counted from output.csv.
len(unique_closest_station)

5094

In [6]:
# Rank stations by how many rows list them as closest, highest count first;
# stations with equal counts stay in the order they were first counted.
collections.Counter(unique_closest_station).most_common()

[('ROCHESTER 3.4 NNW, NY US', 133),
 ('NY CITY CENTRAL PARK, NY US', 133),
 ('CADIZ 2, OH US', 118),
 ('BATTLE CREEK 5 NW, MI US', 118),
 ('READING 2.9 ESE, PA US', 117),
 ('STATEN ISLAND 1.4 SE, NY US', 116),
 ('WEST CHESTER 2 NW, PA US', 112),
 ('WESTPORT, CA', 110),
 ('INDIANAPOLIS 10.0 NE, IN US', 109),
 ('PRINCETON WATER WORK, NJ US', 108),
 ('WESTMINSTER, CA', 107),
 ('CAMERON HELIPORT, LA US', 107),
 ('GENEVA EXPERIMENT ST, NY US', 106),
 ('WHITE LAKE 3.2 N, MI US', 106),
 ('BENNINGTON MORSE STATE AIRPORT, VT US', 106),
 ('GRAND ISLAND, NY US', 105),
 ('GREENPORT POWER HOUS, NY US', 104),
 ('NEW LONDON 1.0 NNW, CT US', 103),
 ('NEW ROCHELLE 1.3 S, NY US', 102),
 ('HARTFORD, CT US', 101),
 ('NASHVILLE 11.2 SW, TN US', 101),
 ('LITITZ 4.5 N, PA US', 100),
 ('SIOUX CITY 0.7 NW, IA US', 100),
 ('NETHER PROVIDENCE TOWNSHIP 0.4, PA US', 98),
 ('SYRACUSE HANCOCK INTERNATIONAL AIRPORT, NY US', 98),
 ('WINDSOR HEIGHTS 0.8 SE, IA US', 98),
 ('LITTLETON 6.0 WSW, CO US', 98),
 ('PORTLAND KG

In [10]:
!head stations_all.csv

elevation|mindate|maxdate|latitude|name|datacoverage|id|elevationUnit|longitude
139|1948-01-01|2014-01-01|31.5702|ABBEVILLE, AL US|0.8813|COOP:010008|METERS|-85.2482
249.3|1938-01-01|2015-11-01|34.2553|ADDISON, AL US|0.5059|COOP:010063|METERS|-87.1814
302.1|1940-05-01|1962-03-01|34.41667|ADDISON CENTRAL TOWER, AL US|0.9658|COOP:010071|METERS|-87.31667
172.2|1995-04-01|2015-11-01|33.17833|ALABASTER SHELBY CO AIRPORT, AL US|0.8064|COOP:010116|METERS|-86.78167
183.8|1949-01-01|1949-12-01|34.6891|BELLE MINA, AL US|1|COOP:010117|METERS|-86.8819
34.1|1935-05-01|1936-11-01|31.13333|ALAGA, AL US|0.2624|COOP:010125|METERS|-85.06667
53.3|1940-11-01|2014-12-01|32.2322|ALBERTA, AL US|0.9888|COOP:010140|METERS|-87.4104
348.1|1931-01-01|1977-06-01|34.23333|ALBERTVILLE, AL US|0.9535|COOP:010148|METERS|-86.16667
195.1|1969-10-01|2015-11-01|32.9452|ALEXANDER CITY, AL US|0.9946|COOP:010160|METERS|-85.948


In [34]:
# Lookup tables built from the pipe-delimited station listing stations_all.csv.
mapper__name_to_id = {}         # station name -> station id (a repeated name keeps the last id read)
mapper__id_to_name = {}         # station id -> station name
mapper__id_to_activeyears = {}  # station id -> (first four chars of mindate, first four chars of maxdate)

# newline="" hands newline handling to the csv module, as its documentation
# requires for file objects passed to a reader.
with open("stations_all.csv", newline="") as csvfile:
    reader = csv.DictReader(csvfile, delimiter="|")
    for row in reader:
        mapper__name_to_id[row["name"]] = row["id"]
        mapper__id_to_name[row["id"]] = row["name"]

        # mindate/maxdate look like "1948-01-01" in the listing, so the first
        # four characters are the year; they are kept as strings.
        year_start = row["mindate"][:4]
        year_end = row["maxdate"][:4]
        mapper__id_to_activeyears[row["id"]] = (year_start, year_end)

In [35]:
# Tally the matched closest stations by the prefix of their station id
# (the text before the first ":"), warning about names absent from the listing.
station_types = {}

for stn_name in unique_closest_station:
    # Guard clause: names missing from the listing are reported and skipped.
    if stn_name not in mapper__name_to_id:
        print(f"WARN: {stn_name} not found on current NOAA Stations Listing")
        continue

    stn_id = mapper__name_to_id[stn_name]
    print(f"{stn_name}: \t {stn_id}")

    # Everything before the first ":" of the id is treated as the station type.
    station_type = stn_id.partition(":")[0]
    station_types.setdefault(station_type, 0)
    station_types[station_type] += 1

TAUNTON MUNICIPAL AIRPORT, MA US: 	 WBAN:54777
CAMBRIDGE, MA US: 	 GHCND:USC00191097
SETAUKET EAST SETAUKET 0.3 SSE, NY US: 	 GHCND:US1NYSF0076
DEVAULT 1 W, PA US: 	 GHCND:USC00362116
MOLINE 1.0 WNW, IL US: 	 GHCND:US1ILRI0018
WEST CHESTER 2 NW, PA US: 	 GHCND:USC00369464
GREENPORT POWER HOUS, NY US: 	 GHCND:USC00303464
WYNCOTE 0.3 ENE, PA US: 	 GHCND:US1PAMT0089
CAMBRIDGE B, MA US: 	 GHCND:USC00191099
WERNERSVILLE 0.5 ESE, PA US: 	 GHCND:US1PABR0009
NEW YORK BENSONHURST, NY US: 	 GHCND:USC00305798
OBERLIN, OH US: 	 GHCND:USC00336196
MILFORD 5 NW, IL US: 	 GHCND:USC00115646
NEEDHAM 1.2 E, MA US: 	 GHCND:US1MANF0017
LANCASTER 5.8 WNW, PA US: 	 GHCND:US1PALN0007
JAMAICA PLAIN, MA US: 	 GHCND:USC00193890
ITHACA, NY US: 	 COOP:304178
DELAWARE 3.7 NNW, OH US: 	 GHCND:US1OHDL0003
DIXON 1.5 SW, IL US: 	 GHCND:US1ILLE0017
NAHANT, MA : 	 GHCND:USC00195145
PHILADELPHIA FRANKLIN INSTITUTE, PA US: 	 GHCND:USC00366886
LEXINGTON FAYETTE 6.3 S, KY US: 	 GHCND:US1KYFY0002
HILTON, NY US: 	 GHCND:USC003

ST BRIDE S, CA: 	 GHCND:CA008403418
IGNACE, CA: 	 GHCND:CA006033690
COPPER CLIFF, CA: 	 GHCND:CA006061870
MACRORIE, CA: 	 GHCND:CA004024880
WILLOW CREEK, CA: 	 GHCND:CA004039001
GOLDEN HORN, CA: 	 GHCND:CA002100615
CANTAMAYEC, MX: 	 GHCND:MXN00031099
BETHEL AIRPORT, AK US: 	 WBAN:26615
CAPE SARICHEF: 	 GHCND:USC00501325
EAGLE RIVER GAKONA CIRCLE, AK US: 	 GHCND:USC00502645
MATANUSKA VALLEY 15, AK US: 	 GHCND:USC00505734
VALDEZ MUNICIPAL AIRPORT, AK US: 	 WBAN:26479
MENA 0.3 NNW, AR US: 	 GHCND:US1ARPL0002
KEAMS CANYON, AZ US: 	 GHCND:USC00024586
AUBURN 5.3 NNW, CA US: 	 GHCND:US1CAPC0032
LANCASTER 5.5 E, CA US: 	 GHCND:US1CALA0077
PANOCHE 2 W, CA US: 	 GHCND:USC00046675
SANTA MARIA 1.6 WSW, CA US: 	 GHCND:US1CASB0014
BLUE MESA LAKE, CO US: 	 GHCND:USC00050797
BROOKSVILLE 3.9 SE, FL US: 	 GHCND:US1FLHN0022
KEY LARGO 17.9 NE, FL US: 	 GHCND:US1FLMN0037
WHITE SPRINGS 0.3 NW, FL US: 	 GHCND:US1FLHM0005
PENSACOLA 12.5 SW, FL US: 	 GHCND:US1FLES0041
COHUTTA NUMBER 1 GEORGIA, GA US: 	 GHCND:U

In [36]:
# Matched closest stations per station-id prefix (the text before ":" in the id).
station_types

{'WBAN': 229, 'GHCND': 4793, 'COOP': 59, 'NEXRAD': 12}

In [22]:
# Create Station-IDs-of-Interest Listing

In [31]:
# Write one "station_id,first_year,last_year" line to station_ids_of_interest.csv
# for every closest station whose name appears in the station listing; names
# that are not in the listing are reported and left out of the file.
with open("station_ids_of_interest.csv", "w") as fh_stoi:

    for stn_name in unique_closest_station:
        if stn_name not in mapper__name_to_id:
            print(f"WARN: {stn_name} not found on current NOAA Stations Listing")
        else:
            stn_id = mapper__name_to_id[stn_name]
            # Active years are stored as a (first year, last year) pair per id.
            year_start, year_ended = mapper__id_to_activeyears[stn_id]
            # The station name is not written: only the id and its active years are.
            fh_stoi.write(f"{stn_id},{year_start},{year_ended}\n")


WARN: UTQIAGVIK FORMERLY BARROW 4 ENE, AK US not found on current NOAA Stations Listing


In [32]:
!head station_ids_of_interest.csv

WBAN:54777,2005,2019
GHCND:USC00191097,1884,1951
GHCND:US1NYSF0076,2016,2019
GHCND:USC00362116,1951,1988
GHCND:US1ILRI0018,2016,2019
GHCND:USC00369464,1893,2017
GHCND:USC00303464,1958,2015
GHCND:US1PAMT0089,2015,2019
GHCND:USC00191099,1890,1896
GHCND:US1PABR0009,2007,2018
