### Load packages

In [15]:
import requests
import pandas as pd
from io import StringIO


### Load USGS NWIS site location data

In [16]:
url = "https://waterservices.usgs.gov/nwis/site/"

params = {
    "format": "rdb",
    "stateCd": "VT",
    "siteStatus": "all"
}

r = requests.get(url, params=params)
r.raise_for_status()

df = pd.read_csv(
    StringIO(r.text),
    sep="\t",
    comment="#",
    dtype=str
)

sites_df = df[
    ["site_no", "station_nm", "dec_lat_va", "dec_long_va", "site_tp_cd"]
]

print(sites_df.head())


    site_no                                        station_nm   dec_lat_va  \
0       15s                                               50s          16s   
1  01129400                        BLACK BROOK AT AVERILL, VT    45.003935   
2  01129420            CAPON BROOK AT VT 102, NEAR CANAAN, VT   44.9369444   
3  01129700  PAUL STREAM TRIBUTARY NEAR BRUNSWICK SPRINGS, VT   44.6850514   
4  01133000  EAST BRANCH PASSUMPSIC RIVER NEAR EAST HAVEN, VT  44.63394195   

   dec_long_va site_tp_cd  
0          16s         7s  
1  -71.6923141         ST  
2  -71.5261111         ST  
3  -71.6211992         ST  
4  -71.8975942         ST  


In [17]:
# Clean up the data

print("Records before removing nulls:", len(sites_df))

lat = pd.to_numeric(sites_df["dec_lat_va"], errors="coerce")
lon = pd.to_numeric(sites_df["dec_long_va"], errors="coerce")

sites_clean_df = sites_df[lat.notna() & lon.notna()]

print("Records after removing nulls:", len(sites_clean_df))
sites_clean_df.head()

Records before removing nulls: 4811
Records after removing nulls: 4705


Unnamed: 0,site_no,station_nm,dec_lat_va,dec_long_va,site_tp_cd
1,1129400,"BLACK BROOK AT AVERILL, VT",45.003935,-71.6923141,ST
2,1129420,"CAPON BROOK AT VT 102, NEAR CANAAN, VT",44.9369444,-71.5261111,ST
3,1129700,"PAUL STREAM TRIBUTARY NEAR BRUNSWICK SPRINGS, VT",44.6850514,-71.6211992,ST
4,1133000,"EAST BRANCH PASSUMPSIC RIVER NEAR EAST HAVEN, VT",44.63394195,-71.8975942,ST
5,1133100,"DISH MILL BROOK AT EAST BURKE, VT",44.5897764,-71.9228722,ST


In [18]:
# Download USGS NWIS information to a data file

sites_clean_df.to_csv("../data/weather/USGS_NWIS_site_locations.csv")