# Station Visualizer

This notebook generates a station map and a chart showing station distribution over time for a given stage (raw, clean, QAQC, merge). Note that these figures reflect the information provided in the station lists, not actual station data availability; they should therefore be re-made following the cleaning stage to reflect dropped stations and the actual temporal availability of data.

In [None]:
import time
import boto3
import numpy as np
import pandas as pd
import xarray as xr

# Bucket name and per-stage folder names, used to build s3:// paths below.
BUCKET_NAME = "wecc-historical-wx"
RAW_DIR = "1_raw_wx"
CLEAN_DIR = "2_clean_wx"
QAQC_DIR = "3_qaqc_wx"
MERGE_DIR = "4_merge_wx"

# S3 handles. No credentials are configured in this notebook, so they must
# already be available to boto3 in the environment running the kernel.
s3 = boto3.resource("s3")  # high-level resource interface
s3_cl = boto3.client("s3")  # for lower-level processes

In [14]:
# Read the combined station list from the raw-stage folder of the bucket.
stationlist_uri = f"s3://{BUCKET_NAME}/{RAW_DIR}/all_network_stationlist_pull.csv"
station_list = pd.read_csv(stationlist_uri)

In [15]:
# Peek at the first row to see which columns the station list provides.
station_list.iloc[:1]

Unnamed: 0.1,Unnamed: 0,name,latitude,longitude,elevation,start-date,end-date,pulled,time_checked,network
0,0,BOISE AIR TERMINAL/GOWEN FD AIRPORT,43.567,-116.241,860.5,1931-01-01 00:00:00+00:00,2023-03-22 00:00:00+00:00,Y,2023-03-23 21:42:22+00:00,ASOSAWOS


In [5]:
# Inspect the start dates as loaded, before any datetime parsing.
station_list.loc[:, "start-date"]

0        1931-01-01 00:00:00+00:00
1        2006-01-01 00:00:00+00:00
2        2006-01-01 00:00:00+00:00
3        2006-01-01 00:00:00+00:00
4        2006-01-01 00:00:00+00:00
                   ...            
15875    2010-05-27 00:00:00+00:00
15876    2010-05-27 00:00:00+00:00
15877    2010-05-27 00:00:00+00:00
15878    2010-05-27 00:00:00+00:00
15879    2012-10-04 00:00:00+00:00
Name: start-date, Length: 15880, dtype: object

In [6]:
# Parse both date columns into timezone-aware (UTC) datetimes, in place.
for date_col in ("start-date", "end-date"):
    station_list[date_col] = pd.to_datetime(station_list[date_col], utc=True)

In [7]:
# Inspect the start dates again to confirm the dtype after parsing.
station_list.loc[:, "start-date"]

0       1931-01-01 00:00:00+00:00
1       2006-01-01 00:00:00+00:00
2       2006-01-01 00:00:00+00:00
3       2006-01-01 00:00:00+00:00
4       2006-01-01 00:00:00+00:00
                   ...           
15875   2010-05-27 00:00:00+00:00
15876   2010-05-27 00:00:00+00:00
15877   2010-05-27 00:00:00+00:00
15878   2010-05-27 00:00:00+00:00
15879   2012-10-04 00:00:00+00:00
Name: start-date, Length: 15880, dtype: datetime64[ns, UTC]

In [8]:
# Keep only the stations that have a start date. The copy makes the subset
# independent of station_list.
has_start_date = station_list["start-date"].notna()
subdf = station_list.loc[has_start_date].copy()

In [9]:
# Number of stations that have a start date.
subdf.shape[0]

15410

In [10]:
# Total number of stations in the full list, for comparison with the subset.
station_list.shape[0]

15880

In [12]:
## Drop rows flagged as not downloaded (pulled == "N").
## Every other value, including a missing one, is kept.
not_marked_n = subdf["pulled"].ne("N")
subdf = subdf.loc[not_marked_n].copy()

In [13]:
# Number of stations left after removing the rows with pulled == "N".
subdf.shape[0]

15394