# Data Quality Visualizations

In [36]:
import time
import boto3
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
import contextily as cx

# Create S3 handles. No credentials are passed here, so boto3 falls back to
# its default credential lookup.
s3 = boto3.resource("s3")
s3_cl = boto3.client("s3")  # for lower-level processes

# Bucket and folder names used to build the S3 paths read in this notebook.
BUCKET_NAME = "wecc-historical-wx"
QAQC_DIR = "3_qaqc_wx"
MERGE_DIR = "4_merge_wx"

# Station list produced by the QA/QC step and the US state shapefile folder.
# Both paths are built from BUCKET_NAME so the bucket is named in one place.
stations_csv_path = f"s3://{BUCKET_NAME}/{QAQC_DIR}/all_network_stationlist_qaqc.csv"
shapepath = f"s3://{BUCKET_NAME}/0_maps/tl_2021_us_state"

## Check rate tables

## Map

In [None]:
# Per-network flag rates tables
# Paths are built from BUCKET_NAME / MERGE_DIR instead of repeating the
# bucket name inline.
network_natv_rates = pd.read_csv(
    f"s3://{BUCKET_NAME}/{MERGE_DIR}/network_native_flag_rates.csv"
)
network_hrly_rates = pd.read_csv(
    f"s3://{BUCKET_NAME}/{MERGE_DIR}/network_hourly_flag_rates.csv"
)

In [69]:
# Per-station flag rates tables
# Stored under station_* names so they do not overwrite the per-network
# tables (network_natv_rates / network_hrly_rates) loaded in the previous cell.
station_natv_rates = pd.read_csv(
    f"s3://{BUCKET_NAME}/{MERGE_DIR}/station_native_flag_rates.csv"
)
station_hrly_rates = pd.read_csv(
    f"s3://{BUCKET_NAME}/{MERGE_DIR}/station_hourly_flag_rates.csv"
)

### Merge with station list

In [None]:
# Network name used to subset the station list.
network = "ASOSAWOS"

In [None]:
# Load the full station list from the CSV at stations_csv_path.
station_list = pd.read_csv(filepath_or_buffer=stations_csv_path)

In [None]:
merged_list = sub_station_list.merge(flag_rate_df, on="era-id")

In [None]:
sub_station_list = station_list[station_list["network"] == network]

In [None]:
# Show the merged station / flag-rate table as the cell output.
merged_list

In [None]:
# NOTE: map_list is the same object as merged_list (no copy is made), so the
# date conversions below are also visible through merged_list.
map_list = merged_list

# Format dates in datetime format (this gets lost in import).
for date_col in ("start-date", "end-date"):
    map_list[date_col] = pd.to_datetime(map_list[date_col], utc=True)

# Make a geodataframe of station points from the longitude/latitude columns,
# declaring the CRS (EPSG:4326) at construction.
gdf = gpd.GeoDataFrame(
    map_list,
    geometry=gpd.points_from_xy(map_list.longitude, map_list.latitude),
    crs="EPSG:4326",
)

# Project data to match base tiles.
gdf_wm = gdf.to_crs(epsg=3857)  # Web mercator

# Read in the state geometries from the shapefile location.
us = gpd.read_file(shapepath)

# Remove territories, AK, HI
rem_list = ["HI", "AK", "MP", "GU", "AS", "PR", "VI"]
us = us.loc[~us.STUSPS.isin(rem_list)]

# Reproject the state outlines to the same CRS as the stations, then keep
# only the stations that fall inside them.
us = us.to_crs(epsg=3857)
gdf_us = gdf_wm.clip(us)



In [None]:
# Plot the clipped stations, colored by the "tas" column, with a legend.
ax = gdf_us.plot(
    column="tas",
    cmap="nipy_spectral",
    legend=True,
    markersize=3,
    alpha=1,
    figsize=(15, 15),
)

# Add the CartoDB Positron basemap underneath and hide the axis frame/ticks.
cx.add_basemap(ax, source=cx.providers.CartoDB.Positron)
ax.set_axis_off()