In [1]:
%load_ext autoreload
%autoreload 2
import sys

# Instead of packaging the source code with setup.py or building a Docker/Singularity image,
# add the parent directory to sys.path so the sibling directory of src code can be imported here.
# This keeps the notebook free of library code (it only holds visualizations) and makes that code easier to test.
# It also helps keep variable state clean, so results do not depend on cells having been run out of order.
sys.path.append("..")

In [8]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import skew
import statistics
import cartopy.crs as crs
import cartopy.feature as cfeature
import xarray as xr

ModuleNotFoundError: No module named 'pandas.tslib'

In [None]:
def format_df(df):
    """
    Expand a value/count table into one row per individual observation.

    Args:
        df (pd.DataFrame): table with a "VALUE" column and a "COUNT" column.
            Each VALUE is repeated int(COUNT) times in the output.

    Returns:
        pd.DataFrame: single-column frame ("VALUE") with a fresh 0..n-1 index.
    """
    # Truncate counts to int, as the per-row int() cast did; a negative count
    # contributes no rows instead of raising.
    counts = df["COUNT"].astype(int).clip(lower=0).to_numpy()
    # np.repeat is purely positional, so the result no longer depends on the
    # index labels of df (the row loop fed iterrows() labels into .iloc).
    repeated = np.repeat(df["VALUE"].to_numpy(), counts)
    return pd.DataFrame({"VALUE": repeated})


def stat_anal(
    state,
    buffer,
    directory,
    state_df,
    station_list,
    lonlist,
    latlist,
    data_root="/home/aevans/landtype/elevation/data",
):
    """
    Summarise the elevation distribution around each station.

    For every file name in ``directory`` the CSV at
    ``{data_root}/{state}/elev/{buffer}/{file_name}`` is read, expanded with
    ``format_df`` and reduced to a handful of statistics.

    Args:
        state (str): state sub-directory name, e.g. "NY".
        buffer (str): buffer sub-directory name, e.g. "nam".
        directory (list[str]): CSV file names, one per station. The i-th file
            is paired by position with the i-th row of ``state_df`` and the
            i-th entry of ``station_list``.
        state_df (pd.DataFrame): must contain an "elev" column (station elevation).
        station_list (list): station identifiers, same order as ``directory``.
        lonlist (list): station longitudes, same length as ``directory``.
        latlist (list): station latitudes, same length as ``directory``.
        data_root (str): root of the elevation data tree. Defaults to the
            path that was previously hard-coded.

    Returns:
        pd.DataFrame: one row per file with columns
        station, elev, std, variance, skew, med_dist, lon, lat.
    """
    columns = ["station", "elev", "std", "variance", "skew", "med_dist"]
    records = []
    for x, file_name in enumerate(directory):
        # read in csv
        df = pd.read_csv(f"{data_root}/{state}/elev/{buffer}/{file_name}")
        values = format_df(df)["VALUE"]
        station_elev = state_df["elev"].iloc[x]

        # Median of (cell value - station elevation). quantile(0.5) is the
        # same number describe() reports as "50%", without relying on
        # positional access into a label-indexed Series.
        median_diff = (values - station_elev).quantile(0.5)

        records.append(
            {
                "station": station_list[x],
                "elev": station_elev,
                # NOTE(review): std is the *sample* standard deviation while
                # variance is the *population* variance, so std**2 != variance.
                # Kept as-is to preserve existing numbers; confirm intent.
                "std": statistics.stdev(values),
                "variance": statistics.pvariance(values),
                "skew": skew(values),
                # NOTE(review): this equals 2 * elev - median(values), not
                # elev - median(values). Kept unchanged; confirm intent.
                "med_dist": station_elev - median_diff,
            }
        )

    final_df = pd.DataFrame(records, columns=columns)
    final_df["lon"] = lonlist
    final_df["lat"] = latlist
    return final_df


def percent_plot(df, variable):
    """
    Scatter one column of ``df`` against the station name and label every point.

    Args:
        df (pd.DataFrame): must contain a "station" column and ``variable``.
        variable (str): name of the column plotted on the y axis.
    """
    _, axis = plt.subplots(figsize=(20, 10))
    axis.scatter(df["station"], df[variable], s=50)
    # Label each marker with its station name, anchored at the marker itself.
    for _, row in df.iterrows():
        name = row["station"]
        axis.annotate(name, (name, row[variable]), fontsize=15)
    axis.grid()
    axis.set_ylabel(variable, size=20)
    # The station names are already drawn next to the points, so the x tick
    # labels and tick marks are switched off.
    axis.tick_params(labelbottom=False, bottom=False)


def plurality_plot(df, s):
    """
    Scatter stations by longitude/latitude, using column ``s`` for both the
    marker colour and the marker size.

    Args:
        df (pd.DataFrame): must contain "lon", "lat" and the column named by ``s``.
        s (str): column name used for colour and size; also shown in the title.
    """
    _, axis = plt.subplots()
    df.plot.scatter(
        x="lon",
        y="lat",
        c=s,
        s=s,
        colormap="jet",
        figsize=(9, 6),
        ax=axis,
    )
    axis.set_title(f"Mesonet Site {s} by Elevation", size=16)
    axis.set_xlabel("Longitude", size=14)
    axis.set_ylabel("Latitude", size=14)
    for which in ("x", "y"):
        axis.tick_params(axis=which, labelsize=12)
    axis.grid()


def good_sites_elev_ok(
    elev_df, max_std=100, max_variance=5000, max_med_dist=400, max_abs_skew=1.0
):
    """
    List the stations whose elevation statistics pass the stricter thresholds.

    A station is kept when all of the following hold (all comparisons strict):
    std < max_std, variance < max_variance, med_dist < max_med_dist and
    -max_abs_skew < skew < max_abs_skew. Rows with NaN in any of these
    columns are rejected.

    Args:
        elev_df (pd.DataFrame): needs "station", "std", "variance",
            "med_dist" and "skew" columns.
        max_std (float): upper bound for "std".
        max_variance (float): upper bound for "variance".
        max_med_dist (float): upper bound for "med_dist" (no lower bound is applied).
        max_abs_skew (float): bound on the absolute value of "skew".

    Returns:
        list: station names that pass, in row order.
    """
    # A boolean mask works for any index; the row loop indexed .iloc with
    # iterrows() labels, which is only valid for a default 0..n-1 index.
    keep = (
        (elev_df["std"] < max_std)
        & (elev_df["variance"] < max_variance)
        & (elev_df["med_dist"] < max_med_dist)
        & (elev_df["skew"].abs() < max_abs_skew)
    )
    return elev_df.loc[keep, "station"].tolist()


def good_sites_elev(
    elev_df, max_std=150, max_variance=20000, max_med_dist=400, max_abs_skew=1.0
):
    """
    List the stations whose elevation statistics pass the looser thresholds.

    A station is kept when all of the following hold (all comparisons strict):
    std < max_std, variance < max_variance, med_dist < max_med_dist and
    -max_abs_skew < skew < max_abs_skew. Rows with NaN in any of these
    columns are rejected.

    Args:
        elev_df (pd.DataFrame): needs "station", "std", "variance",
            "med_dist" and "skew" columns.
        max_std (float): upper bound for "std".
        max_variance (float): upper bound for "variance".
        max_med_dist (float): upper bound for "med_dist" (no lower bound is applied).
        max_abs_skew (float): bound on the absolute value of "skew".

    Returns:
        list: station names that pass, in row order.
    """
    # A boolean mask works for any index; the row loop indexed .iloc with
    # iterrows() labels, which is only valid for a default 0..n-1 index.
    keep = (
        (elev_df["std"] < max_std)
        & (elev_df["variance"] < max_variance)
        & (elev_df["med_dist"] < max_med_dist)
        & (elev_df["skew"].abs() < max_abs_skew)
    )
    return elev_df.loc[keep, "station"].tolist()


def good_sites_df(df, good_list):
    """
    Keep only the rows of ``df`` whose station is in ``good_list``.

    Args:
        df (pd.DataFrame): needs "station", "lon" and "lat" columns.
        good_list (list): station names to keep.

    Returns:
        pd.DataFrame: columns station, lon, lat for the matching rows, in the
        row order of ``df``, with a fresh 0..n-1 index.
    """
    # isin() filters by value, independent of the index labels; the row loop
    # indexed .iloc with iterrows() labels and broke on a non-default index.
    selected = df.loc[df["station"].isin(good_list), ["station", "lon", "lat"]]
    return selected.reset_index(drop=True)


def good_elevs_plot(good_elev_df):
    """
    Draw the given stations on a PlateCarree map with basic map features.

    Args:
        good_elev_df (pd.DataFrame): needs "station", "lon" and "lat" columns.
    """
    proj_pc = crs.PlateCarree()

    # Map extent: bounding box of the stations plus a 1-degree margin.
    lat_north = good_elev_df["lat"].max() + 1
    lat_south = good_elev_df["lat"].min() - 1
    lon_east = good_elev_df["lon"].max() + 1
    lon_west = good_elev_df["lon"].min() - 1

    fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": proj_pc})
    ax.set_extent([lon_west, lon_east, lat_south, lat_north], crs=proj_pc)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle="--")
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)

    gridliner = ax.gridlines(
        crs=proj_pc,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label switches live on the Gridliner, not on the axes; the previous
    # `ax.xticklabels_top` / `ax.ylabels_right` assignments had no effect.
    gridliner.top_labels = False
    gridliner.right_labels = False

    ax.scatter(
        x=good_elev_df["lon"],
        y=good_elev_df["lat"],
        c="r",
        s=40,
    )
    # Station name slightly to the east of each marker.
    for _, row in good_elev_df.iterrows():
        ax.annotate(row["station"], (row["lon"] + 0.1, row["lat"]), fontsize=10)
    ax.set_title("Good Mesonet Sites by Elevation", size=16)
    ax.set_xlabel("Longitude", size=14)
    ax.set_ylabel("Latitude", size=14)
    ax.tick_params(axis="x", labelsize=12)
    ax.tick_params(axis="y", labelsize=12)
    ax.grid()

In [None]:
def current_time_mesonet_df(mesonet_data_path) -> pd.DataFrame:
    """
    Load the most recent mesonet NetCDF file into a dataframe.

    The archive is walked as ``<root>/<year>/<month>/<file>``; at every level
    the lexicographically last entry is taken as the most recent one.

    Args:
        mesonet_data_path (str): root directory of the mesonet archive.

    Returns:
        pd.DataFrame: contents of the selected file, with the dataset index
        turned into ordinary columns.

    Raises:
        IndexError: if any of the three directory levels is empty.
    """
    # most recent year / month / file (last entry after sorting)
    latest_year = sorted(os.listdir(f"{mesonet_data_path}"))[-1]
    latest_month = sorted(os.listdir(f"{mesonet_data_path}/{latest_year}"))[-1]
    latest_file = sorted(
        os.listdir(f"{mesonet_data_path}/{latest_year}/{latest_month}")
    )[-1]

    # The first eight characters of the file name are read as YYYYMMDD; the
    # path that is opened is rebuilt from those pieces.
    year = latest_file[0:4]
    month = latest_file[4:6]
    day = latest_file[6:8]
    file = year + month + day + ".nc"

    # Context manager so the underlying file handle is released once the data
    # has been materialised as a dataframe.
    with xr.open_dataset(f"{mesonet_data_path}/{year}/{month}/{file}") as dataset:
        mesonet_df = dataset.to_dataframe().reset_index()
    return mesonet_df

In [None]:
def most_recent_time(df: pd.DataFrame, mesonet_data_path) -> pd.DataFrame:
    """
    Return the rows of ``df`` that belong to the most recent timestamp with data.

    The date comes from the newest file name found under
    ``<mesonet_data_path>/<year>/<month>/`` (first eight characters read as
    YYYYMMDD). The time of day comes from the last row of ``df`` whose "tair"
    value is not missing.

    Args:
        df (pd.DataFrame): mesonet data with "time_5M" and "tair" columns.
            It is not modified.
        mesonet_data_path (str): root directory of the mesonet archive.

    Returns:
        pd.DataFrame: rows of ``df`` (after ``reset_index()``, so the former
        index appears as a column) whose "time_5M" equals that date and time.

    Raises:
        IndexError: if a directory level is empty or no row has a "tair" value.
    """
    # most recent year / month / file (last entry after sorting)
    latest_year = sorted(os.listdir(f"{mesonet_data_path}"))[-1]
    latest_month = sorted(os.listdir(f"{mesonet_data_path}/{latest_year}"))[-1]
    latest_file = sorted(
        os.listdir(f"{mesonet_data_path}/{latest_year}/{latest_month}")
    )[-1]
    year = latest_file[0:4]
    month = latest_file[4:6]
    day = latest_file[6:8]

    # last timestamp that actually carries a temperature reading
    current_time_df = df.dropna(subset=["tair"])
    last_value = current_time_df["time_5M"].iat[-1]
    # Zero-padded HH:MM:SS, so the string is an unambiguous timestamp
    # (str(hour) etc. produced values such as "9:5:0").
    time = last_value.strftime("%H:%M:%S")

    # Work on a copy: reset_index(inplace=True) altered the caller's frame and
    # added another index column on every call.
    indexed_df = df.reset_index()

    mesonet_single_datetime_df = indexed_df.loc[
        indexed_df["time_5M"] == f"{year}-{month}-{day} {time}"
    ]
    return mesonet_single_datetime_df

In [None]:
# Load the station coordinate table from nysm_coords.csv.
# NOTE: absolute, user-specific path - adjust when running on another machine.
ny_df = pd.read_csv("/home/aevans/landtype/notebooks/nysm_coords.csv")

In [None]:
# Root directory of the mesonet NetCDF archive that is handed to
# current_time_mesonet_df, which looks for <year>/<month>/<file> beneath it.
# NOTE: absolute, user-specific path - adjust when running on another machine.
ny_mesonet_data_path = "/home/aevans/nysm/archive/nysm/netcdf/proc"

In [28]:
# Load the newest archive file, then keep only the rows of its latest
# timestamp that has data.
nysm_df1 = current_time_mesonet_df(ny_mesonet_data_path)
# most_recent_time takes the archive root as a required second argument.
nysm_df = most_recent_time(nysm_df1, ny_mesonet_data_path)

NameError: name 'xr' is not defined

In [None]:
# Show the rows selected by most_recent_time.
nysm_df

NameError: name 'nysm_df' is not defined

In [5]:
# Show the station coordinate table.
ny_df

Unnamed: 0.1,Unnamed: 0,station,latitude,longitude
0,0,ADDI,42.040359,-77.237259
1,1,ANDE,42.182270,-74.801392
2,2,BATA,43.019939,-78.135658
3,3,BEAC,41.528751,-73.945267
4,4,BELD,42.223221,-75.668518
...,...,...,...,...
121,121,WFMB,44.393234,-73.858826
122,122,WGAT,43.532410,-75.158600
123,123,WHIT,43.485073,-73.423073
124,124,WOLC,43.228680,-76.842613


In [6]:
# File names in the NY / nam elevation folder, plus a sorted copy of the list.
directory = os.listdir("/home/aevans/landtype/elevation/data/NY/elev/nam/")
sorted_direct = list(directory)
sorted_direct.sort()

In [7]:
# Folder that the numbered aspect_csv_<i>.csv files are read from.
path_ny = "/home/aevans/landtype/elevation/data/CSVs_elevation_ny_nam"

In [8]:
# Plain Python lists of the station ids and their coordinates, in table order.
station_list_ny, ny_df_lons, ny_df_lats = (
    ny_df[column].to_list() for column in ("station", "longitude", "latitude")
)

In [9]:
# Re-save every numbered CSV under a station-named file.
# aspect_csv_<i>.csv (1-based) is paired with the i-th entry of station_list_ny,
# so the loop is driven by the station list itself instead of a hard-coded
# range(1, 127) with a separate manual counter.
for i, station in enumerate(station_list_ny, start=1):
    df = pd.read_csv(f"{path_ny}/aspect_csv_{i}.csv")
    df.to_csv(
        f"/home/aevans/landtype/elevation/data/NY/elev/nam/{station}_elev.csv"
    )

In [11]:
# Build the per-station elevation statistics table for NY / nam.
# NOTE(review): stat_anal reads state_df["elev"], but ny_df as loaded only has
# station / latitude / longitude columns, which is what raises the recorded
# KeyError: 'elev'. An "elev" column has to be present on the frame passed here.
slope_df = stat_anal(
    "NY", 'nam', sorted_direct, ny_df, station_list_ny, ny_df_lons, ny_df_lats
)

KeyError: 'elev'

In [None]:
# Save the statistics table to disk.
# NOTE(review): the file is called elev_hrrr.csv but sits in a "nam" folder and
# was built with the "nam" buffer - confirm the intended file name.
slope_df.to_csv("/home/aevans/correlation/nam/elev_hrrr.csv")

: 