In [None]:
# SCRATCH NOTEBOOK

In [4]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import geopandas as gpd
import os
import mlflow
import mlflow.pytorch
import torch
import json
import boto3
from snowML.datapipe.utils import get_geos as gg
from snowML.datapipe.utils import data_utils as du
from snowML.datapipe import to_model_ready as mr
from snowML.LSTM import LSTM_evaluate as evaluate 
from snowML.LSTM import LSTM_pre_process as pp


# Test Logic for Getting/Plotting SWE Data 

In [None]:

def get_data(huc):
    """Load the UA and UCLA model-ready tables for one HUC from S3.

    Both CSVs are looked up in the ``snowml-model-ready`` bucket. A table
    that is found is indexed by its ``day`` column; a table whose object is
    not reported by ``du.isin_s3`` comes back as ``None``.

    Args:
        huc: HUC identifier interpolated into the CSV file names.

    Returns:
        Tuple ``(df_UA, df_UCLA)``; either element may be ``None``.
    """
    bucket_name = "snowml-model-ready"

    def _load_indexed(file_name):
        # Absent objects are signalled with None instead of raising.
        if du.isin_s3(bucket_name, file_name):
            frame = du.s3_to_df(file_name, bucket_name)
            frame.set_index("day", inplace=True)
            return frame
        return None

    # UA is fetched first, then UCLA, mirroring the order of the S3 calls.
    ua_frame = _load_indexed(f"model_ready_huc{huc}.csv")
    ucla_frame = _load_indexed(f"model_ready_huc{huc}_ucla.csv")
    return ua_frame, ucla_frame

def slim_df(df):
    """Return a single-column DataFrame containing only ``mean_swe``.

    Raises:
        KeyError: if ``df`` has no ``mean_swe`` column.
    """
    wanted_columns = ["mean_swe"]
    return df[wanted_columns]

def make_plot_df(df_UA, df_UCLA):
    """Build the frame that ``plot_swe`` draws from.

    Args:
        df_UA: UA table with a ``mean_swe`` column. Required.
        df_UCLA: UCLA table with a ``mean_swe`` column, or ``None`` when no
            UCLA data is available.

    Returns:
        A DataFrame with ``mean_swe`` (UA) and, when ``df_UCLA`` is given,
        ``mean_swe_UCLA`` inner-joined on the index.

    Raises:
        ValueError: if ``df_UA`` is ``None``. ``get_data`` returns ``None``
            for a missing UA file, which previously surfaced here as an
            opaque ``TypeError`` from subscripting ``None``.
    """
    if df_UA is None:
        raise ValueError(
            "UA model-ready data is required to build the plot frame (got None)"
        )
    df_UA_slim = slim_df(df_UA)
    if df_UCLA is None:
        return df_UA_slim
    # Rename so the two SWE columns do not collide in the join.
    df_UCLA_slim = slim_df(df_UCLA).rename(columns={"mean_swe": "mean_swe_UCLA"})
    # Inner join keeps only the index values present in both tables.
    return df_UA_slim.join(df_UCLA_slim, how="inner")

def plot_swe(df_UA, df_UCLA, huc):
    """Plot mean SWE over time for one HUC and return the figure.

    The UA series is always drawn (blue); the UCLA series is added in black
    when ``df_UCLA`` is not ``None``. The y-axis is fixed to the range 0-2
    and the x-axis shows one tick per year.

    Args:
        df_UA: UA table, passed through to ``make_plot_df``.
        df_UCLA: UCLA table or ``None``.
        huc: identifier shown in the plot title.

    Returns:
        The matplotlib ``Figure``.
    """
    plot_frame = make_plot_df(df_UA, df_UCLA)

    # Parse the index as datetimes; values that cannot be parsed become NaT.
    plot_frame.index = pd.to_datetime(plot_frame.index, errors="coerce")
    dates = plot_frame.index

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(dates, plot_frame["mean_swe"], c='b', label="Mean_SWE_UAData")
    has_ucla = df_UCLA is not None
    if has_ucla:
        ax.plot(dates, plot_frame["mean_swe_UCLA"], c="black", label="Mean_SWE_UCLA_Data")

    ax.set_ylim(0, 2)
    ax.legend()
    ax.set_xlabel('Date')
    ax.set_ylabel('SWE (m)')
    ax.set_title(f"Mean SWE for HUC {huc}")

    # Yearly ticks, labelled with the four-digit year only.
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
    ax.xaxis.set_major_locator(mdates.YearLocator())
    tick_labels = ax.get_xticklabels()
    plt.setp(tick_labels, rotation=45, ha="center")
    fig.tight_layout()
    return fig

In [None]:
# Run the SWE helpers end to end for one HUC12.
huc12 = 171100050703
# Either frame is None when its CSV is not found in the bucket.
df_UA, df_UCLA = get_data(huc12)
fig = plot_swe(df_UA, df_UCLA, huc12)



# PreCalculate Geos Files for Huc8s

In [None]:
# S3 bucket name interpolated into the s3:// geo-parquet URLs built in this notebook.
b = "snowml-dashboard"

In [None]:
# All HUC8 units in Region 17; the bare name on the last line displays the result.
R17 = gg.get_geos_with_name(17, '08')
R17

In [None]:
# Label = first 20 characters of the HUC name followed by the HUC id in parentheses.
# NOTE: reads the huc_name column, which the following cell drops, so this cell
# cannot be re-run after that one without rebuilding R17.
R17["Display name"] = R17["huc_name"].str.slice(0, 20) + " (" + R17["huc_id"].astype(str) + ")"
R17.head()

In [None]:
# Drop the raw name column; the "Display name" column carries the label from here on.
R17 = R17.drop(columns=["huc_name"])
R17.head()

In [None]:
# Work on a copy so the status column added next does not end up on R17.
R17_2 = R17.copy()

In [None]:
# Every HUC8 starts as "No"; process_and_record flips individual rows to "Yes".
R17_2["Model Ready"] = "No"

In [None]:
# Write the HUC8 table (with its "Model Ready" column) to S3 as geo-parquet.
f = "R17_huc8"
url = f"s3://{b}/{f}.geo.parquet"
R17_2.to_parquet(url)

# Precalculate Huc12 geo files

In [None]:
# Example HUC8 id (string). No visible cell passes it to the helpers below;
# presumably kept for ad-hoc calls to make_huc12_file - TODO confirm.
huc_08 = "17010101"

In [None]:
def make_huc12_file(huc_08):
    """Fetch the HUC12 geometries under ``huc_08`` and attach a display label.

    The label is the first 20 characters of ``huc_name`` followed by the
    ``huc_id`` in parentheses; ``huc_name`` itself is dropped from the result.

    Args:
        huc_08: HUC8 identifier passed to ``gg.get_geos_with_name``.

    Returns:
        The geometry table with a ``Display name`` column and no ``huc_name``.
    """
    geos = gg.get_geos_with_name(huc_08, '12')
    short_name = geos["huc_name"].str.slice(0, 20)
    id_text = geos["huc_id"].astype(str)
    geos["Display name"] = short_name + " (" + id_text + ")"
    return geos.drop(columns=["huc_name"])

def save_file(geos, huc_08):
    """Write ``geos`` to ``<huc_08>_huc12.geo.parquet`` in bucket ``b``.

    Args:
        geos: table exposing ``to_parquet``.
        huc_08: HUC8 identifier used as the file-name prefix.

    Returns:
        The ``s3://`` URL that was written.
    """
    # `b` is the module-level bucket name defined earlier in the notebook.
    destination = f"s3://{b}/{huc_08}_huc12.geo.parquet"
    geos.to_parquet(destination)
    return destination

In [None]:
#count = 0 
#for huc_08 in R17["huc_id"]:
    #count += 1
    #if count % 10 == 0: 
        #print(f"step {count}")
    #geos = make_huc12_file(huc_08)
    #save_file(geos, huc_08)


# Record Which HUCS Are Model Ready 

In [None]:
# Read the HUC8 table back from S3 and preview it.
f = "R17_huc8"
url = f"s3://{b}/{f}.geo.parquet"
df = gpd.read_parquet(url)
df.head()

In [None]:
def check_errors():
    """Report whether ``model_ready_err.txt`` has any content.

    Returns:
        ``False`` when the file is empty (zero bytes), ``True`` otherwise.

    Raises:
        OSError: if the file does not exist in the working directory
            (propagated from ``os.path.getsize``).
    """
    error_log = "model_ready_err.txt"
    return os.path.getsize(error_log) != 0

def process_and_record(huc):
    """Process the HUC12s under ``huc`` and mark the HUC8 as model ready.

    Reads the ``R17_huc8`` table from bucket ``b``, runs the multi-HUC
    processing, and - only when ``check_errors`` reports an empty error
    file - sets ``Model Ready`` to ``"Yes"`` for the matching ``huc_id`` and
    writes the table back to the same URL.

    Args:
        huc: HUC8 identifier; compared against ``huc_id`` as ``str(huc)``.
    """
    table_url = f"s3://{b}/R17_huc8.geo.parquet"
    status_table = gpd.read_parquet(table_url)
    huc12_geos = gg.get_geos(huc, '12')
    # NOTE(review): `mha` is not imported in any visible cell, so this line
    # raises NameError on a fresh kernel - confirm which module it refers to.
    mha.process_multi_huc_quiet(huc12_geos)
    if check_errors():
        # Error file is non-empty: leave the recorded status untouched.
        return
    print(f"no errors for huc{huc}!")
    matching_rows = status_table["huc_id"] == str(huc)
    status_table.loc[matching_rows, "Model Ready"] = "Yes"
    status_table.to_parquet(table_url)

In [None]:
# Single-HUC run; the int id is matched against huc_id via str(huc) inside the function.
process_and_record(17110005)

In [None]:
# Re-read the table from S3 so `df` reflects what process_and_record wrote.
f = "R17_huc8"
url = f"s3://{b}/{f}.geo.parquet"
df = gpd.read_parquet(url)
df.head()

In [None]:
# Show the row for the HUC8 processed above.
df.loc[df["huc_id"] == "17110005"] 

In [None]:
# HUC8 ids (strings) to run through process_and_record.
huc_list = ["17020009", "17110005", "17110006", "17110009", "17030002", "17110008", "17030001"]



In [None]:
# Each call re-reads the HUC8 table from S3 and writes it back when no errors were logged.
for huc in huc_list: 
    process_and_record(huc)

In [None]:
# Rows for the HUC8s in huc_list.
# NOTE: `df` was read before the processing loop; re-run the read cell first
# to see statuses written by that loop.
df_filtered = df[df["huc_id"].isin(huc_list)]
df_filtered

In [None]:
# Display labels of the filtered HUC8s as a plain list.
df_filtered["Display name"].to_list()

# Retrieve and Save MLflow Models 

In [2]:
# Shared S3 client used by the upload helpers defined in this cell.
s3 = boto3.client("s3")

def model_from_MLflow(uri):
    """Load a model via ``evaluate.load_model`` and return it.

    Args:
        uri: model location passed straight to ``evaluate.load_model``.

    Returns:
        The loaded model. (The earlier version loaded the model but fell off
        the end of the function, so callers always received ``None``.)
    """
    model = evaluate.load_model(uri)
    return model

def model_to_s3(model, model_name, bucket_name = "snowml-dashboard"):
    """Save a model's ``state_dict`` and upload it to ``models/<model_name>.pth``.

    The weights are written to ``<model_name>.pth`` in the working directory,
    uploaded through the module-level ``s3`` client, and the local file is
    removed afterwards - including when saving or uploading fails, so a
    failed run no longer leaves a stray ``.pth`` file behind.

    Args:
        model: object exposing ``state_dict()``.
        model_name: base name for the local file and the S3 key.
        bucket_name: destination bucket.

    Returns:
        The local file name that was uploaded (``<model_name>.pth``).
    """
    file_name = f"{model_name}.pth"
    try:
        torch.save(model.state_dict(), file_name)
        s3.upload_file(file_name, bucket_name, f"models/{file_name}")
    finally:
        # Guarded because torch.save may fail before the file exists.
        if os.path.exists(file_name):
            os.remove(file_name)
    return file_name

def params_to_s3(params, model_name, bucket_name = "snowml-dashboard"):
    """Dump ``params`` as JSON and upload it to ``models/<model_name>_params.json``.

    The S3 key now includes the model name. Previously every model was
    uploaded to the fixed key ``models/params.json``, so saving a second
    model overwrote the first model's parameters. The local temp file is
    removed even when writing or uploading fails.

    Args:
        params: JSON-serialisable parameters.
        model_name: prefix for the local file and the S3 key.
        bucket_name: destination bucket.
    """
    params_file = f"{model_name}_params.json"
    try:
        with open(params_file, "w") as f:
            json.dump(params, f, indent=2)
        s3.upload_file(params_file, bucket_name, f"models/{params_file}")
    finally:
        if os.path.exists(params_file):
            os.remove(params_file)

def get_norm(params):
    """Compute the global normalisation statistics for a training run.

    The earlier version referenced ``huc_list_all_tr`` and ``var_list``,
    neither of which is defined anywhere visible, and returned nothing. Both
    inputs are now taken from ``params`` and the statistics are returned.

    Args:
        params: mapping with ``train_hucs``, ``val_hucs`` and ``var_list``.

    Returns:
        Tuple ``(global_means, global_stds)`` - the second and third values
        returned by ``pp.pre_process``.
    """
    huc_list_all_tr = params["train_hucs"] + params["val_hucs"]
    _, global_means, global_stds = pp.pre_process(huc_list_all_tr, params["var_list"])
    return global_means, global_stds


def save_all_model_data(model_uri, model_name, tracking_uri, run_id, bucket_name = "snowml-dashboard"):
    """Upload a model's weights and its run parameters to S3.

    Fixes relative to the earlier version:
      * the model is loaded from ``model_uri`` (it was loaded from
        ``model_name``, which is only a label);
      * ``bucket_name`` defaults to the bucket the other helpers use (the
        old default ``"models/params.json"`` is an object key, not a bucket);
      * ``bucket_name`` is no longer forwarded to ``evaluate.get_params``,
        matching every other call to it in this notebook;
      * ``bucket_name`` is forwarded to ``params_to_s3`` so weights and
        parameters land in the same bucket.

    Args:
        model_uri: location passed to ``evaluate.load_model``.
        model_name: prefix for the uploaded files.
        tracking_uri: tracking server passed to ``evaluate.get_params``.
        run_id: run whose parameters are fetched.
        bucket_name: destination bucket for both uploads.

    Returns:
        The parameters returned by ``evaluate.get_params``.
    """
    model = evaluate.load_model(model_uri)
    model_to_s3(model, model_name, bucket_name = bucket_name)
    params = evaluate.get_params(tracking_uri, run_id)
    params_to_s3(params, model_name, bucket_name = bucket_name)
    return params


    
    

In [18]:
# Retrieve the model to use for multi-HUC training
multi_huc_model_uri = "s3://sues-test/298/a6c611d4c4cf410e9666796e3a8892b7/artifacts/epoch9_model/data"
model_name = "Multi_Huc_Trained"
model = evaluate.load_model(multi_huc_model_uri)

s3://sues-test/298/a6c611d4c4cf410e9666796e3a8892b7/artifacts/epoch29_model


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]



SnowModel(
  (lstm1): LSTM(3, 64, batch_first=True, dropout=0.5)
  (linear): Linear(in_features=64, out_features=1, bias=True)
  (leaky_relu): LeakyReLU(negative_slope=0.01)
)


In [21]:
# Save the model weights to S3 (parameters are uploaded separately via params_to_s3)
model_to_s3(model, model_name) 

'Multi_Huc_Trained.pth'

In [None]:
# Retrieve the run's parameters from MLflow
tracking_uri = "arn:aws:sagemaker:us-west-2:677276086662:mlflow-tracking-server/dawgsML"
# Same id that appears in multi_huc_model_uri.
run_id = "a6c611d4c4cf410e9666796e3a8892b7"
params = evaluate.get_params(tracking_uri, run_id)

In [None]:
"""Module to Save PreTrained Model Data to S3 for Use in Dashboard"""
# pylint: disable=C0103

import os
import json
#import mlflow
#import mlflow.pytorch
import torch
import boto3
#from snowML.datapipe import to_model_ready as mr
from snowML.LSTM import LSTM_evaluate as evaluate
from snowML.LSTM import LSTM_pre_process as pp

# Shared S3 client used by every *_to_s3 helper below.
s3 = boto3.client("s3")

def model_from_MLflow(uri):
    """Return the model that ``evaluate.load_model`` loads from ``uri``."""
    return evaluate.load_model(uri)

def model_to_s3(model, model_name, bucket_name = "snowml-dashboard"):
    """Upload a model's ``state_dict`` to ``models/<model_name>.pth`` in S3.

    The weights are staged in ``<model_name>.pth`` in the working directory
    and uploaded with the module-level ``s3`` client. The staged file is now
    removed in a ``finally`` block, so a failed save or upload no longer
    leaves it on disk.

    Args:
        model: object exposing ``state_dict()``.
        model_name: base name for the staged file and the S3 key.
        bucket_name: destination bucket.

    Returns:
        The staged file name (``<model_name>.pth``).
    """
    file_name = f"{model_name}.pth"
    s3_key = f"models/{file_name}"
    try:
        torch.save(model.state_dict(), file_name)
        s3.upload_file(file_name, bucket_name, s3_key)
    finally:
        # torch.save can fail before creating the file, hence the guard.
        if os.path.exists(file_name):
            os.remove(file_name)
    return file_name

def params_to_s3(params, model_name, bucket_name = "snowml-dashboard"):
    """Upload ``params`` as JSON to ``models/<model_name>_params.json``.

    The JSON is staged in the working directory and uploaded with the
    module-level ``s3`` client. The staged file is now removed in a
    ``finally`` block, so a failed dump or upload no longer leaves it behind.

    Args:
        params: JSON-serialisable parameters.
        model_name: prefix for the staged file and the S3 key.
        bucket_name: destination bucket.
    """
    params_file = f"{model_name}_params.json"
    try:
        with open(params_file, "w") as f:
            json.dump(params, f, indent=2)
        s3.upload_file(params_file, bucket_name, f"models/{params_file}")
    finally:
        if os.path.exists(params_file):
            os.remove(params_file)

def norms_to_s3(g_means, g_stds, model_name, bucket_name = "snowml-dashboard"):
    """Upload normalisation means and stds as two JSON files under ``models/``.

    Writes ``<model_name>_means.json`` and then ``<model_name>_stds.json``,
    uploading each with the module-level ``s3`` client. The duplicated
    write/upload/remove sequence is now a single loop, the confusing
    ``std_file = means_file = ...`` chained assignment is gone, and each
    staged file is removed even when its dump or upload fails.

    Args:
        g_means: object exposing ``to_dict()`` with the means.
        g_stds: object exposing ``to_dict()`` with the standard deviations.
        model_name: prefix for both file names.
        bucket_name: destination bucket.
    """
    # Both conversions happen up front, before any file is written.
    payloads = (
        (f"{model_name}_means.json", g_means.to_dict()),
        (f"{model_name}_stds.json", g_stds.to_dict()),
    )
    for file_name, values in payloads:
        try:
            with open(file_name, "w") as f:
                json.dump(values, f, indent=2)
            s3.upload_file(file_name, bucket_name, f"models/{file_name}")
        finally:
            if os.path.exists(file_name):
                os.remove(file_name)
    

def get_norm(params):
    """Return the global means and stds for the run described by ``params``.

    Args:
        params: mapping with ``train_hucs``, ``val_hucs`` and ``var_list``.

    Returns:
        Tuple ``(global_means, global_stds)`` - the second and third values
        returned by ``pp.pre_process`` for the combined train + val HUCs.
    """
    combined_hucs = params["train_hucs"] + params["val_hucs"]
    stats = pp.pre_process(combined_hucs, params["var_list"])
    # First element of the triple is not needed here.
    _, means, stds = stats
    return means, stds


def save_all_model_data(model_uri, model_name, tracking_uri, run_id):
    """Upload a model's weights and run parameters; return the parameters.

    Args:
        model_uri: location passed to ``evaluate.load_model``.
        model_name: prefix for the uploaded files.
        tracking_uri: tracking server passed to ``evaluate.get_params``.
        run_id: run whose parameters are fetched.

    Returns:
        The parameters returned by ``evaluate.get_params``.
    """
    # Weights first, then parameters; both go to the helpers' default bucket.
    loaded_model = evaluate.load_model(model_uri)
    model_to_s3(loaded_model, model_name)

    run_params = evaluate.get_params(tracking_uri, run_id)
    params_to_s3(run_params, model_name)
    return run_params
