In [None]:
import os
from pathlib import Path
import shutil

import teehr

# Display the installed TEEHR version so the notebook output records which
# release the cells below were run against.
teehr.__version__

In [None]:
from teehr.evaluation.spark_session_utils import create_spark_session

# Create the Spark session used by every cell below.
#
# The object-store credentials are read from the environment
# (MINIO_ROOT_USER / MINIO_ROOT_PASSWORD) so real secrets never have to be
# committed with the notebook. The fallbacks are the values this cell
# previously hardcoded, so behavior is unchanged when the variables are unset.
spark = create_spark_session(
    aws_access_key_id=os.environ.get("MINIO_ROOT_USER", "minioadmin"),
    aws_secret_access_key=os.environ.get("MINIO_ROOT_PASSWORD", "minioadmin123"),
    update_configs={
        # NOTE(review): the s3a credentials provider is set to the anonymous
        # provider while explicit keys are also passed above — confirm which
        # of the two this session is actually meant to use.
        "spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"
    }
)

In [None]:
%%time
# Directory passed to the Evaluation as its dir_path.
# NOTE(review): this is an absolute path; adjust it for the machine or
# container the notebook runs on.
dir_path = "/data/temp_warehouse"

# Build the Evaluation on top of the Spark session created in the previous cell.
# create_dir=True presumably lets TEEHR create dir_path when it does not
# exist yet — confirm against the Evaluation documentation.
ev = teehr.Evaluation(
    spark=spark,
    dir_path=dir_path,
    create_dir=True
)

In [None]:
# Switch the evaluation to the catalog named "remote".
# Presumably the table reads and writes in the cells below go through
# whichever catalog is active — confirm.
ev.set_active_catalog("remote")

# Echo the active catalog so the cell output confirms the switch.
ev.active_catalog

### Add configuration

In [None]:
# Snapshot the configurations table into pandas before the new entry is added,
# so it can be compared with the read-back that follows the add.
# NOTE: `og_df` is reassigned by the "Add variable" and "Add crosswalk entries"
# sections further down, so it only holds configurations until those cells run.
og_df = ev.configurations.to_pandas()
og_df.head()

In [None]:
from teehr import Configuration

# Define the configuration entry for the NWPS RFC streamflow forecast.
configuration = Configuration(
    name="nwpsrfc_streamflow_forecast",
    type="secondary",
    description="NWPS RFC Streamflow Forecast",
)

# Register the entry on the evaluation's configurations table.
# NOTE(review): re-running this cell calls add() again with the same name —
# confirm how TEEHR treats an entry that already exists.
ev.configurations.add(configuration)

In [None]:
# Read the configurations table back after the add and preview it.
# head() shows only the first five rows, so the new entry may not be visible
# here if the table is longer than that.
new_df = ev.configurations.to_pandas()
new_df.head()

### Add variable

In [None]:
# Snapshot the variables table into pandas before the new variable is added.
# NOTE: this overwrites the `og_df` assigned in the "Add configuration" section.
og_df = ev.variables.to_pandas()
og_df.head()

In [None]:
from teehr import Variable

# Define the variable entry for instantaneous 6-hour streamflow.
variable = Variable(
    name="streamflow_6hr_inst",
    long_name="Instantaneous 6-hour streamflow"
)

# Register the entry on the evaluation's variables table.
# NOTE(review): re-running this cell calls add() again with the same name —
# confirm how TEEHR treats an entry that already exists.
ev.variables.add(variable)

In [None]:
# Read the variables table back after the add and preview it.
# head() shows only the first five rows, so the new entry may not be visible
# here if the table is longer than that.
new_df = ev.variables.to_pandas()
new_df.head()

### Add crosswalk entries

In [None]:
# Snapshot the existing location crosswalk into pandas and preview ten rows.
# The next cell reads `og_df["primary_location_id"]` to build the new rows,
# so this cell must be the most recent one to have assigned `og_df`
# (the name is also used by the configuration and variable sections above).
og_df = ev.location_crosswalks.to_pandas()
og_df.head(10)

In [None]:
import pandas as pd

# Build one "nwpsrfc-" secondary ID per distinct USGS primary ID and append
# the new pairs to the existing crosswalk.
#
# Safeguards compared with a plain replace on every row of og_df:
# - dropna / drop_duplicates: a primary location that already maps to several
#   secondary IDs appears on several rows and must yield only one new row.
# - startswith filter: an ID without the "usgs-" prefix would otherwise pass
#   through unchanged and produce a row that maps the location to itself.
# - prefix slicing: only the leading "usgs-" is swapped, never a later
#   occurrence inside the ID, and no regex interpretation is involved.
USGS_PREFIX = "usgs-"
NWPS_PREFIX = "nwpsrfc-"

usgs_ids = og_df["primary_location_id"].dropna().drop_duplicates()
# astype(bool) keeps the mask boolean even when there are no IDs at all.
usgs_ids = usgs_ids[usgs_ids.str.startswith(USGS_PREFIX).astype(bool)]

nwps_xwalk = pd.DataFrame(
    {
        "primary_location_id": usgs_ids,
        "secondary_location_id": NWPS_PREFIX + usgs_ids.str[len(USGS_PREFIX):],
    }
)

# Dropping duplicate (primary, secondary) pairs keeps the result stable when
# this cell is re-run after the nwpsrfc rows have already been loaded.
updated_xwalk = pd.concat([og_df, nwps_xwalk], ignore_index=True).drop_duplicates(
    subset=["primary_location_id", "secondary_location_id"],
    ignore_index=True,
)

updated_xwalk.head(20)

In [None]:
# Load the combined crosswalk into the location_crosswalks table.
# NOTE(review): updated_xwalk contains the rows that were read from the table
# as well as the new ones — confirm load_dataframe tolerates re-loading rows
# that already exist.
ev.location_crosswalks.load_dataframe(updated_xwalk)

In [None]:
# Read the crosswalk back after the load and preview the first twenty rows.
# If the table is longer than that, the new nwpsrfc rows may not be visible here.
new_df = ev.location_crosswalks.to_pandas()
new_df.head(20)

### Stop the Spark session

In [None]:
# Stop the Spark session attached to the evaluation. Run this last: the cells
# above all use `ev` or `spark` and will not work once the session is stopped.
ev.spark.stop()