In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from plotly.offline import init_notebook_mode 

from utils.signatures import update_cumul_df, load_flow_loc
from utils.concentration import cumulative_lq, excel_save

Read model timeseries and observation timeseries

In [4]:
# Load the modelled flow/load table and the per-parameter contribution table,
# then derive the DIN load attributed to each land use.
data_dir = ['../data/mod/', '../output/']
# One [start, end] date pair per period (1 July to 30 June) for start years 2009-2019.
time_ranges = [[f'{year}/7/1', f'{year+1}/6/30'] for year in range(2009, 2020)]
# Alternative windows: time_ranges = obs_events.loc[:, ['start', 'end']].values
double_mass_ratio = {}

parameter_ts = pd.read_csv(f'{data_dir[1]}contribution_each_param.csv', index_col='Date')
parameter_ts.index = pd.to_datetime(parameter_ts.index, format='mixed', dayfirst=True)

mod_df = pd.read_csv(f'{data_dir[0]}DIN_flow.csv', index_col='Date')
mod_df.index = pd.to_datetime(mod_df.index, dayfirst=True)

# (first column, last column) of the label-sliced span that is summed row-wise
# for each land use; label slices include both end columns.
land_use_spans = {
    'grazing_forest': ('Grazing Forested-dissConst_DWC', 'Grazing Forested-dissConst_EMC'),
    'grazing_open': ('Grazing Open-dissConst_DWC', 'Grazing Open-dissConst_EMC'),
    'conservation': ('Conservation-dissConst_DWC', 'Conservation-dissConst_EMC'),
    'forest': ('Forestry-dissConst_DWC', 'Forestry-dissConst_EMC'),
    'other': ('Urban-dissConst_DWC', 'Horticulture-dissConst_EMC'),
}

# Cane is the delivery-ratio span plus the separate 'DWC' column.
din_each_land = {
    'cane': (
        parameter_ts.loc[:, 'DeliveryRatioSurface':'DeliveryRatioSeepage'].sum(axis=1)
        + parameter_ts.loc[:, 'DWC']
    ),
}
din_each_land.update({
    land_use: parameter_ts.loc[:, first_col:last_col].sum(axis=1)
    for land_use, (first_col, last_col) in land_use_spans.items()
})

Normalize the time series corresponding to each parameter and land use

In [None]:
# Build the per-year cumulative tables for every parameter column of parameter_ts
# (the first column is skipped) and save them as one workbook per parameter.
# NOTE(review): the workbooks are written under data_dir[0], but the plotting cell
# reads '<data_dir[1]>Diagnosis/<name>_cumul_norm.xlsx' — confirm the intended location.
for col in parameter_ts.columns[1:]:
    # Per-year column totals; rebuilt from scratch for each parameter.
    annual_total = pd.DataFrame(columns=mod_df.columns)
    double_mass_ratio = {}
    # Swap this parameter's series into 'Loads (kg)' from 2009-07-01 onwards.
    # This overwrites mod_df in place: once the loop finishes, mod_df holds the
    # series of the last parameter rather than the values read from disk.
    mod_df.loc['2009-07-01':, 'Loads (kg)'] = parameter_ts[col]
    # The last two entries of time_ranges are deliberately left out.
    for ii, period in enumerate(time_ranges[:-2]):
        df_temp = load_flow_loc(period, mod_df, timestep='d')
        df_temp = update_cumul_df(df_temp, df_temp.values[:, 0], df_temp.values[:, -2])
        double_mass_ratio[f'obs_year_{ii}'] = df_temp
        # Row label is the four-character start year of the period.
        annual_total.loc[period[0][0:4]] = df_temp.sum(axis=0)
    # Save once per parameter instead of rewriting the same file after every year.
    # Assumes excel_save writes every entry of the dict to `fn`, so one call with the
    # complete dict matches the last of the repeated calls — TODO confirm in utils.concentration.
    if double_mass_ratio:
        fn = f'{data_dir[0]}{col}_cumul_norm.xlsx'
        excel_save(double_mass_ratio, fn, True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [10]:
# Build the per-year cumulative tables for every land-use series in din_each_land
# and save them as one workbook per land use.
# NOTE(review): the workbooks are written under data_dir[0], but the plotting cell
# reads '<data_dir[1]>Diagnosis/<name>_cumul_norm.xlsx' — confirm the intended location.
for key, val in din_each_land.items():
    # Per-year column totals; rebuilt from scratch for each land use.
    annual_total = pd.DataFrame(columns=mod_df.columns)
    double_mass_ratio = {}
    # Swap this land use's series into 'Loads (kg)' from 2009-07-01 onwards.
    # This overwrites mod_df in place: once the loop finishes, mod_df holds the
    # series of the last land use rather than the values read from disk.
    mod_df.loc['2009-07-01':, 'Loads (kg)'] = val
    # The last two entries of time_ranges are deliberately left out.
    for ii, period in enumerate(time_ranges[:-2]):
        df_temp = load_flow_loc(period, mod_df, timestep='d')
        df_temp = update_cumul_df(df_temp, df_temp.values[:, 0], df_temp.values[:, -2])
        double_mass_ratio[f'obs_year_{ii}'] = df_temp
        # Row label is the four-character start year of the period.
        annual_total.loc[period[0][0:4]] = df_temp.sum(axis=0)
    # Save once per land use instead of rewriting the same file after every year.
    # Assumes excel_save writes every entry of the dict to `fn`, so one call with the
    # complete dict matches the last of the repeated calls — TODO confirm in utils.concentration.
    if double_mass_ratio:
        fn = f'{data_dir[0]}{key}_cumul_norm.xlsx'
        excel_save(double_mass_ratio, fn, True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

Generate the double cumulative curve for the time series of each land use

In [7]:
from plotly.subplots import make_subplots
# Shared styling for the cumulative-curve figures: one palette entry per plotted year
# and the two axis titles.
color_list = px.colors.qualitative.D3
xlabel, ylabel = 'Normalized cumulative discharge', 'Normalized cumulative mass'

In [None]:
# Draw one cumulative flow-vs-load curve per year for each parameter and save it as a PNG.
# To plot the land-use totals instead, iterate over din_each_land.keys().
# NOTE(review): workbooks are read from '<data_dir[1]>Diagnosis/', while the cells that
# create them write under data_dir[0] — confirm where the files are expected to live.
for p_name in parameter_ts.columns[1:]:
    print(p_name)
    # sheet_name=None loads every sheet into a dict keyed by sheet name.
    df_mod = pd.read_excel(f'{data_dir[1]}Diagnosis/{p_name}_cumul_norm.xlsx', sheet_name=None)
    # Keep 'Date' plus the last two columns of each sheet and stack all sheets with a
    # single concat (no empty seed frame, no repeated re-allocation inside a loop).
    yearly_frames = [sheet.loc[:, ['Date', *sheet.columns[-2:]]] for sheet in df_mod.values()]
    df_mod_plot = pd.concat(yearly_frames).set_index('Date')
    df_mod_plot.index = pd.to_datetime(df_mod_plot.index)

    fig = make_subplots(
        rows=1, cols=1,
        horizontal_spacing=0.01,
        specs=[[{"type": "scatter"}]]
    )

    for ii in range(2009, 2018):
        # Label slices include both end points, so the window must stop on 30 June.
        # Ending on 1 July would append the first row of the following year and draw
        # a spurious segment from the end of the curve back to its start.
        df_mod_temp = df_mod_plot.loc[
            pd.to_datetime(f'{ii}-07-01'):pd.to_datetime(f'{ii+1}-06-30'),
            ['cumul_flow_ratio', 'cumul_load_ratio']
        ].values
        fig.add_trace(
            go.Scatter(
                x=df_mod_temp[:, 0], y=df_mod_temp[:, 1], mode='lines', name=f'{ii}',
                line=dict(width=2, dash='dot', color=color_list[ii - 2009])
            ),
            row=1, col=1
        )

    fig.update_layout(
        yaxis=dict(
            title_text=ylabel,
            tickfont=dict(size=16)
        ),
        xaxis=dict(
            title_text=xlabel,
            tickfont=dict(size=16)
        ),
        font=dict(size=16),
        title=p_name
    )

    fig.update_layout(
        height=500, width=800,
        legend=dict(
            yanchor="bottom", y=0.01, xanchor="right", x=0.99,
            font=dict(size=16), orientation='v', bgcolor="white",
            bordercolor="Black",
            borderwidth=1
        ),
        legend_title_text='Year', font_family="Arial", font_color='black'
    )

    fig.write_image(f'{data_dir[1]}Diagnosis/{p_name}_cumulative_lines.png', format='png', scale=2, engine='kaleido')

cane
grazing_forest
grazing_open
conservation
forest
other
