"""
This script is used to calculate the Event Mean Concentration (EMC).
The inputs are .csv files containing concentration and flow after linear interpolation.
"""

In [2]:
import pandas as pd
import numpy as np
from utils.concentration import rainfall_events, emc_cal, conc_interpolate, event_emc
import datetime

# read the discrete storm events
# Read daily loads and flow
# Read hourly loads and flow
from common_settings import obspath, outpath, events_name, \
    obs_events, day_load_flow, hour_load_flow, conct_name, modpath, mod_load_flow

In [3]:
from utils.concentration import cumulative_lq, excel_save
from utils.signatures import update_cumul_df, load_flow_loc

## Produce the event mean concentration of obs and mod

In [3]:
# Low-frequency (daily) data: keep the columns whose name mentions a load
# or the 'Flow(ML)' flow.
cols = [
    name for name in day_load_flow.columns
    if any(tag in name for tag in ('Load', 'Flow(ML)'))
]
# Event windows handed to event_emc below.
# NOTE(review): 38 and 60 are dataset-specific boundaries (presumably the
# split between events covered by daily and by hourly records) -- confirm
# against the events file.
index_range1 = [1, 38]
index_range2 = [60, len(obs_events) + 1]

In [5]:
# Daily-data EMC for the events in index_range1, using the first two matched
# columns. NOTE(review): multiplier=1e3 presumably converts units (e.g. t -> kg);
# confirm in utils.concentration.event_emc.
obs_events = event_emc(
    obs_events,
    day_load_flow,
    index_range1,
    cols[0],
    cols[1],
    time_scale='d',
    multiplier=1e3,
)

In [4]:
# High-frequency (hourly) data: keep the columns whose name mentions a load
# or 'ML'.
cols = [
    name for name in hour_load_flow.columns
    if any(tag in name for tag in ('Load', 'ML'))
]
index_range2 = [60, len(obs_events) + 1]
# For the hourly table the first matched column is used as flow and the
# second as load (the reverse of the daily table).
flow_col, loads_col = cols[0], cols[1]
obs_events = event_emc(
    obs_events, hour_load_flow, index_range2,
    loads_col, flow_col,
    time_scale='h', multiplier=1,
)

In [6]:
# Persist the observed events together with their EMC values.
# `index` is a boolean switch in DataFrame.to_csv; the header written for the
# index column is controlled by `index_label`. Naming it 'ID' explicitly
# guarantees the file can be read back with index_col='ID'.
obs_events.to_csv(outpath + events_name, index=True, index_label='ID')

In [21]:
# Read the discrete storm events of the model run.
filename = 'mod_storm_event_common.csv'
events = rainfall_events(f'{modpath}{filename}')

# Calculate EMC for modeling data: pick the load/flow columns by name.
cols = [col for col in mod_load_flow.columns if ('Load' in col) or ('ML' in col)]
index_range = [1, events.shape[0] + 1]
loads_col, flow_col = cols[0], cols[1]

events = event_emc(events, mod_load_flow, index_range, loads_col, flow_col,
                   time_scale='d', multiplier=1)

# Drop events with any missing value. Rebinding (rather than inplace=True)
# keeps the cell safe to re-run and avoids mutating a possibly shared frame.
events = events.dropna(axis=0)

# `index` is a boolean flag; the index header is set via `index_label` so the
# file can later be read back with index_col='ID'.
events.to_csv(f'{outpath}DIN_{filename}', index=True, index_label='ID')

## Produce the Normalized cumulative ratio of loads and flow 

### calculate the daily data for double mass plot (Q-L)

In [12]:
# One [start, end] pair per water year (1 July to 30 June of the next year),
# for start years 2009 through 2019.
time_ranges = [
    [f'{y}/7/1', f'{y + 1}/6/30']
    for y in range(2009, 2020)
]
# Alternative kept from the author: use the events' own windows instead,
#   time_ranges = obs_events.loc[:, ['start', 'end']].values
double_mass_ratio = dict()

In [11]:
# Build one cumulative load/flow table per period, skipping the last two
# entries of time_ranges; tables are keyed by their position in the list.
for pos, period in enumerate(time_ranges[:-2]):
    daily = load_flow_loc(period, day_load_flow, timestep='d')
    daily_values = daily.values
    # First column and last column of the daily slice are passed to the helper.
    double_mass_ratio[f'obs_year_{pos}'] = update_cumul_df(
        daily, daily_values[:, 0], daily_values[:, -1]
    )

In [12]:
# Write all yearly tables into a single Excel workbook.
# NOTE(review): the meaning of the third positional argument (True) is
# defined in utils.concentration.excel_save and is not visible here.
fn = f'{outpath}obs_year_cumulative_ratio_day.xlsx'
excel_save(double_mass_ratio, fn, True)

### calculate the hourly data for double mass plot (Q-L)

In [15]:
# Hourly double-mass data for the high-frequency storm events.
#
# The windows must be the storm events' own [start, end] pairs. The yearly
# `time_ranges` list built for the daily analysis has only 11 entries, so
# indexing it with the event positions used below (59 and up) raises
# IndexError on a fresh Restart & Run All.
storm_ranges = obs_events.loc[:, ['start', 'end']].values

double_mass_ratio = {}
# index_range2 is presumably expressed in 1-based event IDs, hence the -1 to
# obtain positions into `storm_ranges`.
for ii in range(index_range2[0] - 1, index_range2[1] - 1):
    df_temp = load_flow_loc(storm_ranges[ii], hour_load_flow, timestep='h')
    # Last column first, first column second: the reverse of the daily cell
    # (presumably because flow precedes load in the hourly table).
    df_temp = update_cumul_df(df_temp, df_temp.values[:, -1], df_temp.values[:, 0])
    double_mass_ratio[f'obs_storm_{ii}'] = df_temp

In [16]:
# Write all storm-event tables into a single Excel workbook.
fn = f'{outpath}obs_storm_cumulative_ratio_hour.xlsx'
excel_save(double_mass_ratio, fn)

### calculate the modeling data for double mass plot (Q-L)

In [28]:
# NOTE(review): this rebinds `modpath`, shadowing the value imported from
# common_settings for every cell executed afterwards -- confirm this
# hard-coded relative path is intended.
modpath = '../data/mod/'
filename = 'storm_event.csv'
# Load the modelled storm events from <modpath><filename>.
mod_events = rainfall_events(modpath + filename)

In [29]:
# Select the load/flow columns of the model output and define the event
# window used by the double-mass loop below (no EMC is computed here).
cols = [
    name for name in mod_load_flow.columns
    if any(tag in name for tag in ('Load', 'ML'))
]
index_range = [1, len(mod_events)]
loads_col, flow_col = cols[0], cols[1]

In [30]:
double_mass_ratio = {}
# Use each modelled storm event's own [start, end] window.
time_ranges = mod_events.loc[:, ['start', 'end']].values
# NOTE(review): index_range is used here as *positions* into time_ranges.
# With index_range = [1, n_events] the row at position 0 is never processed,
# which may be an off-by-one -- confirm whether skipping it is intended.
for ii in range(*index_range):
    event_df = load_flow_loc(time_ranges[ii], mod_load_flow, timestep='d')
    event_values = event_df.values
    # First and last columns of the event slice are passed to the helper.
    double_mass_ratio[f'mod_storm_{ii}'] = update_cumul_df(
        event_df, event_values[:, 0], event_values[:, -1]
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [31]:
# Write all modelled storm-event tables into a single Excel workbook.
fn = f'{outpath}mod_storm_cumulative_ratio_day.xlsx'
excel_save(double_mass_ratio, fn)

## Calculate event load coefficients

### Event loads for obs

In [23]:
# Reload the observed events table (indexed by the 'ID' column).
obs_event_fn = 'obs_storm_event_common'
obs_events = pd.read_csv(outpath + obs_event_fn + '.csv', index_col='ID')
# Water-year windows (1 July to 30 June) for start years 2009 through 2015.
time_ranges = [
    [f'{y}/7/1', f'{y + 1}/6/30']
    for y in range(2009, 2016)
]

In [24]:
# Total of the first column of the daily observations for each water year,
# rounded to 2 decimals and keyed by the four-character start year.
# The last two periods of time_ranges are left out; an hourly-data variant
# for those two periods existed here as commented-out code and stays disabled.
year_loads = {
    period[0][0:4]: np.round(
        load_flow_loc(period, day_load_flow, timestep='d').values[:, 0].sum(), 2
    )
    for period in time_ranges[0:-2]
}

In [25]:
# Event load coefficient = event total (first column of the daily slice)
# divided by the total of the water year in which the event starts.
# Only events 1 .. index_range1[1]-1 are processed; an hourly-data variant
# for the index_range2 events existed here as commented-out code and stays
# disabled.
for event_id in range(1, index_range1[1]):
    window = obs_events.loc[event_id, 'start':'end'].values
    event_total = load_flow_loc(window, day_load_flow, timestep='d').values[:, 0].sum()
    started = pd.to_datetime(obs_events.loc[event_id, 'start'])
    # Water years begin on 1 July, so January-June events belong to the
    # water year keyed by the previous calendar year.
    water_year = started.year - 1 if started.month < 7 else started.year
    obs_events.loc[event_id, 'event_load_coefficients'] = (
        event_total / year_loads[str(water_year)]
    )

In [26]:
# Overwrite the observed events file with the added coefficient column.
obs_events.to_csv(outpath + obs_event_fn + '.csv')

### Event loads for mod

In [27]:
# Reload the modelled events table (indexed by the 'ID' column).
mod_event_fn = 'DIN_mod_storm_event_common'
mod_events = pd.read_csv(outpath + mod_event_fn + '.csv', index_col='ID')
# Water-year windows (1 July to 30 June) for start years 2009 through 2013.
time_ranges = [
    [f'{y}/7/1', f'{y + 1}/6/30']
    for y in range(2009, 2014)
]

In [28]:
# Total of the first column of the daily model output for each water year,
# rounded to 2 decimals and keyed by the four-character start year.
mod_loads = {
    period[0][0:4]: np.round(
        load_flow_loc(period, mod_load_flow, timestep='d').values[:, 0].sum(), 2
    )
    for period in time_ranges
}

In [29]:
# Event load coefficient = event total (first column of the daily slice)
# divided by the total of the water year in which the event starts.
for event_id in mod_events.index:
    window = mod_events.loc[event_id, 'start':'end'].values
    event_total = load_flow_loc(window, mod_load_flow, timestep='d').values[:, 0].sum()
    started = pd.to_datetime(mod_events.loc[event_id, 'start'])
    # Water years begin on 1 July, so January-June events belong to the
    # water year keyed by the previous calendar year.
    water_year = started.year - 1 if started.month < 7 else started.year
    mod_events.loc[event_id, 'event_load_coefficients'] = (
        event_total / mod_loads[str(water_year)]
    )

In [30]:
# Overwrite the modelled events file with the added coefficient column.
mod_events.to_csv(outpath + mod_event_fn + '.csv')

## Calculate the peaktime difference between flow and loads

### Mod results

In [31]:
# Reload the modelled events table (indexed by the 'ID' column).
mod_event_fn = 'DIN_mod_storm_event_common'
mod_events = pd.read_csv(outpath + mod_event_fn + '.csv', index_col='ID')

In [32]:
# Find, for every modelled event, when load and flow peak and how large the
# flow peak is.
for ii in mod_events.index:
    df_event = load_flow_loc(mod_events.loc[ii, 'start':'end'].values, mod_load_flow, timestep='d')
    event_loads = df_event.loc[:, 'Loads (kg)']
    event_flow = df_event.loc[:, 'Flow_cumecs (ML.day^-1)']
    # idxmax() returns one index label (the first on ties). The previous
    # boolean-mask `.index` produced an Index object, which cannot be stored
    # in a single cell when several rows share the maximum.
    mod_events.loc[ii, 'peaktime_load'] = event_loads.idxmax()
    mod_events.loc[ii, 'peakflow'] = event_flow.max()
    mod_events.loc[ii, 'peaktime'] = event_flow.idxmax()

# Lag between load peak and flow peak. Converting explicitly keeps the
# subtraction valid when the columns are object dtype and matches the
# observed-data cell.
mod_events.loc[:, 'delta_time'] = pd.to_datetime(mod_events.peaktime_load) - pd.to_datetime(mod_events.peaktime)

In [33]:
# Overwrite the modelled events file with the added peak columns.
mod_events.to_csv(outpath + mod_event_fn + '.csv')

### Obs results

In [34]:
# Reload the observed events table (indexed by the 'ID' column).
obs_event_fn = 'obs_storm_event_common'
obs_events = pd.read_csv(outpath + obs_event_fn + '.csv', index_col='ID')

In [35]:
# Find, for every observed event, when load and flow peak and how large the
# flow peak is (daily observations).
for ii in obs_events.index:
    df_event = load_flow_loc(obs_events.loc[ii, 'start':'end'].values, day_load_flow, timestep='d')
    event_loads = df_event.loc[:, 'Linear_Average_Load(t)']
    event_flow = df_event.loc[:, 'Flow(ML)']
    # idxmax() returns one index label (the first on ties). The previous
    # boolean-mask `.index` produced an Index object, which cannot be stored
    # in a single cell when several rows share the maximum.
    obs_events.loc[ii, 'peaktime_load'] = event_loads.idxmax()
    obs_events.loc[ii, 'peakflow'] = event_flow.max()
    obs_events.loc[ii, 'peaktime'] = event_flow.idxmax()

# Lag between load peak and flow peak.
obs_events.loc[:, 'delta_time'] = pd.to_datetime(obs_events.peaktime_load) - pd.to_datetime(obs_events.peaktime)

In [36]:
# Overwrite the observed events file with the added peak columns.
obs_events.to_csv(outpath + obs_event_fn + '.csv')

## Variability of load-discharge ratio (seasonal average concentration)

### Obs results

In [16]:
# Quarter boundaries of each water year: 1 Jul, 1 Oct, 1 Jan, 1 Apr and the
# following 1 Jul, for start years 2009 through 2013.
time_ranges = [
    [f'{y}/7/1', f'{y}/10/1', f'{y + 1}/1/1', f'{y + 1}/4/1', f'{y + 1}/7/1']
    for y in range(2009, 2014)
]
# Empty result table: one row per start year (as a string), one column per
# quarter number 1-4.
df_ratio = pd.DataFrame(
    index=list(map(str, range(2009, 2014))),
    columns=[1, 2, 3, 4],
)

In [17]:
# Seasonal ratio for the observations: for every quarter of every water
# year, divide the column-0 total by the column-2 total of the daily slice.
for tt in time_ranges:
    for ii in range(len(tt) - 1):
        start = pd.to_datetime(tt[ii])
        # A quarter ends the day before the next boundary date.
        end = pd.to_datetime(tt[ii + 1]) - datetime.timedelta(days=1)
        df = load_flow_loc([start, end], day_load_flow, timestep='d')
        totals = df.sum(axis=0)  # computed once, reused for both terms
        # Use .iloc for positional access: integer keys on a label-indexed
        # Series (`totals[0]`) are deprecated in recent pandas.
        # NOTE(review): the factor 1000 is presumably a unit conversion
        # (the model cell has none) -- confirm.
        df_ratio.loc[tt[0][0:4], ii + 1] = totals.iloc[0] / totals.iloc[2] * 1000

In [18]:
# Save the observed seasonal ratios.
df_ratio.to_csv(outpath + 'obs_seasonal_concentration.csv')

### Mod results

In [19]:
# Quarter boundaries of each water year: 1 Jul, 1 Oct, 1 Jan, 1 Apr and the
# following 1 Jul, for start years 2009 through 2013.
time_ranges = [
    [f'{y}/7/1', f'{y}/10/1', f'{y + 1}/1/1', f'{y + 1}/4/1', f'{y + 1}/7/1']
    for y in range(2009, 2014)
]
# Fresh, empty result table for the model: rows are start years (strings),
# columns are quarter numbers 1-4.
df_ratio = pd.DataFrame(
    index=list(map(str, range(2009, 2014))),
    columns=[1, 2, 3, 4],
)

In [20]:
# Seasonal ratio for the model output: for every quarter of every water
# year, divide the column-0 total by the column-2 total of the daily slice.
for tt in time_ranges:
    for ii in range(len(tt) - 1):
        start = pd.to_datetime(tt[ii])
        # A quarter ends the day before the next boundary date.
        end = pd.to_datetime(tt[ii + 1]) - datetime.timedelta(days=1)
        df = load_flow_loc([start, end], mod_load_flow, timestep='d')
        totals = df.sum(axis=0)  # computed once, reused for both terms
        # Use .iloc for positional access: integer keys on a label-indexed
        # Series (`totals[0]`) are deprecated in recent pandas.
        df_ratio.loc[tt[0][0:4], ii + 1] = totals.iloc[0] / totals.iloc[2]

In [21]:
# Save the modelled seasonal ratios.
df_ratio.to_csv(outpath + 'mod_seasonal_concentration.csv')

## Monthly loads

In [10]:
# Empty table that the monthly loop fills row by row: one column for the
# observed and one for the modelled monthly value.
monthly_columns = ['obs', 'mod']
df_month = pd.DataFrame(columns=monthly_columns)
del monthly_columns

In [51]:
# Monthly totals of the first column of the observed and modelled daily
# tables, for every calendar month of 2009-2014.
for y in range(2009, 2015):
    for m in range(1, 13):
        start = pd.to_datetime(f'{y}/{m}/1')
        # Last day of the month = first day of the following month minus one
        # day (December rolls over into January of the next year).
        if m == 12:
            next_start = pd.to_datetime(f'{y+1}/1/1')
        else:
            next_start = pd.to_datetime(f'{y}/{m+1}/1')
        end = next_start - datetime.timedelta(days=1)

        obs_totals = load_flow_loc([start, end], day_load_flow, timestep='d').sum(axis=0)
        mod_totals = load_flow_loc([start, end], mod_load_flow, timestep='d').sum(axis=0)
        # Use .iloc for positional access: integer keys on a label-indexed
        # Series (`totals[0]`) are deprecated in recent pandas.
        # NOTE(review): the factor 1000 on the observed value is presumably a
        # unit conversion to match the model -- confirm.
        df_month.loc[f'{y}/{m}', 'obs'] = 1000 * obs_totals.iloc[0]
        df_month.loc[f'{y}/{m}', 'mod'] = mod_totals.iloc[0]

# Keep only months where both values are non-zero. The 'mod' column is
# accessed through .loc because `df_month.mod` is the DataFrame.mod method.
df_month = df_month[(df_month.obs != 0) & (df_month.loc[:, 'mod'] != 0)]
df_month.index.name = 'Month'

In [52]:
# Save the paired monthly values.
df_month.to_csv(outpath + 'mod_obs_month.csv')

## Calculate the coefficients of variation for concentrations (CVC) and discharge (CVQ), their ratio (CVC:CVQ)

In [53]:
# read df
# define timeperiod
# calculate CVC and CVQ 
# calculate the coefficients of CVC/CVQ