In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.io import savemat # for saving data to .mat file

In [3]:
# Set the working directory so that the relative 'data/...' and './results/...'
# paths used further down resolve against the project folder.
# os.chdir('ADD_YOUR_PATH_HERE')  # Set working directory

# ---------------------------- local working directory ----------------------------
# The project folder can be overridden without editing the notebook by setting
# the SERIESDISTANCE_DIR environment variable; when it is not set, the original
# machine-specific default below is used.
os.chdir(os.environ.get('SERIESDISTANCE_DIR', r"/home/illich/SeriesDistance/"))
# ---------------------------- local working directory ----------------------------

# Read input
# 'obs' and 'sim' come from columns 2 and 3 (0-based usecols) of the same
# semicolon-separated file; the header line is skipped and at most 10000 data
# rows are read.
obs = np.genfromtxt('data/HOST_timeseries.csv', delimiter=';', skip_header=1, usecols=2, max_rows=10000)
sim = np.genfromtxt('data/HOST_timeseries.csv', delimiter=';', skip_header=1, usecols=3, max_rows=10000)


def _read_event_table(path):
    """Read a semicolon-separated table and return it with one row per record.

    np.genfromtxt collapses a file containing a single row into a 1-D array.
    The event loop addresses these tables as table[row, column] and uses
    len(table) as the number of rows, so a non-empty result is promoted to 2-D.
    An empty result is returned unchanged so that len() stays 0.
    """
    table = np.genfromtxt(path, delimiter=';')
    return np.atleast_2d(table) if table.size else table


obs_events = _read_event_table('data/HOST_obs_events.csv')
sim_events = _read_event_table('data/HOST_sim_events.csv')
obs_sim_pairing = _read_event_table('data/HOST_event_pairing.csv')

# Output filename (handed to savemat at the end of the script; relative to the working directory)
outfile = './results/output.mat'

# Smoothing options
smooth_flag = True  # Smooth both obs and sim (default=True)
nse_smooth_limit = 0.99  # Specifies degree of smoothing according to NSE criterion (default=0.99); forwarded to f_smooth_DP

# Specification of the magnitude error model
error_model = 'relative'  # 'relative' or 'standard'; (default='relative')

# Parametrization of the objective function (all four weights are forwarded to f_CoarseGraining_Event)
# With the defaults below the weights add up to 1 (1/7 + 1/7 + 5/7 + 0).
weight_nfc = 1/7  # Weights number of re-assigned hydrological cases (default=1/7)
weight_rds = 1/7  # Weights the importance of the re-assigned segments (default=1/7)
weight_sdt = 5/7  # Weights the SD timing error component (default=5/7)
weight_sdv = 0  # Weights the SD magnitude error component (default=0)

# Set plot flags (each flag switches one plotting call on or off)
pf_input = True  # Plots smoothed and original input time series ('obs' and 'sim')
pf_CoarseGrainSteps = False  # Plots intermediate coarse graining steps and progression of objective function
pf_segs_cons_indivEvents = False  # Plots each individual event with optimized segments and connectors in an own figure
pf_objective_functions = False  # Plots the SD objective function for each event in an own figure
pf_segs_cons_entireSeries = True  # Plots the entire time series with optimized segments and connectors in an individual figure
pf_errorDistributions = True  # SeriesDistance error distributions for rise, fall and low-flow

# Data manipulations and pre-processing
# Keep copies of the series as read from file unconditionally: 'obs_org' and
# 'sim_org' are needed by the input plot and by the saved results even when
# smoothing is switched off (they were previously undefined in that case).
obs_org = obs.copy()
sim_org = sim.copy()
if smooth_flag:
    obs, sim = f_smooth_DP(obs, sim, nse_smooth_limit)  # Implement this function

# replace identical neighboring values to avoid problems with assignment of unique peaks and valleys
obs = f_ReplaceEqualNeighbours(obs)  # Implement this function
sim = f_ReplaceEqualNeighbours(sim)  # Implement this function

# plot input data (original vs. pre-processed series together with the event tables)
if pf_input:
    f_PlotInput(obs_org, obs, obs_events, sim_org, sim, sim_events, obs_sim_pairing, [])  # Implement this function

# Result containers filled by the event loop (coarse graining and SD)

# SD error distributions over all events: time (t) and magnitude (q) components
e_sd_t_rise, e_sd_q_rise = [], []  # rise
e_sd_t_fall, e_sd_q_fall = [], []  # fall

# Coarse-grained segments of 'obs' and 'sim'
segs_obs_opt_all, segs_sim_opt_all = [], []

# Segment statistics
seg_raw_statistics, seg_opt_statistics = [], []

# Event ID for each segs_opt object
seg_event_id = []

# Connectors between matching points in 'obs' and 'sim' (one independent list per field)
connectors = {
    field: []
    for field in ('x_match_obs_global', 'y_match_obs', 'x_match_sim_global', 'y_match_sim')
}

# Apply coarse-graining and SD method to each event
# Each row of 'obs_sim_pairing' holds the start index of an observed event
# (column 0) and of the simulated event paired with it (column 1).
n_events = len(obs_sim_pairing)
for ii in range(n_events):
    # print progress in command window
    print(f'event {ii + 1} of {n_events}')

    # get event start and corresponding end points
    # The end index is looked up in the event table row whose start (column 0)
    # equals the paired start. np.genfromtxt returns floats while range() only
    # accepts integers, hence the explicit int() casts.
    # NOTE(review): the values are used as 0-based positions into obs/sim as-is;
    # confirm that the CSV files do not contain 1-based indices.
    obs_start = obs_sim_pairing[ii, 0]
    sim_start = obs_sim_pairing[ii, 1]
    obs_row = np.where(obs_events[:, 0] == obs_start)[0][0]
    sim_row = np.where(sim_events[:, 0] == sim_start)[0][0]
    obs_eventindex = range(int(obs_start), int(obs_events[obs_row, 1]) + 1)
    sim_eventindex = range(int(sim_start), int(sim_events[sim_row, 1]) + 1)

    # apply coarse graining: determines optimal level of aggregation of each event (key output: segs_xxx_opt)
    segs_obs_opt, segs_sim_opt, cons, connector_data, ObFuncVal, opt_step, CoarseGrain_segs, seg_raw_stats = \
        f_CoarseGraining_Event(obs, obs_eventindex, sim, sim_eventindex, weight_nfc, weight_rds, weight_sdt, weight_sdv, error_model, pf_CoarseGrainSteps)  # Implement this function

    # summarize segment statistics (before and after coarse graining)
    seg_raw_statistics.append(seg_raw_stats)
    # One flat row per event: event number, obs segment statistics, a 0/1 flag
    # telling whether more than one objective function value exists, the
    # optimal step, and the sim segment statistics. The scalars are placed
    # inside the list literal because a list cannot be concatenated with an int.
    # NOTE(review): assumes f_SegStats returns a flat sequence -- confirm.
    seg_opt_statistics.append([
        ii + 1,
        *f_SegStats(segs_obs_opt),  # Implement f_SegStats
        int(len(ObFuncVal) > 1),
        opt_step,
        *f_SegStats(segs_sim_opt),
    ])

    # store optimized segment combination for all events
    segs_obs_opt_all.extend(segs_obs_opt)
    segs_sim_opt_all.extend(segs_sim_opt)

    # memorize corresponding event ID (only required for plotting)
    seg_event_id.extend([ii + 1] * len(segs_obs_opt))

    # plot each individual event with optimized segments and connectors in an own figure
    if pf_segs_cons_indivEvents:
        f_PlotConnectedSeries(obs, segs_obs_opt, sim, segs_sim_opt, cons)  # Implement this function

    # plot the objective function values
    if pf_objective_functions:
        f_plot_ObjectiveFunction_CoarsGrainStps(ObFuncVal, opt_step, f'event # {ii + 1}')  # Implement this function

    # compute SD results for the optimized level of generalization
    # The window spans from the start of the first to the end of the last
    # optimized segment; int() keeps range() working if the bounds are floats.
    obs_fromto = range(int(segs_obs_opt[0]['starttime_global']), int(segs_obs_opt[-1]['endtime_global']) + 1)
    sim_fromto = range(int(segs_sim_opt[0]['starttime_global']), int(segs_sim_opt[-1]['endtime_global']) + 1)
    # NOTE(review): the last argument is the string 'true', not the boolean True;
    # any non-empty string is truthy, so verify what f_SD expects here.
    _, _, _, e_q_rise, e_t_rise, _, e_q_fall, e_t_fall, _, cons, _, _ = f_SD(obs[obs_fromto], segs_obs_opt, sim[sim_fromto], segs_sim_opt, error_model, 'true')  # Implement this function

    # build overall SD error distributions by successively adding errors from individual events
    e_sd_t_rise.extend(e_t_rise)
    e_sd_q_rise.extend(e_q_rise)
    e_sd_t_fall.extend(e_t_fall)
    e_sd_q_fall.extend(e_q_fall)

    # store individual connectors of each single event
    # Always extend the accumulator lists: this yields the same contents as
    # assigning on the first event, but never aliases the per-event 'cons'
    # object and never replaces the lists by another container type.
    for field in ('x_match_obs_global', 'y_match_obs', 'x_match_sim_global', 'y_match_sim'):
        connectors[field].extend(cons[0][field])
    # here ends the big loop over all 'ii' items in the matching event list

# Tag every obs segment, and the sim segment at the same position, with its
# event number (used for plotting to tell events apart)
for kk, obs_segment in enumerate(segs_obs_opt_all):
    event_id = seg_event_id[kk]
    obs_segment['eventID'] = event_id
    segs_sim_opt_all[kk]['eventID'] = event_id

# SeriesDistance error distribution for the no-event periods
e_sd_lowFlow, cons1D = f_SD_1dNoEventError(obs, sim, obs_events, sim_events, obs_sim_pairing, error_model)  # Implement this function

# Append the no-event connectors to the connectors collected for the events
low_flow_cons = cons1D[0]
for field in ('x_match_obs_global', 'y_match_obs', 'x_match_sim_global', 'y_match_sim'):
    connectors[field].extend(low_flow_cons[field])

# Contingency table computed from the event tables and their pairing
contingency_table = f_ComputeContingencyTable(obs_events, sim_events, obs_sim_pairing)  # Implement this function

# plot the entire time series with all coarse-grained segments and connectors
# (uses the segments and connectors accumulated over all events plus the no-event connectors)
if pf_segs_cons_entireSeries:
    f_PlotConnectedSeries(obs, segs_obs_opt_all, sim, segs_sim_opt_all, connectors)  # Implement this function

# plot error distributions
if pf_errorDistributions:
    # plot 2d error distributions (rise and fall, magnitude and time components);
    # the four trailing arguments are passed as empty lists
    f_PlotSDErrors(e_sd_q_rise, e_sd_t_rise, e_sd_q_fall, e_sd_t_fall, [], [], [], [])  # Implement this function
    # plot 1d error distribution of the no-event (low-flow) periods
    f_Plot1dErrors(e_sd_lowFlow, [], 'SD low-flow')  # Implement this function

# Save all inputs, outputs and important parameters into a single .mat file
# Create the target folder first: savemat does not create missing directories,
# and failing here would discard the results of the whole analysis.
out_dir = os.path.dirname(outfile)
if out_dir:  # an output path without a folder part needs no directory
    os.makedirs(out_dir, exist_ok=True)

savemat(outfile, {
    # pre-processed and original input series with their event tables
    'obs': obs,
    'obs_org': obs_org,
    'obs_events': obs_events,
    'sim': sim,
    'sim_org': sim_org,
    'sim_events': sim_events,
    'obs_sim_pairing': obs_sim_pairing,
    # coarse-graining results
    'segs_obs_opt_all': segs_obs_opt_all,
    'segs_sim_opt_all': segs_sim_opt_all,
    'seg_raw_statistics': seg_raw_statistics,
    'seg_opt_statistics': seg_opt_statistics,
    'connectors': connectors,
    # SeriesDistance error distributions
    'e_sd_q_rise': e_sd_q_rise,
    'e_sd_t_rise': e_sd_t_rise,
    'e_sd_q_fall': e_sd_q_fall,
    'e_sd_t_fall': e_sd_t_fall,
    'e_sd_lowFlow': e_sd_lowFlow,
    # settings and summary
    'error_model': error_model,
    'contingency_table': contingency_table
})

NameError: name 'f_smooth_DP' is not defined