In [9]:

from itertools import product
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import minimize
import json

import os

# Move to the parent directory so that the `src` package imported below and
# the relative `data/` paths resolve. The guard makes this cell idempotent:
# an unconditional `os.chdir('..')` climbs one more level on every re-run of
# the cell, after which the imports and file paths no longer resolve.
if not os.path.isdir('src'):
    os.chdir('..')

from src.swbm import *
from src.plots import *
from src.utils import *

#%pwd

In [2]:
# Read the raw per-site CSV tables, then pass each through the project's
# `prepro` helper to get the formatted inputs used by the rest of the notebook.
input_swbm_ger, input_swbm_swe, input_swbm_esp = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/Data_swbm_Germany.csv',
        'data/Data_swbm_Sweden.csv',
        'data/Data_swbm_Spain.csv',
    )
)

input_ger, input_swe, input_esp = (
    prepro(raw_table)
    for raw_table in (input_swbm_ger, input_swbm_swe, input_swbm_esp)
)

# Order matters downstream: Germany, Sweden, Spain.
sites = [input_ger, input_swe, input_esp]

In [3]:
# Training window (both bounds inclusive) and the matching boolean row mask,
# derived from the German input's time column.
start_train, end_train = '2010-01-01', '2015-12-31'
train_mask = input_ger['time'].between(start_train, end_train)

In [4]:
# Date windows (bounds inclusive). Evaluation starts one year after training
# starts and shares the training end date.
start_train, start_eval = '2010-01-01', '2011-01-01'
end_train = '2015-12-31'
end_eval = end_train

# Boolean row mask for the training window, derived from the German input.
train_mask = input_ger['time'].between(start_train, end_train)



In [5]:
# Candidate values for each constant model parameter (three levels each).
cs, b0, g, a = (
    [210, 420, 840],
    [0.4, 0.6, 0.8],
    [0.2, 0.5, 0.8],
    [2, 4, 8],
)

# Full Cartesian grid; every entry is a (cs, b0, g, a) tuple.
all_combinations = [*product(cs, b0, g, a)]

In [6]:
# Grid search over the constant parameter combinations, once per site.
# For every combination the sinus parameters of `swbm_param` are fitted with
# scipy's `minimize`, the model is run with the fitted seasonal curve, and the
# combination with the highest summed correlation on the evaluation window is
# kept. `best_score` ends up with one result dict per entry of `sites`.
swbm_param = 'b0'
np.random.seed(42)

best_score = []
for site in sites:
    # Build the training mask from this site's own time column. A mask derived
    # from another site's frame only selects the right rows when both frames
    # share exactly the same index and calendar.
    site_train_mask = (site['time'] >= start_train) & (site['time'] <= end_train)
    # Non-inplace reset: an inplace reset on a boolean-mask slice is chained
    # assignment and triggers pandas' SettingWithCopyWarning.
    input_swbm = site[site_train_mask].reset_index(drop=True)

    # Evaluation window, expressed on the re-indexed training frame so the
    # mask lines up positionally with the model output below.
    eval_mask = (input_swbm['time'] >= start_eval) & (input_swbm['time'] <= end_eval)
    input_eval = input_swbm[eval_mask].reset_index(drop=True)

    # Reset the per-site winner so a result from a previous site can never be
    # appended again for this one.
    max_corr_score = -np.inf
    combination_dict = None

    for combination in all_combinations:
        const_swbm_params = {'c_s': combination[0],
                             'b0': combination[1],
                             'g': combination[2],
                             'a': combination[3]}

        # Start values for the sinus fit.
        # NOTE(review): presumably ordered amplitude, freq, phase, center --
        # confirm against opt_swbm_corr. The last entry is always the b0 grid
        # value, which only matches swbm_param == 'b0'.
        init_values = [0.5, 2, 5, combination[1]]

        res = minimize(opt_swbm_corr,
                       np.asarray(init_values).flatten(),  # has to be 1D
                       args=(input_swbm, const_swbm_params, swbm_param),
                       options={"maxiter": 500, "disp": False})
        opt_params_df = minimize_res2df(res, [swbm_param])

        # Fresh parameter dict for the seasonal run, built from the grid tuple
        # so it does not depend on the dict handed to the optimiser.
        params_seasonal = {'c_s': combination[0],
                           'b0': combination[1],
                           'g': combination[2],
                           'a': combination[3]}

        # Replace the constant value of `swbm_param` by the fitted sinus curve.
        params_seasonal[swbm_param] = seasonal_sinus(
            len(input_swbm),
            amplitude=opt_params_df.loc['amplitude', swbm_param],
            freq=opt_params_df.loc['freq', swbm_param],
            phase=opt_params_df.loc['phase', swbm_param],
            center=opt_params_df.loc['center', swbm_param],
            which=swbm_param
        )

        # Run SWBM with the optimized seasonal parameter.
        preds_seasonal = predict_ts(input_swbm, params_seasonal)
        moists_seasonal, runoffs_seasonal, ets_seasonal, na_count = preds_seasonal

        # Cut the predictions down to the evaluation window.
        moists_eval = moists_seasonal[eval_mask]
        runoffs_eval = runoffs_seasonal[eval_mask]
        ets_eval = ets_seasonal[eval_mask]

        # Score the run: summed correlation over the evaluated outputs.
        eval_df = eval_swbm(input_eval,
                            {'sm': moists_eval,
                             'ro': runoffs_eval,
                             'le': ets_eval},
                            swbm_param)
        sum_corr = eval_df['corr'].sum()

        if sum_corr > max_corr_score:
            max_corr_score = sum_corr
            # Keep everything needed to inspect the current best run.
            combination_dict = {
                'Combination': combination,
                'sum_corr': sum_corr,
                'SinusParameters': opt_params_df,
                'eval_df': eval_df,
                'na_count': na_count,
                'na_sum': sum(na_count.values())
            }

    # Fail loudly instead of appending a stale or undefined result when no
    # combination was accepted (empty grid, or every score was NaN).
    if combination_dict is None:
        raise ValueError(
            f"No parameter combination produced a usable correlation score "
            f"for site index {len(best_score)}."
        )

    best_score.append(combination_dict)

No corr. P=0.9255945773588431
No corr. P=0.9255946461060085
No corr. P=0.9255947965801951
No corr. P=0.925594540188593
No corr. P=0.9255942825059603
No corr. P=0.9210783116562964
No corr. P=0.921078416269813
No corr. P=0.9210785765813784
No corr. P=0.921078269167282
No corr. P=0.9210780838593837
No corr. P=0.9741440646231867
No corr. P=0.9741441816680866
No corr. P=0.9741448338073927
No corr. P=0.974144149631306
No corr. P=0.9741444764624316
No corr. P=0.7664647310840697
No corr. P=0.7664648464640975
No corr. P=0.7664654598177483
No corr. P=0.766464811363654
No corr. P=0.7664651235330859
No corr. P=0.183091770685705
No corr. P=0.18309182532010046
No corr. P=0.18309207090912594
No corr. P=0.18309180330843627
No corr. P=0.1830919352777916
No corr. P=0.20123065808121518
No corr. P=0.2012307044417843
No corr. P=0.20123102275052007
No corr. P=0.201230667034165
No corr. P=0.20123060501411558
No corr. P=0.4879881918082194
No corr. P=0.4879881007151412
No corr. P=0.48798774413957885
No corr. P

In [60]:
def transform_data(input_data):
    """Flatten one best-score record into a JSON-friendly list of dicts.

    Parameters
    ----------
    input_data : mapping
        Must provide 'Combination' (indexable, at least four entries),
        'sum_corr', and 'eval_df' whose 'corr' entry is indexable at 0, 1, 2.

    Returns
    -------
    list of dict
        ``[parameters, summed correlation, per-output correlations]``.
    """
    combo = input_data['Combination']
    corr = input_data['eval_df']['corr']

    params_part = {
        'cs': combo[0],
        'b0': combo[1],
        'g': combo[2],
        'a': combo[3],
    }
    total_part = {'sum_corr': input_data['sum_corr']}
    # Assumes the rows of eval_df are ordered sm, ro, le -- TODO confirm
    # against eval_swbm.
    per_output_part = {
        'sm_cor': corr[0],
        'ro_cor': corr[1],
        'le_cor': corr[2],
    }
    return [params_part, total_part, per_output_part]

In [64]:
# best_score is ordered like `sites`: Germany, Sweden, Spain.
ger_output, swe_output, esp_output = [
    transform_data(best_score[site_idx]) for site_idx in range(3)
]

In [65]:
# Persist the per-site calibration summaries as JSON in the working directory.
for out_name, payload in (
    ('ger_output.json', ger_output),
    ('swe_output.json', swe_output),
    ('esp_output.json', esp_output),
):
    with open(out_name, 'w') as file:
        json.dump(payload, file)

In [None]:
# Read the per-site calibration summaries back from the working directory.
loaded_calib = []
for in_name in ('ger_output.json', 'swe_output.json', 'esp_output.json'):
    with open(in_name, 'r') as file:
        loaded_calib.append(json.load(file))

ger_calib, swe_calib, esp_calib = loaded_calib