In [1]:
from itertools import product
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import minimize
import json

import os
# Move to the project root so the `src.*` imports and the relative `data/` and
# `results/` paths used in later cells resolve. The guard keeps this cell
# idempotent: once the working directory already contains `src`, re-running the
# cell no longer climbs one directory further.
if not os.path.isdir('src'):
    os.chdir('..')

from src.swbm import *
from src.plots import *
from src.utils import *

#%pwd

In [2]:
# Read the three per-country CSV files ...
input_swbm_ger, input_swbm_swe, input_swbm_esp = map(
    pd.read_csv,
    ('data/Data_swbm_Germany.csv',
     'data/Data_swbm_Sweden.csv',
     'data/Data_swbm_Spain.csv'),
)

# ... and pass each raw table through the project's `prepro` helper
input_ger, input_swe, input_esp = map(
    prepro, (input_swbm_ger, input_swbm_swe, input_swbm_esp)
)

# Site order used by every later loop: Germany, Sweden, Spain
sites = [input_ger, input_swe, input_esp]

In [3]:
# Training window; both bounds are inclusive
start_train, end_train = '2010-01-01', '2015-12-31'
# Boolean row selector derived from the German table's 'time' column
train_mask = input_ger['time'].between(start_train, end_train)

In [4]:
# Training window (inclusive bounds)
start_train, end_train = '2010-01-01', '2015-12-31'

# Evaluation window: begins one year after the training window and ends with it
start_eval, end_eval = '2011-01-01', end_train

# Boolean row selector for the training window, built from the German table
train_mask = input_ger['time'].between(start_train, end_train)



In [5]:
# Candidate values for each model parameter (three levels apiece)
cs, b0, g, a = [210, 420, 840], [0.4, 0.6, 0.8], [0.2, 0.5, 0.8], [2, 4, 8]

# Full Cartesian grid as (c_s, b0, g, a) tuples; the last parameter varies fastest
all_combinations = [
    (cs_value, b0_value, g_value, a_value)
    for cs_value in cs
    for b0_value in b0
    for g_value in g
    for a_value in a
]

In [7]:
# Grid search: for every site, run the model with each constant parameter
# combination and keep the one with the highest summed correlation over the
# evaluation window.
swbm_param = 'b0'  # passed through to eval_swbm unchanged
np.random.seed(42)  # seeds NumPy's global RNG before the search

best_score = []
for site in sites:
    # Reset the per-site winner so a site that produces no valid score cannot
    # silently reuse the previous site's result.
    max_corr_score = -np.inf
    combination_dict = None

    # Training slice, selected on this site's own 'time' column. A fresh frame
    # is built instead of resetting the index of a filtered slice in place.
    site_train_mask = (site['time'] >= start_train) & (site['time'] <= end_train)
    input_swbm = site[site_train_mask].reset_index(drop=True)

    # Evaluation slice: the part of the training slice inside the eval window
    eval_mask = (input_swbm['time'] >= start_eval) & (input_swbm['time'] <= end_eval)
    input_eval = input_swbm[eval_mask].reset_index(drop=True)

    for combination in all_combinations:
        # Tuple layout follows product(cs, b0, g, a)
        const_swbm_params = {'c_s': combination[0],
                             'b0': combination[1],
                             'g': combination[2],
                             'a': combination[3]}

        # Run the model over the whole training slice with these constant parameters
        preds = predict_ts(input_swbm, const_swbm_params)
        moists, runoffs, ets, na_count = preds

        # Cut the predictions down to the evaluation window
        moists_eval = moists[eval_mask]
        runoffs_eval = runoffs[eval_mask]
        ets_eval = ets[eval_mask]

        # Score predictions against the evaluation data
        eval_df = eval_swbm(input_eval,
                            {'sm': moists_eval,
                             'ro': runoffs_eval,
                             'le': ets_eval},
                            swbm_param)

        # Sum once and reuse for both the comparison and the stored result
        sum_corr = eval_df['corr'].sum()
        if sum_corr > max_corr_score:
            max_corr_score = sum_corr
            # Keep everything needed to report the current best combination
            combination_dict = {
                'Combination': combination,
                'sum_corr': sum_corr,
                'eval_df': eval_df,
                'na_count': na_count,
                'na_sum': sum(na_count.values())
            }

    if combination_dict is None:
        raise ValueError('No parameter combination produced a comparable '
                         'correlation score for this site')

    best_score.append(combination_dict)
    print('Hi Im almost done :)')

Hi Im almost done :)
Hi Im almost done :)
Hi Im almost done :)


In [8]:
def transform_data(input_data):
    """Flatten one grid-search result into a JSON-friendly list of three dicts.

    Parameters
    ----------
    input_data : dict
        Must provide 'Combination' (indexable, four entries in the order
        c_s, b0, g, a), 'sum_corr', and 'eval_df' (a frame whose 'corr'
        column has at least three rows).

    Returns
    -------
    list of dict
        ``[parameters, {'sum_corr': ...}, per-variable correlations]``.

    Notes
    -----
    The first three 'corr' rows are read by position and labelled sm / ro / le.
    NOTE(review): this assumes eval_df lists the variables in that order;
    confirm against eval_swbm. The '*_cor' key spelling is kept as-is because
    these keys end up in the saved JSON files.
    """
    combination = input_data['Combination']
    corr = input_data['eval_df']['corr']

    # .iloc is explicitly positional: plain corr[0] is a label lookup on an
    # integer index and a deprecated positional fallback on any other index.
    # float() guarantees a plain Python number for json.dump.
    sm_cor, ro_cor, le_cor = (float(corr.iloc[row]) for row in range(3))

    output_data = [
        {'c_s': combination[0],
         'b0': combination[1],
         'g': combination[2],
         'a': combination[3]},
        {'sum_corr': input_data['sum_corr']},
        {'sm_cor': sm_cor,
         'ro_cor': ro_cor,
         'le_cor': le_cor},
    ]
    return output_data

In [9]:
# Flatten the per-site winners; best_score was filled in the order of `sites`
# (Germany, Sweden, Spain)
ger_noneseasonal, swe_noneseasonal, esp_noneseasonal = (
    transform_data(best_score[site_idx]) for site_idx in range(3)
)

In [10]:
# Write each site's calibration result to its own JSON file
for json_path, calibration in (
    ('results/ger_noneseasonal.json', ger_noneseasonal),
    ('results/swe_noneseasonal.json', swe_noneseasonal),
    ('results/esp_noneseasonal.json', esp_noneseasonal),
):
    with open(json_path, 'w') as file:
        json.dump(calibration, file)

In [11]:
# Re-read the same three per-country CSV files as the first load cell ...
input_swbm_ger, input_swbm_swe, input_swbm_esp = map(
    pd.read_csv,
    ('data/Data_swbm_Germany.csv',
     'data/Data_swbm_Sweden.csv',
     'data/Data_swbm_Spain.csv'),
)

# ... and pass each raw table through the project's `prepro` helper
input_ger, input_swe, input_esp = map(
    prepro, (input_swbm_ger, input_swbm_swe, input_swbm_esp)
)

# Site order used by the test loop: Germany, Sweden, Spain
sites = [input_ger, input_swe, input_esp]

In [12]:
# Calibration results, listed in the same order as `sites`
calibs = [
    ger_noneseasonal,
    swe_noneseasonal,
    esp_noneseasonal,
]

In [13]:
# Test window (inclusive bounds) and its row selector from the German table
start_test, end_test = '2015-01-01', '2020-12-31'
test_mask = input_ger['time'].between(start_test, end_test)

# Evaluation window: begins one year into the test window and shares its end
start_eval, end_eval = '2016-01-01', end_test

In [14]:
# Test run: apply each site's calibrated constant parameters to the test window
# and score the predictions over the evaluation window.
swbm_param = 'b0'  # passed through to eval_swbm unchanged
evals_test = []
model_results = []
for site, calib in zip(sites, calibs):
    # Test slice, selected on this site's own 'time' column. A fresh frame is
    # built instead of resetting the index of a filtered slice in place.
    site_test_mask = (site['time'] >= start_test) & (site['time'] <= end_test)
    input_swbm = site[site_test_mask].reset_index(drop=True)

    # Evaluation slice: the part of the test slice inside the eval window
    eval_mask = (input_swbm['time'] >= start_eval) & (input_swbm['time'] <= end_eval)
    input_eval = input_swbm[eval_mask].reset_index(drop=True)

    # calib[0] is the parameter dict; copy it so the stored calibration is not
    # shared with the model call
    params = calib[0].copy()

    # Run the model over the whole test slice with the calibrated parameters
    preds = predict_ts(input_swbm, params)
    moists, runoffs, ets, na_count = preds

    # Attach the predictions as extra columns next to the model input
    model_result = input_swbm.copy()
    model_result = pd.concat([model_result, pd.DataFrame({'moists': moists,
                                  'runoffs': runoffs,
                                  'ets': ets}, index=model_result.index)], axis=1)

    # Cut the predictions down to the evaluation window
    moists_eval = moists[eval_mask]
    runoffs_eval = runoffs[eval_mask]
    ets_eval = ets[eval_mask]

    # Score predictions against the evaluation data
    eval_df = eval_swbm(input_eval,
                        {'sm': moists_eval,
                         'ro': runoffs_eval,
                         'le': ets_eval},
                        swbm_param)

    result_test = {
        'Combination': calib[0],
        'sum_corr': eval_df['corr'].sum(),
        'eval_df': eval_df,
    }

    evals_test.append(result_test)
    model_results.append(model_result)

In [15]:
# Transform function for evaluation
def transform_evals(input_data):
    """Flatten one test-run result into a JSON-friendly list.

    Parameters
    ----------
    input_data : dict
        Must provide 'Combination' (passed through unchanged), 'sum_corr',
        and 'eval_df' (a frame whose 'corr' column has at least three rows).

    Returns
    -------
    list
        ``[Combination, {'sum_corr': ...}, per-variable correlations]``.

    Notes
    -----
    The first three 'corr' rows are read by position and labelled sm / ro / le.
    NOTE(review): this assumes eval_df lists the variables in that order;
    confirm against eval_swbm.
    """
    corr = input_data['eval_df']['corr']

    # .iloc is explicitly positional: plain corr[0] is a label lookup on an
    # integer index and a deprecated positional fallback on any other index.
    # float() guarantees a plain Python number for json.dump.
    sm_corr, ro_corr, le_corr = (float(corr.iloc[row]) for row in range(3))

    output_data = [
        input_data['Combination'],
        {'sum_corr': input_data['sum_corr']},
        {'sm_corr': sm_corr,
         'ro_corr': ro_corr,
         'le_corr': le_corr}
    ]
    return output_data

In [16]:
# Transform evaluation
ger_nonseasonal_test = transform_evals(evals_test[0])
swe_nonseasonal_test = transform_evals(evals_test[0])
esp_nonseasonal_test = transform_evals(evals_test[0])

In [17]:
# Save evaluations
with open('results/ger_nonseasonal_test.json', 'w') as file:
    json.dump(ger_nonseasonal_test, file)

with open('results/swe_nonseasonal_test.json', 'w') as file:
    json.dump(swe_nonseasonal_test, file)

with open('results/esp_nonseasonal_test.json', 'w') as file:
    json.dump(esp_nonseasonal_test, file)

In [18]:
# Save model predictions
model_results[0].to_csv('data/output/ger_nonseasonal_test.csv', index=False)
model_results[1].to_csv('data/output/swe_nonseasonal_test.csv', index=False)
model_results[2].to_csv('data/output/esp_nonseasonal_test.csv', index=False)