In [None]:
import dabench as dab
import numpy as np
import jax
from timeit import default_timer as timer
import pandas as pd
import pickle
import os

# Set up

### Read in Ray Tune results

In [None]:
# Load the Ray Tune hyperparameter-search results for the system-size experiment.
raytune_system_dim_results = pd.read_csv('./out/l96/raytune_l96_hessian_v4.csv')
# Preserve the index assigned by read_csv as an explicit trial number, then give
# the frame a plain 0..n-1 integer index so the labels returned by idxmin below
# can be used directly with .loc.
raytune_system_dim_results['trialnum'] = raytune_system_dim_results.index
raytune_system_dim_results.index = np.arange(len(raytune_system_dim_results))
# Row label of the lowest-rmse trial for each system_dim. 'rmse' is selected
# *before* idxmin so that only this column is scanned; other numeric columns
# (which may contain all-NaN groups) no longer take part in the reduction.
rows_to_get = raytune_system_dim_results.groupby('system_dim')['rmse'].idxmin()
best_results_system_dim = raytune_system_dim_results.loc[rows_to_get]

In [None]:
# Load the Ray Tune results for the (num_obs, obs_sd) heatmap experiment.
raytune_heatmap_results = pd.read_csv('./out/l96/raytune_werrors_heatmap_hessian_v4_alldims_bigger_error.csv')
# Preserve the index assigned by read_csv as an explicit trial number, then give
# the frame a plain 0..n-1 integer index so the labels returned by idxmin below
# can be used directly with .loc.
raytune_heatmap_results['trialnum'] = raytune_heatmap_results.index
raytune_heatmap_results.index = np.arange(len(raytune_heatmap_results))
# Row label of the lowest-rmse trial for each (num_obs, obs_sd) pair. 'rmse' is
# selected *before* idxmin so that only this column is scanned; other numeric
# columns (which may contain all-NaN groups) no longer take part in the reduction.
rows_to_get = raytune_heatmap_results.groupby(['num_obs', 'obs_sd'])['rmse'].idxmin()
best_results_heatmap = raytune_heatmap_results.loc[rows_to_get]

### Define parameters

In [None]:
# --- Lorenz96 system and integration step ---
system_dim = 36
delta_t = 0.01

# --- Nature-run layout: segment lengths, given as numbers of generated steps ---
spinup_size = 14400
valid_size = 5000
transient_size = 1000
test_size = 5000
# Total number of steps generated for the nature run (sum of all segments).
nr_steps = sum((spinup_size, valid_size, transient_size, test_size))

# --- Observation noise and the error scales derived from it ---
obs_sd = 0.5
sigma_bg = obs_sd/1.5     # used to build B (background error covariance)
sigma_obs = obs_sd*1.25   # used to build R (observation error covariance)
obs_location_count = 18

# --- Assimilation window ---
analysis_window = 0.1
analysis_time_in_window = 0.05

# --- Optimiser / experiment repetition settings ---
num_iters = 3        # forwarded to Var4DBackprop
n_outer_loops = 3    # forwarded to Var4D
num_runs = 50        # repetitions (distinct seeds) per configuration

### Function definition: Backprop 4DVar

We'll need to prepare and run Backprop-4DVar many times, so the function below wraps the whole pipeline (nature run, observations, forecast model, and DA cycling) into a single call.

In [None]:
def run_backprop_4dvar(system_dim, nr_steps, spinup_size, valid_size, test_size, 
                       test_run, delta_t, obs_location_count, obs_sd, sigma_bg, 
                       sigma_obs, analysis_window, analysis_time_in_window, 
                       random_seed, num_iters, learning_rate, lr_decay,
                       transient_size=1000):
    """Run one Backprop-4DVar experiment on Lorenz96 and score it against the nature run.

    Generates a Lorenz96 nature run, splits it into spinup / validation /
    transient / test segments, observes the chosen evaluation segment, cycles
    ``dab.dacycler.Var4DBackprop`` over it and computes the RMSE of the result
    against the nature run.

    Args:
        system_dim: Lorenz96 state dimension; also sizes H, B and the noise vectors.
        nr_steps: Number of steps generated for the nature run.
        spinup_size, valid_size, test_size, transient_size: Segment lengths used
            to split the nature run. ``transient_size`` is keyword-with-default
            (appended last so existing positional calls keep working); it used
            to be read from a notebook global even though every caller passes
            it by keyword, which raised ``TypeError``.
        test_run: If False the validation segment is evaluated, otherwise the
            test segment.
        delta_t: Integration step passed to Lorenz96.
        obs_location_count: Number of randomly chosen observed locations
            (rows of H, size of R).
        obs_sd: Standard deviation of the error added by the Observer.
        sigma_bg: Background error scale; ``B = sigma_bg**2 * I``.
        sigma_obs: Observation error scale; ``R = sigma_obs**2 * I`` and also
            forwarded to ``cycle`` as ``obs_error_sd``.
        analysis_window, analysis_time_in_window: Forwarded to ``cycle``.
        random_seed: Seeds the numpy generator, both Lorenz96 objects and the
            Observer.
        num_iters, learning_rate, lr_decay: Forwarded to ``Var4DBackprop``.

    Returns:
        Tuple ``(out_statevec, rmse, obs_vec_l96, nr_eval, da_time)`` where
        ``da_time`` is the wall-clock time (seconds, via ``timeit.default_timer``)
        spent building the cycler, the initial state, and cycling.
    """
    np_rng = np.random.default_rng(random_seed)
    # NOTE(review): jax.clear_backends appears to be deprecated in recent JAX
    # releases -- confirm against the installed version.
    jax.clear_backends()

    ### Nature Run
    nature_run = dab.data.Lorenz96(system_dim=system_dim, delta_t=delta_t,
                                   store_as_jax=True, random_seed=random_seed)

    x0_initial = np_rng.normal(size=system_dim, scale=1)
    nature_run.generate(n_steps=nr_steps, x0 = x0_initial) 
    # Two-stage split: (spinup | valid | transient+test), then the last piece
    # into (transient | test).
    nr_spinup, nr_valid, nr_transient_and_test = nature_run.split_train_valid_test(
        spinup_size, valid_size, transient_size + test_size)
    nr_transient, nr_test, _ = nr_transient_and_test.split_train_valid_test(
        transient_size, test_size, 0)

    if not test_run:
        nr_eval = nr_valid
    else:
        nr_eval = nr_test


    ### Observations
    # Observe every 5th time index at a fixed set of randomly chosen locations.
    obs_l96 = dab.observer.Observer(
        nr_eval,
        time_indices = np.arange(0, nr_eval.time_dim, 5),
        random_location_count = obs_location_count,
        error_bias = 0.0,
        error_sd = obs_sd,
        random_seed=random_seed,
        stationary_observers=True,
        store_as_jax=True
    )
    obs_vec_l96 = obs_l96.observe()

    
    ### Forecast Model
    model_l96 = dab.data.Lorenz96(system_dim=system_dim, delta_t=delta_t, 
                                  store_as_jax=True, random_seed=random_seed)

    class L96Model(dab.model.Model):                                                                       
        """Defines model wrapper for Lorenz96 to test forecasting."""
        def forecast(self, state_vec, n_steps):
            """Integrate from ``state_vec.values`` for ``n_steps`` and wrap the result."""
            self.model_obj.generate(x0=state_vec.values, n_steps=n_steps)
            new_vals = self.model_obj.values 

            new_vec = dab.vector.StateVector(values=new_vals, store_as_jax=True)

            return new_vec

    fc_model = L96Model(model_obj=model_l96)
    
    ### Set up DA matrices: H (observation), R (obs error), B (background error)
    # H has a single 1 per row, in the column of the observed location.
    H = np.zeros((obs_location_count, system_dim))
    H[np.arange(H.shape[0]), obs_vec_l96.location_indices[0]] = 1
    R = (sigma_obs**2)* np.identity(obs_location_count)
    B = (sigma_bg**2)*np.identity(system_dim)

    
    ### Run data assimilation
    da_time_start = timer()
    
    # Prep DA object
    dc = dab.dacycler.Var4DBackprop(
        system_dim=system_dim,
        delta_t=nr_eval.delta_t,
        H=H,
        B=B,
        R=R,
        num_iters=num_iters,
        loss_growth_limit=5,
        learning_rate=learning_rate,
        lr_decay=lr_decay,
        model_obj=fc_model,
        obs_window_indices=[0,5,10],
        steps_per_window=11, # 11 instead of 10 because the window is inclusive of steps 0 and 10
        )

    # Generate initial conditions: truth at the first evaluation step plus
    # unit-scale Gaussian noise.
    cur_tstep = 0
    x0_original = nr_eval.values[cur_tstep] + np_rng.normal(size=(system_dim,), 
                                                            scale=1)
    x0_sv = dab.vector.StateVector(
        values=x0_original,
        store_as_jax=True)
    
    # Execute
    # NOTE(review): timesteps=498 and the [:-20] slice below are hard-coded;
    # presumably sized for a 5000-step evaluation segment with 10-step windows
    # (498 * 10 = 5000 - 20) -- TODO confirm before changing segment sizes.
    out_statevec = dc.cycle(
        input_state = x0_sv,
        start_time = nr_eval.times[cur_tstep],
        obs_vector = obs_vec_l96,
        analysis_window=analysis_window,
        timesteps=498,
        obs_error_sd=sigma_obs,
        analysis_time_in_window=analysis_time_in_window)
    
    da_time = timer()-da_time_start
    rmse = np.sqrt(np.mean(np.square(nr_eval.values[:-20] - out_statevec.values)))
    
    return out_statevec, rmse, obs_vec_l96, nr_eval, da_time

### Function: 4DVar

In [None]:
def run_4dvar(system_dim, nr_steps, spinup_size, valid_size, transient_size, test_run, 
              test_size, delta_t, obs_location_count, obs_sd, sigma_bg, sigma_obs,
              analysis_window, analysis_time_in_window, random_seed, n_outer_loops):
    """Run one 4DVar experiment on Lorenz96 and score it against the nature run.

    Builds a Lorenz96 nature run, splits it into spinup / validation /
    transient / test segments, observes the selected evaluation segment,
    cycles ``dab.dacycler.Var4D`` over it and computes the RMSE against truth.

    Args:
        system_dim: Lorenz96 state dimension; also sizes H, B and noise vectors.
        nr_steps: Number of steps generated for the nature run.
        spinup_size, valid_size, transient_size, test_size: Segment lengths
            used to split the nature run.
        test_run: False evaluates the validation segment, True the test segment.
        delta_t: Integration step passed to Lorenz96.
        obs_location_count: Number of randomly chosen observed locations.
        obs_sd: Standard deviation of the error added by the Observer.
        sigma_bg: Background error scale; ``B = sigma_bg**2 * I``.
        sigma_obs: Observation error scale; ``R = sigma_obs**2 * I`` and also
            forwarded to ``cycle`` as ``obs_error_sd``.
        analysis_window, analysis_time_in_window: Forwarded to ``cycle``.
        random_seed: Seeds the numpy generator, both Lorenz96 objects and the
            Observer.
        n_outer_loops: Forwarded to ``Var4D``.

    Returns:
        Tuple ``(out_statevec_4dvar, rmse, obs_vec_l96, nr_eval, da_time)``.
    """
    rng = np.random.default_rng(random_seed)
    jax.clear_backends()

    # ---- Nature run (truth) ----
    truth = dab.data.Lorenz96(system_dim=system_dim, delta_t=delta_t,
                              store_as_jax=True, random_seed=random_seed)
    truth.generate(n_steps=nr_steps, x0=rng.normal(size=system_dim, scale=1))

    # Two-stage split: (spinup | valid | transient+test), then (transient | test).
    _, valid_part, remainder = truth.split_train_valid_test(
        spinup_size, valid_size, transient_size + test_size)
    _, test_part, _ = remainder.split_train_valid_test(
        transient_size, test_size, 0)
    nr_eval = test_part if test_run else valid_part

    # ---- Observations: every 5th time index, fixed random locations ----
    observer = dab.observer.Observer(
        nr_eval,
        time_indices=np.arange(0, nr_eval.time_dim, 5),
        random_location_count=obs_location_count,
        error_bias=0.0,
        error_sd=obs_sd,
        random_seed=random_seed,
        stationary_observers=True,
        store_as_jax=True,
    )
    obs_vec_l96 = observer.observe()

    # ---- Forecast model ----
    forecast_l96 = dab.data.Lorenz96(system_dim=system_dim, delta_t=delta_t,
                                     store_as_jax=True, random_seed=random_seed)

    class L96Model(dab.model.Model):
        """Lorenz96 wrapper exposing forecast and tangent-linear-model calls."""

        def forecast(self, state_vec, n_steps):
            """Integrate from ``state_vec.values`` for ``n_steps`` and wrap the result."""
            self.model_obj.generate(x0=state_vec.values, n_steps=n_steps)
            return dab.vector.StateVector(values=self.model_obj.values,
                                          store_as_jax=True)

        def compute_tlm(self, state_vec, n_steps):
            """For 4DVar (not used by Backprop-4DVar): return the TLM and the trajectory."""
            tlm = self.model_obj.generate(n_steps=n_steps, x0=state_vec.values,
                                          return_tlm=True)
            return tlm, self.model_obj.values

    fc_model = L96Model(model_obj=forecast_l96)

    # ---- DA matrices: H (observation), R (obs error), B (background error) ----
    obs_operator = np.zeros((obs_location_count, system_dim))
    row_idx = np.arange(obs_operator.shape[0])
    col_idx = np.tile(obs_vec_l96.location_indices[0], 1)
    obs_operator[row_idx, col_idx] = 1
    obs_err_cov = (sigma_obs**2) * np.identity(obs_location_count)
    bg_err_cov = (sigma_bg**2) * np.identity(system_dim)
    # Element-wise square root of B, which is built as a diagonal matrix above.
    bg_err_cov_sqrt = np.sqrt(bg_err_cov)

    # ---- Run data assimilation (timed) ----
    started_at = timer()

    cycler = dab.dacycler.Var4D(
        system_dim=system_dim,
        delta_t=truth.delta_t,
        H=obs_operator,
        B=bg_err_cov,
        R=obs_err_cov,
        Bsqrt=bg_err_cov_sqrt,
        n_outer_loops=n_outer_loops,
        model_obj=fc_model,
        obs_window_indices=[0, 5, 10],
        steps_per_window=11,  # 11 rather than 10: the window includes both step 0 and step 10
    )

    # Initial condition: truth at the first evaluation step plus unit-scale noise.
    first_step = 0
    perturbed_x0 = nr_eval.values[first_step] + rng.normal(size=(system_dim,),
                                                           scale=1)
    initial_state = dab.vector.StateVector(values=perturbed_x0, store_as_jax=True)

    out_statevec_4dvar = cycler.cycle(
        input_state=initial_state,
        start_time=nr_eval.times[first_step],
        obs_vector=obs_vec_l96,
        analysis_window=analysis_window,
        timesteps=498,
        obs_error_sd=sigma_obs,
        analysis_time_in_window=analysis_time_in_window,
    )

    da_time = timer() - started_at
    squared_error = np.square(nr_eval.values[:-20] - out_statevec_4dvar.values)
    rmse = np.sqrt(np.mean(squared_error))

    return out_statevec_4dvar, rmse, obs_vec_l96, nr_eval, da_time

### Function: Baserun (no DA)

In [None]:
def run_baserun(system_dim, nr_steps, spinup_size, valid_size, transient_size, test_run,
                test_size, delta_t, obs_location_count, obs_sd, sigma_bg, sigma_obs,
                analysis_window, analysis_time_in_window, random_seed):
    """Free-running Lorenz96 forecast (no data assimilation), scored against truth.

    Generates the same nature run and evaluation segment as the DA functions,
    starts a second Lorenz96 model from a perturbed copy of the first
    evaluation state and integrates it for the length of the segment.

    ``obs_location_count``, ``obs_sd``, ``sigma_bg``, ``sigma_obs``,
    ``analysis_window`` and ``analysis_time_in_window`` are accepted but not
    used by this function.

    Returns:
        Tuple ``(out_vec, rmse, nr_eval, da_time)``; ``rmse`` excludes the last
        20 steps of both trajectories, and ``da_time`` covers the model
        integration plus the RMSE computation.
    """
    rng = np.random.default_rng(random_seed)
    jax.clear_backends()

    def make_l96():
        # Both the truth and the free-running model share the same settings.
        return dab.data.Lorenz96(system_dim=system_dim, delta_t=delta_t,
                                 store_as_jax=True, random_seed=random_seed)

    # ---- Nature run (truth) ----
    truth = make_l96()
    truth.generate(n_steps=nr_steps, x0=rng.normal(size=system_dim, scale=1))

    # Two-stage split: (spinup | valid | transient+test), then (transient | test).
    _, valid_part, remainder = truth.split_train_valid_test(
        spinup_size, valid_size, transient_size + test_size)
    _, test_part, _ = remainder.split_train_valid_test(
        transient_size, test_size, 0)
    nr_eval = test_part if test_run else valid_part

    # ---- Forecast model ----
    free_run = make_l96()

    # ---- Run (timed) ----
    started_at = timer()
    first_step = 0
    perturbed_x0 = nr_eval.values[first_step] + rng.normal(size=(system_dim,), scale=1)

    free_run.generate(x0=perturbed_x0, n_steps=nr_eval.time_dim)

    squared_error = np.square(free_run.values[:-20] - nr_eval.values[:-20])
    rmse = np.sqrt(np.mean(squared_error))
    da_time = timer() - started_at
    out_vec = dab.vector.StateVector(values=free_run.values, times=free_run.times,
                                     store_as_jax=True)

    return out_vec, rmse, nr_eval, da_time

# Run on Validation

### System size experiments - Validation Set

In [None]:
# Backprop 4D Var w/ Approximate Hessian
# Validation split: sweep over system size, num_runs seeds per size.
out_dict_list_bp = []
random_seed_list = np.arange(num_runs)
system_dim_list = [6, 20, 36, 72, 144, 256]

for system_dim in system_dim_list:

    # Observe half of the state variables.
    obs_location_count = int(system_dim/2)

    # Tuned optimiser settings for this system size.
    best_results_filtered = best_results_system_dim.loc[
        best_results_system_dim['system_dim']==system_dim]
    learning_rate = best_results_filtered['config/lr'].values[0]
    lr_decay = best_results_filtered['config/lr_decay'].values[0]

    for i in range(num_runs):

        # Seed differs per (system size, repetition) pair.
        random_seed = system_dim + random_seed_list[i]

        run_dict = {
            'system_dim': system_dim,
            'nr_steps': nr_steps,
            'spinup_size': spinup_size,
            'valid_size': valid_size,
            'transient_size': transient_size,
            'test_size': test_size,
            'test_run': False,
            'delta_t': delta_t,
            'obs_location_count': obs_location_count,
            'obs_sd': obs_sd,
            'sigma_bg': sigma_bg,
            'sigma_obs': sigma_obs,
            'analysis_window': analysis_window,
            'analysis_time_in_window': analysis_time_in_window,
            'random_seed': random_seed,
            'num_iters': num_iters,
            'learning_rate': learning_rate,
            'lr_decay': lr_decay,
        }

        out_bp, error_bp, obs_vec_l96, nr_eval, da_time = run_backprop_4dvar(**run_dict)

        # Record the outcome next to the configuration that produced it.
        run_dict.update(time=da_time, rmse=error_bp, run_num=i)
        print(f"Run {i}, Time = {run_dict['time']}")
        out_dict_list_bp.append(run_dict)

bp_df_time = pd.DataFrame(out_dict_list_bp)
bp_df_time.to_csv('./out/l96/bp_df_time_v33_withraytune_hessian_iters3_val.csv')

In [None]:
# 4D Var
# Validation split: sweep over system size, num_runs seeds per size.
out_dict_list_4dvar = []

random_seed_list = np.arange(num_runs)
system_dim_list = [6, 20, 36, 72, 144, 256]

for system_dim in system_dim_list:
    for i in range(num_runs):

        # Observe half of the state variables; seed differs per (size, repetition).
        obs_location_count = int(system_dim/2)
        random_seed = system_dim + random_seed_list[i]

        run_dict = {
            'system_dim': system_dim,
            'nr_steps': nr_steps,
            'spinup_size': spinup_size,
            'valid_size': valid_size,
            'transient_size': transient_size,
            'test_size': test_size,
            'test_run': False,
            'delta_t': delta_t,
            'obs_location_count': obs_location_count,
            'obs_sd': obs_sd,
            'sigma_bg': sigma_bg,
            'sigma_obs': sigma_obs,
            'analysis_window': analysis_window,
            'analysis_time_in_window': analysis_time_in_window,
            'random_seed': random_seed,
            'n_outer_loops': n_outer_loops,
        }

        out_4dvar, error_4dvar, obs_vec_l96, nr_eval, da_time = run_4dvar(**run_dict)

        # Record the outcome next to the configuration that produced it.
        run_dict.update(time=da_time, rmse=error_4dvar, run_num=i)
        print(f"Run {i}, Time = {run_dict['time']}")
        out_dict_list_4dvar.append(run_dict)

var4d_df_time = pd.DataFrame(out_dict_list_4dvar)
var4d_df_time.to_csv('./out/l96/var4d_df_time_v33_3outer_val.csv')

### Experiments varying number of observations and obs error - Validation Set

In [None]:
# Backprop 4D Var w/ Approximate Hessian
# Validation split: sweep over (number of observed locations, observation error).
out_dict_list_bp_obs = []

num_runs = 1
num_iters = 3
system_dim = 36
random_seed = system_dim
num_obs_list = [6, 12, 18, 24, 30, 36]
obs_error_list = np.arange(0.1, 1.05, 0.1)

for obs_location_count in num_obs_list:
    for obs_sd in obs_error_list:

        # Tuned optimiser settings for this (num_obs, obs_sd) cell; obs_sd is
        # rounded on both sides so the float grid values compare equal.
        sd_matches = best_results_heatmap['obs_sd'].round(2)==round(obs_sd, 2)
        count_matches = best_results_heatmap['num_obs']==obs_location_count
        best_results_filtered = best_results_heatmap.loc[sd_matches & count_matches]
        learning_rate = best_results_filtered['config/lr'].values[0]
        lr_decay = best_results_filtered['config/lr_decay'].values[0]

        for i in range(num_runs):
            run_dict = {
                'system_dim': system_dim,
                'nr_steps': nr_steps,
                'spinup_size': spinup_size,
                'valid_size': valid_size,
                'transient_size': transient_size,
                'test_size': test_size,
                'test_run': False,
                'delta_t': delta_t,
                'obs_location_count': obs_location_count,
                'obs_sd': obs_sd,
                'sigma_bg': obs_sd/1.5,
                'sigma_obs': obs_sd*1.25,
                'analysis_window': analysis_window,
                'analysis_time_in_window': analysis_time_in_window,
                'random_seed': random_seed,
                'num_iters': num_iters,
                'learning_rate': learning_rate,
                'lr_decay': lr_decay,
            }

            out_bp, error_bp, obs_vec_l96, nature_run, da_time = run_backprop_4dvar(**run_dict)

            # Record the outcome next to the configuration that produced it.
            run_dict.update(time=da_time, rmse=error_bp, run_num=i)

            print(f"Run {i}, Time = {run_dict['time']}")
            print(obs_location_count, obs_sd, error_bp)
            out_dict_list_bp_obs.append(run_dict)

bp_df_obs = pd.DataFrame(out_dict_list_bp_obs)
bp_df_obs.to_csv('./out/l96/bp_df_obs_v33_hessian_raytune_alldims_val.csv')

In [None]:
# 4D Var
# Validation split: sweep over (number of observed locations, observation error).

out_dict_list_4dvar_obs = []

num_runs = 1
system_dim = 36
random_seed = system_dim
num_obs_list = [6, 12, 18, 24, 30, 36]
obs_error_list = np.arange(0.1, 1.05, 0.1)

for obs_location_count in num_obs_list:
    for obs_sd in obs_error_list:
        for i in range(num_runs):

            # Error scales are derived from the swept obs_sd, not the globals.
            run_dict = {
                'system_dim': system_dim,
                'nr_steps': nr_steps,
                'spinup_size': spinup_size,
                'valid_size': valid_size,
                'transient_size': transient_size,
                'test_size': test_size,
                'test_run': False,
                'delta_t': delta_t,
                'obs_location_count': obs_location_count,
                'obs_sd': obs_sd,
                'sigma_bg': obs_sd/1.5,
                'sigma_obs': obs_sd*1.25,
                'analysis_window': analysis_window,
                'analysis_time_in_window': analysis_time_in_window,
                'random_seed': random_seed,
                'n_outer_loops': n_outer_loops,
            }

            out_4dvar, error_4dvar, obs_vec_l96, nature_run, da_time = run_4dvar(**run_dict)

            # Record the outcome next to the configuration that produced it.
            run_dict.update(time=da_time, rmse=error_4dvar, run_num=i)

            print(f"Run {i}, Time = {run_dict['time']}")
            out_dict_list_4dvar_obs.append(run_dict)

var4d_df_obs = pd.DataFrame(out_dict_list_4dvar_obs)
var4d_df_obs.to_csv('./out/l96/var4d_df_obs_v33_3outer_val.csv')

# Repeating everything on the test set    

### System Size Experiments - Test Set

In [None]:
# Restore the shared configuration before the test-set system-size runs.
# The validation heatmap cells above set num_runs = 1 and use `obs_sd` as a
# module-level loop variable, so after they run obs_sd holds the last swept
# value while sigma_bg / sigma_obs still correspond to the original 0.5.
# Reset all of them so the cells below see one consistent configuration.
num_runs = 50
obs_sd = 0.5  # same value as in the "Define parameters" cell
sigma_bg = obs_sd/1.5
sigma_obs = obs_sd*1.25

In [None]:
# Backprop 4D Var w/ Approximate Hessian
# Test split: sweep over system size, num_runs seeds per size.

out_dict_list_bp = []

# Offset by 100 so the test seeds do not coincide with the validation seeds
# for the same system size.
random_seed_list = np.arange(num_runs)+100
system_dim_list = [6, 20, 36, 72, 144, 256]

for system_dim in system_dim_list:
    
    # Observe half of the state variables.
    obs_location_count = int(system_dim/2)
    # Tuned optimiser settings for this system size.
    best_results_filtered = best_results_system_dim.loc[best_results_system_dim['system_dim']==system_dim]
    learning_rate = best_results_filtered['config/lr'].values[0]
    lr_decay = best_results_filtered['config/lr_decay'].values[0]

    for i in range(num_runs):
        
        random_seed = system_dim + random_seed_list[i]
        
        run_dict = dict(
            system_dim=system_dim, 
            nr_steps= nr_steps,
            spinup_size=spinup_size,
            valid_size=valid_size,
            transient_size=transient_size,
            test_size=test_size,
            test_run=True,
            delta_t=delta_t,
            obs_location_count=obs_location_count,
            obs_sd=obs_sd,
            sigma_bg=sigma_bg,
            sigma_obs=sigma_obs,
            analysis_window=analysis_window,
            analysis_time_in_window=analysis_time_in_window,
            random_seed=random_seed,
            num_iters=num_iters,
            learning_rate=learning_rate,
            lr_decay = lr_decay)
        
        out_bp, error_bp, obs_vec_l96, nr_eval, da_time = run_backprop_4dvar(**run_dict)
        
        # Record the outcome next to the configuration that produced it.
        run_dict['time'] = da_time 
        run_dict['rmse'] = error_bp
        run_dict['run_num'] = i
        print('Run {}, Time = {}'.format(i,run_dict['time']))
        out_dict_list_bp.append(run_dict)
        
bp_df_time =  pd.DataFrame(out_dict_list_bp)
# Written as .csv (was '.py'): the content is CSV, matching the validation
# output and the 4DVar test output.
bp_df_time.to_csv('./out/l96/bp_df_time_v33_withraytune_hessian_iters3_test.csv')

In [None]:
# 4DVar
# Test split: sweep over system size, num_runs seeds per size.

out_dict_list_4dvar = []

# Offset by 100 relative to the seeds used for the validation sweep.
random_seed_list = np.arange(num_runs)+100
system_dim_list = [6, 20, 36, 72, 144, 256]

for system_dim in system_dim_list:
    for i in range(num_runs):
        # Observe half of the state variables; seed differs per (size, repetition).
        obs_location_count = int(system_dim/2)
        random_seed = system_dim + random_seed_list[i]

        run_dict = {
            'system_dim': system_dim,
            'nr_steps': nr_steps,
            'spinup_size': spinup_size,
            'valid_size': valid_size,
            'transient_size': transient_size,
            'test_size': test_size,
            'test_run': True,
            'delta_t': delta_t,
            'obs_location_count': obs_location_count,
            'obs_sd': obs_sd,
            'sigma_bg': sigma_bg,
            'sigma_obs': sigma_obs,
            'analysis_window': analysis_window,
            'analysis_time_in_window': analysis_time_in_window,
            'random_seed': random_seed,
            'n_outer_loops': n_outer_loops,
        }

        out_4dvar, error_4dvar, obs_vec_l96, nr_eval, da_time = run_4dvar(**run_dict)

        # Record the outcome next to the configuration that produced it.
        run_dict.update(time=da_time, rmse=error_4dvar, run_num=i)
        print(f"Run {i}, Time = {run_dict['time']}")
        out_dict_list_4dvar.append(run_dict)

var4d_df_time = pd.DataFrame(out_dict_list_4dvar)
var4d_df_time.to_csv('./out/l96/var4d_df_time_v33_3outer_test.csv')

### Experiments varying number of observations and obs error - Test Set

In [None]:
# Backprop 4D Var w/ Approximate Hessian
# Test split: sweep over (number of observed locations, observation error),
# num_runs seeds per cell. Results are appended to out_csv one cell at a time.

num_runs = 30
system_dim = 36
num_obs_list = [6, 12, 18, 24, 30, 36]
obs_error_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 1.5, 2.0]
out_csv = './out/l96/bp_df_obs_v34_hessian_raytune_30runs_test.csv'

for obs_location_count in num_obs_list:
    for obs_sd in obs_error_list:

        # Tuned optimiser settings for this (num_obs, obs_sd) cell.
        sd_matches = best_results_heatmap['obs_sd'].round(2)==round(obs_sd, 2)
        count_matches = best_results_heatmap['num_obs']==obs_location_count
        best_results_filtered = best_results_heatmap.loc[sd_matches & count_matches]
        learning_rate = best_results_filtered['config/lr'].values[0]
        lr_decay = best_results_filtered['config/lr_decay'].values[0]

        # Fresh list per cell: only this cell's rows are appended below.
        out_dict_list_bp_obs = []

        for i in range(num_runs):

            random_seed = 99 + i

            run_dict = {
                'system_dim': system_dim,
                'nr_steps': nr_steps,
                'spinup_size': spinup_size,
                'valid_size': valid_size,
                'transient_size': transient_size,
                'test_size': test_size,
                'test_run': True,
                'delta_t': delta_t,
                'obs_location_count': obs_location_count,
                'obs_sd': obs_sd,
                'sigma_bg': obs_sd/1.5,
                'sigma_obs': obs_sd*1.25,
                'analysis_window': analysis_window,
                'analysis_time_in_window': analysis_time_in_window,
                'random_seed': random_seed,
                'num_iters': num_iters,
                'learning_rate': learning_rate,
                'lr_decay': lr_decay,
            }

            out_bp, error_bp, obs_vec_l96, nature_run, da_time = run_backprop_4dvar(**run_dict)

            run_dict.update(time=da_time, rmse=error_bp, run_num=i)
            out_dict_list_bp_obs.append(run_dict)

        bp_df_obs = pd.DataFrame(out_dict_list_bp_obs)
        # Append to the CSV; the header is written only if the file does not exist yet.
        # NOTE(review): re-running this cell appends to an existing file rather
        # than replacing it.
        bp_df_obs.to_csv(out_csv,
                         mode='a',
                         header=(not os.path.isfile(out_csv)),
                         index=False)

In [None]:
# 4DVar
# Test split: sweep over (number of observed locations, observation error),
# num_runs seeds per cell. Results are appended to out_csv one cell at a time.

num_runs = 30
system_dim = 36
num_obs_list = [6, 12, 18, 24, 30, 36]
obs_error_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 1.5, 2.0]
out_csv = './out/l96/var4d_df_obs_v34_3outer_30runs_test.csv'


for obs_location_count in num_obs_list:
    for obs_sd in obs_error_list:

        # Fresh list per cell: only this cell's rows are appended below.
        out_dict_list_4dvar_obs = []

        for i in range(num_runs):

            random_seed = 99 + i

            run_dict = {
                'system_dim': system_dim,
                'nr_steps': nr_steps,
                'spinup_size': spinup_size,
                'valid_size': valid_size,
                'transient_size': transient_size,
                'test_size': test_size,
                'test_run': True,
                'delta_t': delta_t,
                'obs_location_count': obs_location_count,
                'obs_sd': obs_sd,
                'sigma_bg': obs_sd/1.5,
                'sigma_obs': obs_sd*1.25,
                'analysis_window': analysis_window,
                'analysis_time_in_window': analysis_time_in_window,
                'random_seed': random_seed,
                'n_outer_loops': n_outer_loops,
            }

            out_4dvar, error_4dvar, obs_vec_l96, nature_run, da_time = run_4dvar(**run_dict)

            run_dict.update(time=da_time, rmse=error_4dvar, run_num=i)
            out_dict_list_4dvar_obs.append(run_dict)

        var4d_df_obs = pd.DataFrame(out_dict_list_4dvar_obs)
        # Append to the CSV; the header is written only if the file does not exist yet.
        # NOTE(review): re-running this cell appends to an existing file rather
        # than replacing it.
        var4d_df_obs.to_csv(out_csv,
                            mode='a',
                            header=(not os.path.isfile(out_csv)),
                            index=False)

# Run 36D example and save statevectors for later visualization

In [None]:
# Nature run and no-DA baserun
# Single 36-dimensional test-split run; the resulting state vectors are pickled
# for later visualization.

system_dim = 36
i = 0
obs_location_count = 18
obs_sd = 0.4
random_seed = 99 + i

run_dict = dict(system_dim=system_dim, 
                nr_steps=nr_steps,
                spinup_size=spinup_size,
                valid_size=valid_size,
                transient_size=transient_size,
                test_size=test_size,
                test_run=True,
                delta_t=delta_t,
                obs_location_count=obs_location_count,
                obs_sd=obs_sd,
                sigma_bg=obs_sd/1.5,
                sigma_obs=obs_sd*1.25,
                analysis_window=analysis_window,
                analysis_time_in_window=analysis_time_in_window,
                random_seed=random_seed)

# run_baserun returns the evaluation segment of the nature run as its third value.
out_baserun, error_baserun, nature_run, da_time = run_baserun(**run_dict)

run_dict['time'] = da_time
run_dict['rmse'] = error_baserun
run_dict['run_num'] = i

print('Run {}, Time = {}'.format(i,run_dict['time']))
print(obs_location_count, obs_sd, error_baserun)

# Write statevecs. Each `with` block closes its file on exit, so no explicit
# close() call is needed afterwards.
out_file = './out/l96/l96_baserun_results_18obs_36dim_sd04_v1.pkl'
with open(out_file, 'wb') as f:
    pickle.dump(out_baserun, f)

out_file = './out/l96/l96_baserun_nr_18obs_36dim_sd04_v1.pkl'
with open(out_file, 'wb') as f:
    pickle.dump(nature_run, f)

In [None]:
# 4DVar
# Single 36-dimensional test-split run; the analysis, the evaluation segment of
# the nature run and the observations are pickled for later visualization.

system_dim = 36
i = 0
obs_location_count = 18
obs_sd = 0.4
random_seed = 99 + i

run_dict = dict(system_dim=system_dim, 
                nr_steps=nr_steps,
                spinup_size=spinup_size,
                valid_size=valid_size,
                transient_size=transient_size,
                test_size=test_size,
                test_run=True,
                delta_t=delta_t,
                obs_location_count=obs_location_count,
                obs_sd=obs_sd,
                sigma_bg=obs_sd/1.5,
                sigma_obs=obs_sd*1.25,
                analysis_window=analysis_window,
                analysis_time_in_window=analysis_time_in_window,
                random_seed=random_seed,
                n_outer_loops = n_outer_loops)

out_4dvar, error_4dvar, obs_vec_l96, nature_run, da_time = run_4dvar(**run_dict)

run_dict['time'] = da_time
run_dict['rmse'] = error_4dvar
run_dict['run_num'] = i

print('Run {}, Time = {}'.format(i,run_dict['time']))
print(obs_location_count, obs_sd, error_4dvar)

# Write statevecs. Each `with` block closes its file on exit, so no explicit
# close() call is needed afterwards.
out_file = './out/l96/l96_4dvar_results_18obs_36dim_sd04_v1.pkl'
with open(out_file, 'wb') as f:
    pickle.dump(out_4dvar, f)

out_file = './out/l96/l96_4dvar_nr_18obs_36dim_sd04_v1.pkl'
with open(out_file, 'wb') as f:
    pickle.dump(nature_run, f)

out_file = './out/l96/l96_4dvar_obsvec_18obs_36dim_sd04_v1.pkl'
with open(out_file, 'wb') as f:
    pickle.dump(obs_vec_l96, f)

In [None]:
# Backprop 4DVar w/ approximate Hessian
# Single 36-dimensional test-split run; the analysis is pickled for later
# visualization.

system_dim = 36
i = 0
obs_location_count = 18
obs_sd = 0.4

# Tuned optimiser settings for this (num_obs, obs_sd) combination; obs_sd is
# rounded on both sides so float values compare equal.
best_results_filtered = best_results_heatmap.loc[
    (best_results_heatmap['obs_sd'].round(2)==round(obs_sd, 2)) & 
    (best_results_heatmap['num_obs']==obs_location_count)]
learning_rate = best_results_filtered['config/lr'].values[0]
lr_decay = best_results_filtered['config/lr_decay'].values[0]

random_seed = 99 + i

run_dict = dict(system_dim=system_dim, 
    nr_steps=nr_steps,
    spinup_size=spinup_size,
    valid_size=valid_size,
    transient_size=transient_size,
    test_size=test_size,
    test_run=True,
    delta_t=delta_t,
    obs_location_count=obs_location_count,
    obs_sd=obs_sd,
    sigma_bg=obs_sd/1.5,
    sigma_obs=obs_sd*1.25,
    analysis_window=analysis_window,
    analysis_time_in_window=analysis_time_in_window,
    random_seed=random_seed,
    num_iters=num_iters,
    learning_rate=learning_rate,
    lr_decay=lr_decay)

out_bp, error_bp, obs_vec_l96, nature_run, da_time = run_backprop_4dvar(**run_dict)
run_dict['time'] = da_time
run_dict['rmse'] = error_bp
run_dict['run_num'] = i

print('Run {}, Time = {}'.format(i,run_dict['time']))
print(obs_location_count, obs_sd, error_bp)

# Write statevecs. The `with` block closes the file on exit, so no explicit
# close() call is needed afterwards.
out_file = './out/l96/l96_bp_results_18obs_36dim_sd04_v1.pkl'
with open(out_file, 'wb') as f:
    pickle.dump(out_bp, f)