# Functions for neogrowth model

## Description

This file stores the functions used by the notebook `neogrowth_model.ipynb`.

## I. Time iteration

The time iteration code is taken from the QuantEcon website.

Source: https://python.quantecon.org/coleman_policy_iter.html


In [1]:
def v_star(y, α, β, μ):
    """
    Closed-form ("true") value function evaluated at income ``y``.

    ``α`` is the production parameter, ``β`` the discount factor and ``μ``
    the shock location parameter. ``y`` may be a scalar or a NumPy array.
    """
    # Constant term and the two geometric-sum factors.
    intercept = np.log(1 - α * β) / (1 - β)
    discount_sum = 1 / (1 - β)
    slope = 1 / (1 - α * β)
    # Contribution of the shock mean and of the savings rate.
    shock_term = (μ + α * np.log(α * β)) / (1 - α)
    return intercept + shock_term * (discount_sum - slope) + slope * np.log(y)

def σ_star(y, α, β):
    """
    Closed-form ("true") optimal consumption policy.

    Consumption is the constant share ``1 - α * β`` of income ``y``.
    """
    consumption_share = 1 - α * β
    return consumption_share * y

In [2]:
# Type specification handed to numba's `jitclass`: one (name, numba type)
# pair per attribute assigned in `OptimalGrowthModel.__init__`.
opt_growth_data = [
    ('α', float64),          # Production parameter
    ('β', float64),          # Discount factor
    ('μ', float64),          # Shock location parameter
    ('s', float64),          # Shock scale parameter
    ('grid', float64[:]),    # Grid (array)
    ('shocks', float64[:])   # Shock draws (array)
]

@jitclass(opt_growth_data)
class OptimalGrowthModel:
    """
    Optimal growth model with log utility and production function k**α.

    Stores the parameters, an income grid and a fixed sample of lognormal
    shock draws exp(μ + s * Z), Z standard normal, for Monte Carlo
    integration.
    """

    def __init__(self,
                α=0.4, 
                β=0.96, 
                μ=0,
                s=0.5,
                grid_max=4,
                grid_size=120,
                shock_size=250,
                seed=1234):

        self.α, self.β, self.μ, self.s = α, β, μ, s

        # Set up grid (starts at 1e-5 rather than 0; u and u' are undefined at 0)
        self.grid = np.linspace(1e-5, grid_max, grid_size)

        # Store shocks (with a seed, so results are reproducible)
        np.random.seed(seed)
        self.shocks = np.exp(μ + s * np.random.randn(shock_size))


    def f(self, k):
        "The production function: k raised to the power α"
        return k**self.α


    def u(self, c):
        "The utility function: natural log of consumption"
        return np.log(c)

    def f_prime(self, k):
        "Derivative of f"
        return self.α * (k**(self.α - 1))


    def u_prime(self, c):
        "Derivative of u"
        return 1/c

    def u_prime_inv(self, c):
        "Inverse of u' (for log utility, u'(c) = 1/c is its own inverse)"
        return 1/c



NameError: name 'float64' is not defined

In [3]:
@njit
def euler_diff(c, σ, y, og):
    """
    Set up a function such that the root with respect to c,
    given y and σ, is equal to Kσ(y).

    Parameters
    ----------
    c : candidate consumption level at income y
    σ : policy values on og.grid (the current guess of the policy)
    y : current income
    og : instance of OptimalGrowthModel

    Returns u'(c) - β * mean(u'(σ(f(y - c) * shocks)) * f'(y - c) * shocks),
    where the mean is taken over the shock draws stored in og.shocks.
    """

    β, shocks, grid = og.β, og.shocks, og.grid
    f, f_prime, u_prime = og.f, og.f_prime, og.u_prime

    # First turn σ into a function via interpolation
    # NOTE(review): `interp` is not defined in this file; presumably the
    # linear interpolation routine used in the QuantEcon lecture — confirm.
    σ_func = lambda x: interp(grid, σ, x)

    # Now set up the function we need to find the root of.
    # `vals` holds one term of the Euler-equation right-hand side per shock draw.
    vals = u_prime(σ_func(f(y - c) * shocks)) * f_prime(y - c) * shocks
    return u_prime(c) - β * np.mean(vals)

NameError: name 'njit' is not defined

In [4]:
@njit
def K(σ, og):
    """
    The Coleman-Reffett operator

     Here og is an instance of OptimalGrowthModel.

    Takes the policy values σ on og.grid and returns a new array of the
    same shape: at each grid point y the new value is the root in c of
    euler_diff(c, σ, y, og).
    """

    # NOTE(review): β, f, f_prime, u_prime and shocks are unpacked but not
    # used below; euler_diff reads them from `og` itself.
    β = og.β
    f, f_prime, u_prime = og.f, og.f_prime, og.u_prime
    grid, shocks = og.grid, og.shocks

    σ_new = np.empty_like(σ)
    for i, y in enumerate(grid):
        # Solve for optimal c at y
        # The root is bracketed strictly inside (0, y); element [0] of the
        # value returned by brentq is taken as the root.
        c_star = brentq(euler_diff, 1e-12, y-1e-12, args=(σ, y, og))[0]
        σ_new[i] = c_star

    return σ_new


NameError: name 'njit' is not defined

In [5]:
def solve_model_time_iter(model,    # Class with model information
                          σ,        # Initial condition
                          tol=1e-8,
                          max_iter=10000,
                          verbose=True,
                          print_skip=25):
    """
    Iterate the operator K on the policy σ until convergence.

    Parameters
    ----------
    model : object passed as second argument to K
    σ : initial policy values (array)
    tol : stop once the sup-norm change between iterates is <= tol
    max_iter : maximum number of iterations
    verbose : print progress and the convergence message
    print_skip : print the error every `print_skip` iterations

    Returns
    -------
    The last policy iterate. If no iteration is performed (for instance
    ``max_iter=0``) the initial condition is returned unchanged.
    """
    # Set up loop
    i = 0
    error = tol + 1

    while i < max_iter and error > tol:
        σ_new = K(σ, model)
        error = np.max(np.abs(σ - σ_new))
        i += 1
        if verbose and i % print_skip == 0:
            print(f"Error at iteration {i} is {error}.")
        σ = σ_new

    if error > tol:
        print("Failed to converge!")
    elif verbose:
        print(f"\nConverged in {i} iterations.")

    # σ equals the latest iterate after each pass; returning it (rather than
    # σ_new) also works when the loop body never ran.
    return σ

## II. All-in-One

$$\mathcal{L}_2(\theta) = \frac{1}{T} \sum_{t=1}^{T} \Big( f(s_t,\epsilon_{1,t}|\theta) f(s_t,\epsilon_{2,t}|\theta) \Big) $$


In [6]:
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """
    Convert a scipy sparse matrix to a torch sparse (COO) float32 tensor.

    Parameters
    ----------
    sparse_mx : scipy sparse matrix (any format; converted to COO)

    Returns
    -------
    torch sparse COO tensor with the same shape and float32 values.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    # torch expects a (2, nnz) int64 index tensor: row indices, then columns.
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is a deprecated legacy constructor;
    # torch.sparse_coo_tensor is the supported replacement.
    return torch.sparse_coo_tensor(indices, values, shape)

def convert_sparse_matrix_to_sparse_tensor(X):
    """
    Convert a scipy sparse matrix to a tf sparse tensor.

    Parameters
    ----------
    X : scipy sparse matrix (any format; converted to COO)

    Returns
    -------
    tf.SparseTensor built from the (row, col) index pairs, the stored
    values and the shape of X.
    """
    coo = X.tocoo()
    # One (row, col) pair per stored value. np.mat was removed in NumPy 2.0,
    # so the (nnz, 2) index array is built with column_stack instead; it is
    # cast to int64 because SparseTensor indices are int64.
    indices = np.column_stack((coo.row, coo.col)).astype(np.int64)
    return tf.SparseTensor(indices, coo.data, coo.shape)

# Root mean square error
class RMSELoss(nn.Module):
    """Root-mean-square-error loss: square root of ``nn.MSELoss``."""

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return sqrt(MSE(yhat, y))."""
        mean_squared_error = self.mse(yhat, y)
        return torch.sqrt(mean_squared_error)
    
# Gaussian quadrature rule
# See: https://chaospy.readthedocs.io/en/master/api/chaospy.generate_quadrature.html
def dist(order, distribution, rule = "gaussian", sp=True):
    """
    Generate quadrature nodes and weights with chaospy.

    Thin wrapper around ``chaospy.generate_quadrature``; ``sp`` is passed
    as its ``sparse`` argument. Returns the pair ``(nodes, weights)``.
    """
    nodes, weights = chaospy.generate_quadrature(
        order, distribution, rule=rule, sparse=sp)
    return nodes, weights

def create_W_expanded_matrix(M, N):
    """
    Build the sparse block-diagonal matrix kron(I_M, U) as a torch tensor.

    U is the N x N strictly upper-triangular matrix of ones (zeros on and
    below the diagonal); it is repeated M times along the diagonal. The
    result is converted with ``sparse_mx_to_torch_sparse_tensor``.
    """
    # Strictly upper-triangular ones: k=1 drops the main diagonal.
    strict_upper = sparse.csr_matrix(np.triu(np.ones((N, N)), k=1))
    # M x M identity, which places one copy of U per diagonal block.
    identity_M = sparse.csr_matrix(np.eye(M))
    block_diagonal = sparse.kron(identity_M, strict_upper)
    return sparse_mx_to_torch_sparse_tensor(block_diagonal)

NameError: name 'nn' is not defined

In [None]:
def truncated_normal(mean, std, lower, upper, len_series):
    """
    Draw ``len_series`` values from a normal(mean, std) truncated to
    [lower, upper], returned as a tensor of shape (len_series, 1).
    """
    # trunc_normal_ fills the tensor in place and returns that same tensor.
    return nn.init.trunc_normal_(torch.empty(len_series, 1), mean, std, lower, upper)

In [7]:
def sim_shocks(e_distribution, μ_e, σ_e, trunc_low, trunc_high, len_series):
    """
    Simulate a series of shocks.

    Parameters
    ----------
    e_distribution : one of "Normal", "T", "Lognormal", "Lognormal_2",
        "Trunc_Lognormal", "Beta"
    μ_e, σ_e : location and scale parameters (used by the normal-based
        distributions; ignored by "T" and "Beta")
    trunc_low, trunc_high : truncation bounds, used by "Trunc_Lognormal" only
    len_series : number of draws

    Returns
    -------
    Tensor of shape (len_series, 1).

    Raises
    ------
    ValueError : if `e_distribution` is not one of the names above.
    """
    # Generate shocks
    if e_distribution == "Normal":
        e_shock_series = torch.normal(mean=0, std=σ_e, size=(len_series,)).unsqueeze(1)
    elif e_distribution == "T":
        m = torch.distributions.StudentT(torch.tensor([3.0]))
        # The sample already has shape (len_series, 1) because df has shape
        # (1,); it is kept as a column so every branch returns the same shape.
        e_shock_series = m.sample([len_series])
    elif e_distribution == "Lognormal":
        # Normal iid shocks with std σ_e, scaled by σ_e again below.
        # NOTE(review): the log-shock therefore has std σ_e**2; "Lognormal_2"
        # is the variant that starts from a standard normal.
        e_shock_series = torch.normal(mean=0, std=σ_e, size=(len_series,)).unsqueeze(1)
        # Transform to lognormal
        e_shock_series = torch.exp(μ_e + σ_e * e_shock_series)
    elif e_distribution == "Lognormal_2":
        # Standard Normal iid shocks
        e_shock_series = torch.normal(mean=0, std=1.0, size=(len_series,)).unsqueeze(1)
        # Transform to lognormal
        e_shock_series = torch.exp(μ_e + σ_e * e_shock_series)
    elif e_distribution == "Trunc_Lognormal":
        # Trunc normal
        e_shock_series = truncated_normal(μ_e, σ_e, trunc_low, trunc_high, len_series)
        # Transform to lognormal
        e_shock_series = torch.exp(e_shock_series)
    elif e_distribution == "Beta":
        m = torch.distributions.Beta(torch.tensor([0.5]), torch.tensor([0.5]))
        e_shock_series = torch.exp(m.sample((len_series,)))
    else:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError(f"Distribution {e_distribution} unknown.")
    return e_shock_series

def simulate_shocks(params, len_series):
    """Draw `len_series` shocks using the shock settings stored on `params`."""
    return sim_shocks(
        e_distribution=params.e_distribution,
        μ_e=params.μ_e,
        σ_e=params.σ_e,
        trunc_low=params.trunc_low,
        trunc_high=params.trunc_high,
        len_series=len_series,
    )
    

In [8]:
def sim_states(params, len_series):
    """
    Draw `len_series` values of the state variable.

    The distribution is selected by ``params.x_distribution``:
    "Uniform" and "Uniform_centered" draw between params.x_low and
    params.x_high; "Lognormal_2" / "Lognormal_3" exponentiate a normal with
    mean params.mm and std params.ss / params.ss_Lognormal_3; "Normal" draws
    a centered normal with std params.σ_x.

    Returns
    -------
    Tensor of shape (len_series, 1) for the torch-generated branches.
    NOTE(review): the Sobol branch returns whatever
    ``params.soboleng.draw(len_series)`` produces — presumably
    (len_series, 1) for a one-dimensional engine; confirm.

    Raises
    ------
    ValueError : if ``params.x_distribution`` is not recognised.
    """
    if params.x_distribution == "Uniform":
        if not params.use_Sobol_T:
            x = ((params.x_low - params.x_high) * torch.rand(len_series) + params.x_high).unsqueeze(1)
        else:
            # Very slow if T is large
            x = ((params.x_low - params.x_high) * params.soboleng.draw(len_series) + params.x_high)
    elif params.x_distribution == "Uniform_centered":
        x = ((params.x_low - params.x_high) * torch.rand(len_series) + params.x_high).unsqueeze(1)
    elif params.x_distribution == "Lognormal_2":
        # log(y_t) is Normal(α/(1 - α)*log(α*β), σ_e**2 / (1 - α**2))
        log_x = torch.normal(mean=params.mm, std=params.ss, size=(len_series,)).unsqueeze(1)
        # Transform to lognormal
        x = torch.exp(log_x)
    elif params.x_distribution == "Lognormal_3":
        # log(y_t) is Normal, with mean α/(1 - α)*log(α*β);
        # std dev is stored in params.ss_Lognormal_3
        log_x = torch.normal(mean=params.mm, std=params.ss_Lognormal_3, size=(len_series,)).unsqueeze(1)
        # Transform to lognormal
        x = torch.exp(log_x)
    elif params.x_distribution == "Normal":
        x = torch.normal(mean=0, std=params.σ_x, size=(len_series,)).unsqueeze(1)
    else:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError(f"Distribution {params.x_distribution} unknown.")
    return x


In [9]:
def Ξ_torch(model, params): # objective function if using all-in-one
    """
    All-in-one loss: mean over params.T state draws of the product of two
    residuals evaluated at the same state under two independent shock series.
    """
    n_draws = params.T

    # Current states, then two independent shock series of the same length.
    states = sim_states(params, n_draws)
    shocks_a = simulate_shocks(params, n_draws)
    shocks_b = simulate_shocks(params, n_draws)

    # Residuals at the same states under each shock realization.
    resid_a = Residuals_torch(model, params, states, shocks_a)
    resid_b = Residuals_torch(model, params, states, shocks_b)

    # Product of the two residuals, averaged over all draws.
    return torch.mean(resid_a*resid_b)

def Residuals_torch(model, params, y, e_r, version_resid = 6):
    """
    Euler-equation residuals implied by the policy `model`.

    Parameters
    ----------
    model : callable mapping a state tensor to consumption
    params : object providing α, β, f, f_prime and u_prime
    y : current state
    e_r : shock realization, multiplied elementwise with f(investment)
    version_resid : residual formulation (default 6)
        1: RHS - LHS (no rescaling)
        2: (RHS - LHS) / (0.5*RHS + 0.5*LHS)
        3: 1 - LHS/RHS
        4: 1 - RHS/LHS
        5: 1 - α*β*y/investment
        6: Euler equation in logs
        with LHS = u'(c) and RHS = β*u'(c')*f'(investment)*e_r.

    Raises
    ------
    ValueError : if `version_resid` is not one of 1..6 (previously this
        surfaced as an unbound-variable error on `R`).
    """
    if version_resid not in (1, 2, 3, 4, 5, 6):
        raise ValueError(f"version_resid {version_resid} unknown; expected an integer from 1 to 6.")

    # consumption today
    c = model(y)

    # implies some investment
    investment = y - c
    # investment scaled by shock
    state_tomorrow = params.f(investment)*e_r

    # consumption tomorrow
    c_tomorrow = model(state_tomorrow)

    if version_resid < 5:
        # Both sides of the Euler equation are only needed by versions 1-4.
        LHS = params.u_prime(c)
        vals = params.u_prime(c_tomorrow)*params.f_prime(investment)*e_r
        RHS = params.β * vals

    if version_resid == 1: # No rescaling
        R = (RHS - LHS)
    elif version_resid == 2: # Symmetric rescaling
        R = (RHS - LHS)/(0.5*RHS + 0.5*LHS)
    elif version_resid == 3: # Other rescaling
        R = 1 - (LHS/RHS)
    elif version_resid == 4: # Other rescaling
        R = 1 - (RHS/LHS)
    elif version_resid == 5: # Simplified form
        R = 1 - params.α*params.β*(y/investment)
    else: # version 6: logs (the default)
        R = torch.log(torch.tensor(params.α*params.β)) + torch.log(c) - torch.log(c_tomorrow) + (params.α-1)*torch.log(investment) + torch.log(e_r)
    return R


In [2]:
def approx_linear(y, α, β, gamma):
    """
    Approximate consumption policy around the closed-form solution.

    Returns ``(1 - α*β + gamma*log(y))*y`` minus a constant that corrects
    the mean error; with ``gamma = 0`` this is ``(1 - α*β)*y``.

    Parameters
    ----------
    y : state (torch tensor)
    α, β : model parameters; Python floats or tensors
    gamma : coefficient of the log(y) perturbation
    """
    # torch.log only accepts tensors, so wrap α*β; as_tensor leaves an
    # existing tensor untouched and converts a Python float.
    log_αβ = torch.log(torch.as_tensor(α*β))
    # Almost the true solution, corrected for the mean error.
    return (1 - α*β + gamma*torch.log(y))*y - (gamma/(1 - α*β))*(α/(1 - α))*log_αβ
    
def Residuals_torch_linear(model, params, y, e_r, gamma, version_resid = 6):
    """
    Euler-equation residuals using `approx_linear` in place of the neural net.
    Good when close to the final solution.

    Parameters
    ----------
    model : unused; kept so the signature parallels `Residuals_torch`
    params : object providing α, β, f, f_prime and u_prime
    y : current state
    e_r : shock realization
    gamma : perturbation coefficient passed to `approx_linear`
    version_resid : residual formulation, 1..6, same meaning as in
        `Residuals_torch` (default 6, the log form)

    Raises
    ------
    ValueError : if `version_resid` is not one of 1..6.
    """
    if version_resid not in (1, 2, 3, 4, 5, 6):
        raise ValueError(f"version_resid {version_resid} unknown; expected an integer from 1 to 6.")

    # consumption today
    c = approx_linear(y, params.α, params.β, gamma)

    # implies some investment
    investment = y - c

    # investment scaled by shock
    state_tomorrow = params.f(investment)*e_r

    # consumption tomorrow
    c_tomorrow = approx_linear(state_tomorrow, params.α, params.β, gamma)

    if version_resid < 5:
        # Both sides of the Euler equation, as in Residuals_torch. RHS was
        # previously never defined, so versions 1-4 raised NameError.
        LHS = params.u_prime(c)
        RHS = params.β * params.u_prime(c_tomorrow)*params.f_prime(investment)*e_r

    if version_resid == 1: # No rescaling
        R = (RHS - LHS)
    elif version_resid == 2: # Symmetric rescaling
        R = (RHS - LHS)/(0.5*RHS + 0.5*LHS)
    elif version_resid == 3: # Other rescaling
        R = 1 - (LHS/RHS)
    elif version_resid == 4: # Other rescaling
        R = 1 - (RHS/LHS)
    elif version_resid == 5: # Simplified form
        R = 1 - params.α*params.β*(y/investment)
    else: # version 6: logs (the default)
        R = torch.log(torch.tensor(params.α*params.β)) + torch.log(c) - torch.log(c_tomorrow) + (params.α-1)*torch.log(investment) + torch.log(e_r)
    return R


def sim_states_linear(params, len_series, gamma):
    """
    Simulate states from the (approximate) ergodic distribution.

    Parameters
    ----------
    params : object providing x_distribution and, depending on it,
        α, β, σ_e ("Lognormal_2") or mm, ss_Lognormal_3 ("Lognormal_3")
    len_series : number of draws
    gamma : currently unused (the gamma-dependent moments are disabled)

    Returns
    -------
    Tensor of shape (len_series, 1) with strictly positive entries.

    Raises
    ------
    ValueError : if params.x_distribution is neither "Lognormal_2" nor
        "Lognormal_3" (previously `x` was left unbound).
    """
    if params.x_distribution == "Lognormal_2":
        # log(y_t) is Normal with mean α/(1-α)*log(α*β) and
        # variance σ_e**2/(1 - α**2).
        mean_ = (params.α/(1 - params.α))*torch.log(torch.tensor(params.α*params.β))
        var_ = (params.σ_e**2)/(1 - params.α**2)
        std_ = torch.sqrt(torch.tensor([var_]))
        log_x = torch.normal(mean=mean_.item(), std=std_.item(), size=(len_series,)).unsqueeze(1)
        # return y_t
        x = torch.exp(log_x)
    elif params.x_distribution == "Lognormal_3":
        # std dev is stored in params.ss_Lognormal_3
        log_x = torch.normal(mean=params.mm, std=params.ss_Lognormal_3, size=(len_series,)).unsqueeze(1)
        # Transform to lognormal.
        x = torch.exp(log_x)
    else:
        raise ValueError(f"Distribution {params.x_distribution} unknown.")
    return x

def calculate_variance_gaussian_linear(params, model, nb_draws, grid_M, grid_N, gamma = 1e-6):
    """
    Calculate the variance of the loss when the joint Gaussian assumption
    holds, using the approximate (linear) solution for the residuals.

    Parameters
    ----------
    params : model/simulation parameters
    model : forwarded to `Residuals_torch_linear`
    nb_draws : number of Monte Carlo draws used to estimate the moments
    grid_M, grid_N : values of M and N; T = M*N/2
    gamma : perturbation coefficient of the approximate policy

    Returns
    -------
    (var_L, var_val, cov_val, b1, b2, b3, b4, b5) where var_L is the loss
    variance and b1..b5 are its components: b1-b3 increase with N, b4
    decreases with N, b5 does not depend on N (for fixed T).
    """
    grid_T = torch.tensor(grid_M*grid_N/2)
    grid_N = torch.tensor(grid_N)

    # Calculate variance and covariance
    with torch.no_grad():
        # Draw the states and two independent shock series.
        x = sim_states(params, nb_draws)

        e1 = simulate_shocks(params, nb_draws)
        e2 = simulate_shocks(params, nb_draws)

        # Residuals at the same states under 2 realizations of shocks.
        # Use the `model` argument (previously the global `model_bcMC`).
        R1 = Residuals_torch_linear(model, params, x, e1, gamma)
        R2 = Residuals_torch_linear(model, params, x, e2, gamma)

        # Moments, averaged over the two (exchangeable) residual series.
        mean_val = 0.5*torch.mean(R1) + 0.5*torch.mean(R2)
        var_R1 = torch.var(R1)
        var_R2 = torch.var(R2)
        var_val = 0.5*var_R1 + 0.5*var_R2

        # Covariance between the two residuals at the same state.
        cov_val = torch.cov(torch.column_stack((R1, R2)).T)[0,1]

        var_L = (1/(grid_T*(grid_N - 1)))*((grid_N**2 - 3*grid_N + 3)*(cov_val**2) + (2*(grid_N - 2)*cov_val + var_val)*var_val + 2*(grid_N - 1)*(var_val + (grid_N - 1)*cov_val)*(mean_val**2))

        # Break the variance of the loss into different elements
        # increasing with N
        b1 = (1/grid_T)*((grid_N **2 - 3*grid_N + 3)/(grid_N - 1))*(cov_val**2)
        b2 = (1/grid_T)*(2*(grid_N - 2)/(grid_N - 1))*cov_val*var_val
        b3 =(1/grid_T)*(2*(grid_N - 1))*(cov_val*mean_val**2)
        # decreasing in N
        b4 = (1/grid_T)*(var_val**2)/(grid_N - 1)
        # constant in N
        b5 = (1/grid_T)*2*var_val*mean_val**2

        return var_L, var_val, cov_val, b1, b2, b3, b4, b5

In [1]:
def sim_true_model(α, β, func, e_distribution, μ_e, σ_e, trunc_low, trunc_high, len_series, tol=torch.tensor([1e-6]), mult_std = 2.0):
    """
    Simulate the model under the closed-form policy and summarise the series.

    Output y, consumption c, capital k and the shock e are simulated for
    `len_series` periods starting from (α*β)**(α/(1 - α)). `func` and `tol`
    are accepted but not used.

    Returns
    -------
    (df_series, series_names, list_mean, list_std, list_CI_lower,
    list_CI_upper): the simulated DataFrame, its column names, and per-column
    mean, std and the band exp(mean(log) -/+ mult_std*std(log)).
    """
    series_names = ['y', 'c', 'k', 'e']
    saving_share = α*β
    consumption_share = 1.0 - α*β

    with torch.no_grad():
        y_series = torch.zeros(len_series)
        c_series = torch.zeros(len_series)
        k_series = torch.zeros(len_series)

        # Initial values.
        y_series[0] = saving_share**(α/(1 - α))
        c_series[0] = consumption_share*y_series[0]
        k_series[0] = saving_share*y_series[0]

        # One shock per period, flattened to a 1-D series.
        e_shock_series = sim_shocks(e_distribution, μ_e, σ_e, trunc_low, trunc_high, len_series).squeeze(1)

        # NOTE(review): c[t] and k[t] are both computed from y[t-1].
        for t in range(1, len_series):
            previous_y = y_series[t-1]
            c_series[t] = consumption_share*previous_y
            k_series[t] = saving_share*previous_y
            y_series[t] = (k_series[t]**α)*e_shock_series[t]

    df_series = pd.DataFrame({'y': y_series, 'c': c_series, 'k': k_series, 'e': e_shock_series})

    # Summary statistics; the band is computed on the logged series.
    list_mean = []
    list_std = []
    list_CI_lower = []
    list_CI_upper = []
    for name in series_names:
        column = df_series[name]
        list_mean.append(np.mean(column))
        list_std.append(np.std(column))
        log_column = np.log(column)
        center = np.mean(log_column)
        half_width = mult_std*np.std(log_column)
        list_CI_lower.append(np.exp(center - half_width))
        list_CI_upper.append(np.exp(center + half_width))

    return df_series, series_names, list_mean, list_std, list_CI_lower, list_CI_upper

def simulate_true_model(params, len_series =10000):
    """Run `sim_true_model` with the settings stored on `params`."""
    model_args = (params.α, params.β, params.f)
    shock_args = (params.e_distribution, params.μ_e, params.σ_e,
                  params.trunc_low, params.trunc_high)
    return sim_true_model(*model_args, *shock_args, len_series)

NameError: name 'torch' is not defined

## III. bc-MC operator

* Implement the developed formula

$$ \frac{1}{M} \frac{2}{N(N-1)} \sum_{m=1}^{M} \sum_{1 \leq i < j \leq N} f(s_m, \epsilon_{m}^{(i)}) f(s_m, \epsilon_{m}^{(j)}) $$

* This formula can be vectorized as follows:


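$$ \frac{2}{M N (N-1)} \, f' \Big(I_M \otimes U\Big) f $$

where $f$ stacks the $MN$ residuals and $U$ is the $N \times N$ strictly upper-triangular matrix of ones.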

In [11]:
def Ξ_torch_MC(model, params): # objective function
    """
    bc-MC loss: for each of params.M state draws, N shocks are drawn and
    the products of residual pairs are combined through params.W_expanded,
    scaled by 2/(M*N*(N-1)).
    """
    # M state draws, each repeated N times -> (M*N, 1)
    states = sim_states(params, params.M)
    states_repeated = states.repeat_interleave(params.N).unsqueeze(1)

    # One shock per repeated state: M*N draws in total
    shocks = simulate_shocks(params, params.MN)

    # Residuals as a column vector
    residual_column = Residuals_torch(model, params, states_repeated, shocks).squeeze(1).unsqueeze(1)

    # Quadratic form f' W f, then the scaling factor
    weighted = torch.matmul(params.W_expanded, residual_column)
    quadratic_form = torch.matmul(residual_column.t(), weighted)
    scale = 2/((params.M)*(params.N)*(params.N - 1))
    return torch.mean(scale*quadratic_form)


In [None]:
def draw_residual(model, params): # objective function
    """
    Draw one sample of residuals (no gradient tracking): params.M states,
    each repeated params.N times, paired with params.MN shock draws.
    """
    with torch.no_grad():
        # (M*N, 1) column of repeated states
        states_repeated = sim_states(params, params.M).repeat_interleave(params.N).unsqueeze(1)

        # N draws for each state value
        shocks = simulate_shocks(params, params.MN)

        residuals = Residuals_torch(model, params, states_repeated, shocks)
        R = residuals.squeeze(1)

    return R

In [None]:
def evaluate_accuracy_pytorch_frozen_Gaussian(model, params, debug = False, distance_f = torch.abs):
    """
    Evaluate Euler-equation errors on the test grid params.xvec_test_torch.

    The expectation is approximated with the fixed nodes params.nodes_torch,
    combined through the sparse weight matrix params.W_gaussian. With
    `debug` set to True, params.true_function is used as the policy in
    place of `model`.

    Returns a NumPy array holding distance_f(1 - 1/expectation) per grid point.
    """
    with torch.no_grad():
        policy = params.true_function if debug == True else model
        states = params.xvec_test_torch
        nodes_per_state = params.n_nodes

        # Consumption today, its marginal utility and implied investment
        c = policy(states)
        marginal_utility_today = params.u_prime(c)
        investment = states - c

        # Repeat each grid value once per node, as a column
        marginal_utility_rep = marginal_utility_today.repeat_interleave(nodes_per_state).unsqueeze(1)
        investment_rep = investment.repeat_interleave(nodes_per_state).unsqueeze(1)

        # Tile the nodes once per grid point, as a column
        nodes_rep = params.nodes_torch.repeat(len(c)).unsqueeze(1)

        # State tomorrow: production of investment scaled by each node
        state_tomorrow = (params.f(investment_rep)*nodes_rep).float()
        c_tomorrow = policy(state_tomorrow)

        # Integrand of the Euler equation at every (grid point, node) pair
        ratio = params.u_prime(c_tomorrow)/marginal_utility_rep
        vals = ratio*params.α*(state_tomorrow/investment_rep)

        # Weighted sum over nodes for each grid point
        expect_t = params.β*torch.sparse.mm(params.W_gaussian, vals).squeeze(1)
        euler_resid = distance_f(1.0 - (1.0/expect_t))

        return euler_resid.numpy()


## Optimal choice of training parameters

### Monte Carlo estimator: optimal choice of M and N


In [None]:
def compute_grad_norm(parameters, norm_type=2.0):
    """
    Compute norm over gradients of model parameters.

    :param parameters:
        the model parameters for gradient norm calculation. Iterable of
        Tensors or single Tensor. Entries that are None or have no
        gradient are skipped.
    :param norm_type:
        type of p-norm to use; ``float('inf')`` gives the largest absolute
        gradient entry

    :returns:
        the computed gradient norm as a Python float (0.0 when no parameter
        has a gradient)
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p.grad.detach() for p in parameters if p is not None and p.grad is not None]
    if not grads:
        return 0.0

    norm_type = float(norm_type)
    per_param_norms = [g.norm(norm_type).item() for g in grads]
    if norm_type == float("inf"):
        # The p-th power / p-th root recombination degenerates for p = inf
        # (inf ** 0 == 1), so combine the per-parameter max norms directly.
        return max(per_param_norms)
    total = sum(n ** norm_type for n in per_param_norms)
    return total ** (1.0 / norm_type)

# Calculate the variance of the loss for several choices of M and N
# Start with a fresh model. Then train it for some iterations.
# Then calculate variance of loss
def calculate_variance_loss(params, nb_epoch_opt_M_N, nb_rep, 
                            nb_draws_loss, grid_M, grid_N,
                            norm_chosen = 2.0, freq_accuracy=1000, 
                            calculate_variance_gradient = False,
                            initial_guess = (1.0, 1.0)):
    """
    Estimate the variance of the loss (and optionally of the gradient norm)
    for several (M, N) pairs.

    For each of `nb_rep` repetitions a fresh network is trained for
    `nb_epoch_opt_M_N` iterations; then, for every pair taken from
    `grid_N` / `grid_M`, the loss is evaluated `nb_draws_loss` times and its
    mean, variance and standard deviation are stored.

    Parameters
    ----------
    params : training/model parameters
    nb_epoch_opt_M_N : training iterations per repetition
    nb_rep : number of repetitions
    nb_draws_loss : loss (and gradient) evaluations per (M, N) pair
    grid_M, grid_N : NumPy arrays of matching length with the M and N values
    norm_chosen : p-norm used for the gradient norm
    freq_accuracy : every this many iterations, record the median
        percentage error against params.true_function
    calculate_variance_gradient : also compute gradient-norm statistics
    initial_guess : pair passed to `set_initial_values`

    Returns
    -------
    (df_variance_loss, df_variance_loss_median): all repetitions stacked,
    and per-M median plus min/max/std/percentile columns.

    Raises
    ------
    ValueError : if params.optimizer is not "Adam", "SGD" or "SWA".
    """
    # To store results
    df_variance_loss = pd.DataFrame()
    print("Norm chosen: {}".format(norm_chosen))

    for k in range(0, nb_rep):
        #-----------------------------------------
        # A. Train the model for nb_epoch_opt_M_N
        #-----------------------------------------
        # Initialize a network
        print("Rep {} / {}. Training a model for {} Iterations".format(int(k), nb_rep, nb_epoch_opt_M_N))
        model_opt_M_N = NeuralNetwork().to(device) # New model

        # Set initial value
        set_initial_values(model_opt_M_N, initial_guess[0], initial_guess[1])

        # Training mode
        model_opt_M_N.train()

        if params.optimizer == "Adam":
            optimizer = torch.optim.Adam(model_opt_M_N.parameters(), lr=params.lr, eps=1e-07, betas=(0.9, 0.999))
        elif params.optimizer == "SGD":
            optimizer = torch.optim.SGD(model_opt_M_N.parameters(), params.lr)
        elif params.optimizer == "SWA":
            base_opt = torch.optim.Adam(model_opt_M_N.parameters(), lr=params.lr, eps=1e-07, betas=(0.9, 0.999))
            optimizer = SWA(base_opt, swa_start=params.swa_start, swa_freq=params.swa_freq, swa_lr=params.lr)
        else:
            # Raising a bare string is itself a TypeError; raise a real exception.
            raise ValueError("optimizer unknown")

        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=params.freq_gamma)
        list_perc_abs_error_opt_M_N = [] # store abs value percentage error
        list_perc_abs_error_opt_M_N_i = []
        loss_epochs_opt_M_N = torch.zeros(nb_epoch_opt_M_N)

        for i in range(0, nb_epoch_opt_M_N):

            optimizer.zero_grad() # clear gradient

            loss = Ξ_torch_MC(model_opt_M_N, params) # loss
            loss_epochs_opt_M_N[i] = float(loss.item())

            # Backpropagation
            loss.backward()
            optimizer.step()

            if i % freq_accuracy == 0: # Monitor the predictive power
                # Evaluate the network on the test grid
                with torch.no_grad():
                    xvec = params.xvec_test_torch
                    y = model_opt_M_N(xvec)
                xvec = xvec.detach().numpy()
                y = y.detach().numpy()
                perc_abs_error = 100*(np.abs((y - params.true_function(xvec))/params.true_function(xvec)))
                list_perc_abs_error_opt_M_N.append(np.median(perc_abs_error))
                list_perc_abs_error_opt_M_N_i.append(i)
            if i % 1000 == 0:
                loss, current = float(loss.item()), i
                print(f"loss: {loss:>7f}, median percentage euler error {list_perc_abs_error_opt_M_N[-1]:>7f}, [{current:>5d}/{nb_epoch_opt_M_N:>5d}]")
            if i % params.freq_scheduler == 0 and i != 0 and params.use_scheduler:
                scheduler.step()
                print("i : {}. Decreasing learning rate: {}".format(i, scheduler.get_last_lr()))

        if params.optimizer == "SWA":
            optimizer.swap_swa_sgd()
        #--------------------------------------
        # B. Calculate the variance of the loss
        #--------------------------------------
        print("Rep {} / {}. Calculting variance loss for {} Iterations".format(int(k), nb_rep, nb_draws_loss))

        model_opt_M_N.eval()
        var_loss = torch.zeros(len(grid_N))
        std_loss = torch.zeros(len(grid_N))
        mean_loss = torch.zeros(len(grid_N))

        # Loop over choice of N and M
        for (ind, (N_chosen, M_chosen)) in enumerate(zip(grid_N.astype('int'), grid_M.astype('int'))):
            # Change M and N
            # NOTE(review): calculate_variance_loss_model_grid additionally
            # forwards α, β, e_distribution and x_distribution to MyParams;
            # confirm whether they should be forwarded here as well.
            params_local = MyParams(N_chosen, M_chosen, params.lr, 
                                    params.pre_train_model, params.nb_epochs,
                                    params.order_gauss, params.σ_e, params.use_Sobol, 
                                    params.optimizer)

            with torch.no_grad():
                Xms = torch.zeros(nb_draws_loss)
                # Loop over realizations of loss function
                for j in range(nb_draws_loss):
                    Xms[j] = Ξ_torch_MC(model_opt_M_N, params_local)
                # Calculate mean and variance:
                var_loss[ind] = torch.var(Xms)
                std_loss[ind] = torch.sqrt(var_loss[ind])
                mean_loss[ind] = torch.mean(Xms)
        #-----------------------------------------------
        # C. Calculate variance of the norm the gradient
        #-----------------------------------------------
        print("Rep {} / {}. Calculting variance norm gradient for {} Iterations".format(int(k), nb_rep, nb_draws_loss))
        var_gradient_loss = torch.zeros(len(grid_N)) # variance
        std_gradient_loss = torch.zeros(len(grid_N)) # standard deviation
        mean_gradient_loss = torch.zeros(len(grid_N))

        if calculate_variance_gradient:
            for (ind, (N_chosen, M_chosen)) in enumerate(zip(grid_N.astype('int'), grid_M.astype('int'))):

                params_local = MyParams(N_chosen, M_chosen, params.lr, 
                        params.pre_train_model, params.nb_epochs,
                        params.order_gauss, params.σ_e, params.use_Sobol, 
                        params.optimizer)

                Xms = torch.zeros(nb_draws_loss)
                # Loop over draws
                for j in range(nb_draws_loss):
                    optimizer.zero_grad() # clear gradient
                    loss = Ξ_torch_MC(model_opt_M_N, params_local)
                    loss.backward() # calculate gradient
                    # Store the norm of the gradient
                    total_norm = compute_grad_norm(model_opt_M_N.parameters(), norm_type=norm_chosen)
                    Xms[j] = total_norm
                var_gradient_loss[ind] = torch.var(Xms)
                std_gradient_loss[ind] = torch.sqrt(var_gradient_loss[ind])
                mean_gradient_loss[ind] = torch.mean(Xms)

        # Results of this repetition, stacked below the previous ones
        df_rep = pd.DataFrame({'N': grid_N,
                               'M': grid_M,
                               'var_loss': var_loss,
                               'std_loss': std_loss,
                               'mean_loss': mean_loss,
                               'var_gradient_loss': var_gradient_loss,
                               'std_gradient_loss': std_gradient_loss,
                               'mean_gradient_loss': mean_gradient_loss,
                               'nb_rep': k})
        if df_variance_loss.empty:
            df_variance_loss = df_rep
        else:
            df_variance_loss = pd.concat([df_variance_loss, df_rep])

    # Replace NAN by large values. Need to penalize non convergence of gradient descent.
    list_cols = ["var_loss", "std_loss", "mean_loss", 
                "std_gradient_loss", "var_gradient_loss", "mean_gradient_loss"]

    for col in list_cols:
        df_variance_loss[col] = df_variance_loss[col].fillna(np.nanmax(df_variance_loss[col]))

    # Stats on df_var
    # Median values
    df_variance_loss_median = df_variance_loss.groupby('M').median().reset_index()

    for col in list_cols:
        df_variance_loss_median["min_" + col] = df_variance_loss.groupby('M')[col].min().reset_index()[col]
        df_variance_loss_median["max_" + col] = df_variance_loss.groupby('M')[col].max().reset_index()[col]
        df_variance_loss_median["std_" + col] = df_variance_loss.groupby('M')[col].std().reset_index()[col]
        for qq in [10, 25, 50, 75, 90]:
            df_variance_loss_median["P" + str(qq) + "_" + col] = df_variance_loss.groupby('M')[col].quantile(qq/100).reset_index()[col]

    return df_variance_loss, df_variance_loss_median


In [None]:
#Given a model, calculate the current variance of the loss
def calculate_variance_loss_model(model, params, nb_draws_loss):
    """
    Evaluate the loss `nb_draws_loss` times for the given model and return
    (variance, mean) of the draws.

    The model is put in eval mode for the evaluation and switched to train
    mode before returning.
    """
    model.eval()
    with torch.no_grad():
        loss_draws = torch.zeros(nb_draws_loss)
        # One independent realization of the loss per entry
        for draw in range(nb_draws_loss):
            loss_draws[draw] = Ξ_torch_MC(model, params)
        var_loss = torch.var(loss_draws)
        mean_loss = torch.mean(loss_draws)
    model.train()
    return var_loss, mean_loss

# Given a model, calculate the current variance of the loss for each (N, M) pair of a grid
def calculate_variance_loss_model_grid(model, params, nb_draws_loss, grid_N, grid_M):
    """
    Variance of the loss of `model` for each (N, M) pair taken from
    `grid_N` / `grid_M`; returns a tensor with one variance per pair.
    """
    variances = torch.zeros(len(grid_N))
    for position, (n_value, m_value) in enumerate(zip(grid_N, grid_M)):
        # Same settings as `params`, with N and M replaced
        local_params = MyParams(int(n_value), int(m_value), params.lr, params.pre_train_model,
                                params.nb_epochs, params.order_gauss, params.σ_e, params.use_Sobol,
                                params.optimizer, params.α, params.β,
                                params.e_distribution, params.x_distribution)

        variance, _mean = calculate_variance_loss_model(model, local_params, nb_draws_loss)
        variances[position] = variance
    return variances
        

In [None]:
def create_optimizer(model, optimizer_name, lr, momentum):
    """
    Build a torch optimizer for the parameters of model.

    optimizer_name must be one of "Adam", "SGD", "SGD-momentum", "Adadelta"
    or "RMSprop". lr is the learning rate; momentum is only used by
    "SGD-momentum". LBFGS is not supported.

    Raises NameError if optimizer_name is not recognized.
    """
    # Each entry builds its optimizer lazily, so only the selected one is created
    builders = (
        ("Adam", lambda: torch.optim.Adam(model.parameters(), lr=lr)),
        ("SGD", lambda: torch.optim.SGD(model.parameters(), lr)),
        ("SGD-momentum", lambda: torch.optim.SGD(model.parameters(), lr, momentum)),
        ("Adadelta", lambda: torch.optim.Adadelta(model.parameters(), lr)),
        ("RMSprop", lambda: torch.optim.RMSprop(model.parameters(), lr)),
    )
    for known_name, build in builders:
        if optimizer_name == known_name:
            return build()
    raise NameError(f"optimizer {optimizer_name} unknown")


def calculate_effective_lr(optimizer, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Function to calculate the effective learning rate used in Adam update

    For every parameter that has a gradient and has been updated at least
    once, the element-wise effective learning rate is
        lr * sqrt(1 - beta2^t) / (1 - beta1^t) / (sqrt(v_t) + epsilon)
    where t is the optimizer step and v_t is the state entry 'exp_avg_sq'.

    Parameters
    ----------
    optimizer : optimizer exposing param_groups and state with the entries
        'step' and 'exp_avg_sq' (as stored by Adam)
    beta1, beta2, epsilon : Adam hyperparameters used in the formula. They are
        NOT read from the optimizer; the defaults are the PyTorch defaults
        for Adam (betas=(0.9, 0.999), eps=1e-08).

    Returns
    -------
    List of floats: the mean effective learning rate of each qualifying
    parameter. Parameters without gradient or without any step are skipped.

    Raises
    ------
    KeyError if a parameter state has a positive 'step' but no 'exp_avg_sq'.
    """
    effective_lr_list = []
    for param_group in optimizer.param_groups:
        for param in param_group['params']:
            if param.grad is None:
                continue
            state = optimizer.state[param]
            # 'step' may be stored as a Python number or as a 0-dim tensor
            # depending on the PyTorch version: work with a plain float so
            # that numpy and torch objects are not mixed in the formula
            step = float(state.get('step', 0))
            if step <= 0:
                continue
            v_t = state['exp_avg_sq']
            bias_correction = (1 - beta2 ** step) ** 0.5 / (1 - beta1 ** step)
            step_size = float(param_group['lr']) * bias_correction
            effective_lr = step_size / (torch.sqrt(v_t) + epsilon)
            effective_lr_list.append(effective_lr.mean().item())
    return effective_lr_list

def set_initial_values(model, w, b):
    """
    Function to set the weight and the bias of the first layer of
    linear_relu_stack to given values

    Every parameter whose name contains 'linear_relu_stack.0.weight' is
    overwritten in place with w, and every parameter whose name contains
    'linear_relu_stack.0.bias' is overwritten with b. Other parameters are
    left untouched. Nothing is returned.
    """
    # (name fragment, value to copy); the weight is checked before the bias
    targets = (
        ('linear_relu_stack.0.weight', w),
        ('linear_relu_stack.0.bias', b),
    )
    with torch.no_grad():
        for param_name, tensor in model.named_parameters():
            for fragment, value in targets:
                if fragment in param_name:
                    # one-element tensor, copied into the existing parameter
                    tensor.copy_(torch.tensor([value]))
                    break
                
def show_initial_values(model):
    """
    Function to print the weight and the bias of the first layer of
    linear_relu_stack

    Every parameter whose name contains 'linear_relu_stack.0.weight' or
    'linear_relu_stack.0.bias' is printed once. Nothing is returned.
    """
    with torch.no_grad():
        for name, param in model.named_parameters():
            if 'linear_relu_stack.0.weight' in name or 'linear_relu_stack.0.bias' in name:
                # a single print: nesting print(print(...)) also printed "None"
                print(param)

In [None]:
# Solve a model several times, holding initial parameters constant
def calculate_several_runs(params, nb_rep, freq_loss = 1, initial_guess = (1.0, 1.0)):
    """
    Train nb_rep networks from the same initial guess and collect statistics.

    For each repetition, a new NeuralNetwork is created, its first-layer
    weight and bias are set to initial_guess, and it is trained for
    params.nb_epochs steps on the loss Ξ_torch_MC(model, params). Every
    freq_loss steps, the coefficients (before the update) and the median
    absolute percentage error on params.xvec_test_torch (after the update)
    are recorded.

    Parameters
    ----------
    params : parameter object. Attributes read here: M, N, lr, optimizer
        ("Adam", "SGD" or "SWA"; swa_start and swa_freq are read for "SWA"),
        freq_gamma, nb_epochs, true_bias, true_weight, xvec_test_torch,
        true_function, use_scheduler, freq_scheduler, σ_e, use_Sobol.
    nb_rep : number of repetitions (at least 1)
    freq_loss : frequency (in steps) at which results are recorded
    initial_guess : (weight, bias) used to initialize each network

    Returns
    -------
    df_MC : one row per (repetition, recorded step). The column 'loss' holds
        sqrt(|loss|); 'intercept' and 'slope' hold the bias and the weight.
    df_MC_average : mean by step, plus min, max, std and percentiles of each
        monitored column, plus the settings of the run.

    Raises
    ------
    ValueError if nb_rep < 1.
    NameError if params.optimizer is unknown.
    """
    if nb_rep < 1:
        raise ValueError("nb_rep must be at least 1")

    frames = [] #one DataFrame per repetition

    # A.
    # Run several times
    for k in range(0, nb_rep):
        print("Training model : {} / {}".format(int(k), nb_rep))

        #-----------------------------------------------------
        # Train a model for some periods
        #-----------------------------------------------------
        # Initialize a network
        model_MC = NeuralNetwork().to(device)

        # Set initial value
        set_initial_values(model_MC, initial_guess[0], initial_guess[1])

        optimizer_MC = _build_run_optimizer(model_MC, params)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer_MC, gamma=params.freq_gamma)

        list_perc_abs_error_MC = [] #store abs value percentage error
        list_perc_abs_error_MC_i = [] #store index i
        list_perc_abs_error_MC_loss = [] #store loss
        list_beta = [] #store coefficients
        list_abs_perc_dev_bias = [] #absolute percentage deviation from true bias
        list_abs_perc_dev_weight = [] #absolute deviation from true weight (times 100)

        for i in range(0, params.nb_epochs):

            optimizer_MC.zero_grad() #clear gradient

            loss = Ξ_torch_MC(model_MC, params) #calculate loss
            loss_value = float(loss.item())
            record = (i % freq_loss == 0)

            # Store coefficients (values before the gradient step)
            if record:
                # Extract weight and bias (each parameter must hold one element)
                b_current = np.array([p.item() for p in model_MC.parameters()])
                b_current_ordered = np.array((b_current[1], b_current[0])) #reorder (bias, weight)
                list_beta.append(b_current_ordered)

                # Absolute percentage deviation from true bias
                dev_b = 100*(np.abs((b_current_ordered[0] - params.true_bias)/params.true_bias))
                # dev_b is indexed: assumes params.true_bias is array-like -- TODO confirm
                list_abs_perc_dev_bias.append(dev_b[0])
                # No division for the weight (cannot divide by 0)
                dev_w = 100*(np.abs((b_current_ordered[1] - params.true_weight)))
                list_abs_perc_dev_weight.append(dev_w)

            # Backpropagation
            loss.backward()

            # Gradient descent step
            optimizer_MC.step()

            if record: #Monitor the predictive power
                with torch.no_grad():
                    xvec = params.xvec_test_torch
                    y_MC = model_MC(xvec)
                xvec = xvec.detach().cpu().numpy()
                y_MC = y_MC.detach().cpu().numpy()
                y_true = params.true_function(xvec)
                perc_abs_error_MC = 100*(np.abs((y_MC - y_true)/y_true))
                list_perc_abs_error_MC.append(np.median(perc_abs_error_MC))
                list_perc_abs_error_MC_i.append(i)
                list_perc_abs_error_MC_loss.append(loss_value)
            if i % 1000 == 0:
                print(f"loss: {loss_value:>7f} [{i:>5d}/{params.nb_epochs:>5d}]")
            if params.use_scheduler and (i != 0) and (i % params.freq_scheduler == 0):
                scheduler.step()
                print("i : {}. Decreasing learning rate: {}".format(i, scheduler.get_last_lr()))
                # report the current step (not the step of the last loss print)
                print(f"loss: {loss_value:>7f}, median percentage error {list_perc_abs_error_MC[-1]:>7f}, [{i:>5d}/{params.nb_epochs:>5d}]")

        if params.optimizer == "SWA":
            optimizer_MC.swap_swa_sgd()

        # B. Compile results of this repetition
        #--------------------------------------
        beta_array = np.array(list_beta)
        frames.append(pd.DataFrame({'M': params.M,
                                    'N': params.N,
                                    'repetition': k,
                                    'iter': list_perc_abs_error_MC_i,
                                    'loss': np.sqrt(np.abs(list_perc_abs_error_MC_loss)),
                                    'med_percentage_error': list_perc_abs_error_MC,
                                    'intercept': beta_array[:,0],
                                    'slope': beta_array[:,1],
                                    'abs_perc_dev_bias': list_abs_perc_dev_bias, #absolute percentage deviation from true bias
                                    'abs_perc_dev_weight': list_abs_perc_dev_weight})) #absolute deviation from true weight

    # Each repetition enters exactly once (repetition 0 is not duplicated)
    df_MC = pd.concat(frames)

    # Replace NAN by the largest non-NAN value of the column
    for col in ["loss", "med_percentage_error", "intercept", "slope"]:
        df_MC[col] = df_MC[col].fillna(np.nanmax(df_MC[col]))

    # C. Statistics on results
    grouped = df_MC.groupby('iter')
    df_MC_average = grouped.mean().reset_index() #mean value by iteration

    list_cols = ["loss", "med_percentage_error",
                 "intercept", "slope",
                 "abs_perc_dev_bias", "abs_perc_dev_weight"]

    for col in list_cols:
        df_MC_average["min_" + col] = grouped[col].min().reset_index()[col]
        df_MC_average["max_" + col] = grouped[col].max().reset_index()[col]
        df_MC_average["std_" + col] = grouped[col].std().reset_index()[col]
        for qq in [1, 5, 10, 25, 50, 75, 90, 95, 99]:
            df_MC_average["P" + str(qq) + "_" + col] = grouped[col].quantile(qq/100).reset_index()[col]

    # Add extra info
    df_MC_average['optimizer'] = params.optimizer
    df_MC_average['sigma_e'] = params.σ_e
    df_MC_average['Sobol'] = params.use_Sobol
    df_MC_average['user_scheduler'] = params.use_scheduler
    df_MC_average['gamma_scheduler'] = params.freq_gamma
    df_MC_average['lr'] = params.lr

    return df_MC, df_MC_average


def _build_run_optimizer(model, params):
    """
    Create the optimizer selected by params.optimizer ("Adam", "SGD" or "SWA").

    Raises NameError if params.optimizer is not one of these names.
    """
    if params.optimizer == "Adam":
        return torch.optim.Adam(model.parameters(), lr=params.lr, eps=1e-07, betas=(0.9, 0.999))
    if params.optimizer == "SGD":
        return torch.optim.SGD(model.parameters(), params.lr)
    if params.optimizer == "SWA":
        # SWA wraps a base Adam optimizer
        base_opt = torch.optim.Adam(model.parameters(), lr=params.lr, eps=1e-07, betas=(0.9, 0.999))
        return SWA(base_opt, swa_start=params.swa_start, swa_freq=params.swa_freq, swa_lr=params.lr)
    raise NameError(f"optimizer {params.optimizer} unknown")

In [None]:
def calculate_variance_gaussian(params, model, nb_draws, grid_M, grid_N):
    """
    Variance of the loss under the joint gaussian assumption.

    One set of nb_draws states and two sets of shocks are simulated; the
    residuals R1 and R2 share the states and differ by the shocks. The
    variance formula is built from:
      - the average of the means of R1 and R2
      - the average of the variances of R1 and R2, for var(f(s_m,e^i_m))
      - the covariance between R1 and R2, for cov(f(s_m,e^i_m), f(s_m,e^j_m))

    grid_M and grid_N are combined element-wise (T = M*N/2).

    Return: variance loss function on grid, var(resid), cov(resid)
    """
    T_grid = torch.tensor(grid_M*grid_N/2)
    N_grid = torch.tensor(grid_N)

    with torch.no_grad():
        # common states, two realizations of the shocks
        states = sim_states(params, nb_draws)
        shocks_a = simulate_shocks(params, nb_draws)
        shocks_b = simulate_shocks(params, nb_draws)

        resid_a = Residuals_torch(model, params, states, shocks_a)
        resid_b = Residuals_torch(model, params, states, shocks_b)

        # moments, averaged over the two realizations when possible
        mean_val = 0.5*torch.mean(resid_a) + 0.5*torch.mean(resid_b)
        var_val = 0.5*torch.var(resid_a) + 0.5*torch.var(resid_b)
        cov_val = torch.cov(torch.column_stack((resid_a, resid_b)).T)[0,1]

        # the three terms of the formula, summed in this order
        scale = 1/(T_grid*(N_grid - 1))
        term_cov = (N_grid**2 - 3*N_grid + 3)*(cov_val**2)
        term_var = (2*(N_grid - 2)*cov_val + var_val)*var_val
        term_mean = 2*(N_grid - 1)*(var_val + (N_grid - 1)*cov_val)*(mean_val**2)
        var_L = scale*(term_cov + term_var + term_mean)

    return var_L, var_val, cov_val

In [None]:
def calculate_variance_loss_fast(params, model, nb_draws, grid_M, grid_N):
    """
    Calculate variance of the loss using proposition appendix
    Use four independent shocks

    One set of nb_draws states and four sets of shocks are simulated. The
    formula uses the variance of R1*R2, the covariance between R1*R2 and
    R1*R3 (one shared residual) and the covariance between R1*R2 and R3*R4
    (no shared residual). grid_M and grid_N are combined element-wise
    (T = M*N/2).

    Return: variance of the loss on the grid
    """
    grid_T = torch.tensor(grid_M*grid_N/2)
    grid_N = torch.tensor(grid_N)

    # Calculate variance and covariance
    with torch.no_grad():
        x = sim_states(params, nb_draws)

        e1 = simulate_shocks(params, nb_draws)
        e2 = simulate_shocks(params, nb_draws)
        e3 = simulate_shocks(params, nb_draws)
        e4 = simulate_shocks(params, nb_draws)

        # residuals for the same states under 4 realizations of shocks
        R1 = Residuals_torch(model, params, x, e1)
        R2 = Residuals_torch(model, params, x, e2)
        R3 = Residuals_torch(model, params, x, e3)
        R4 = Residuals_torch(model, params, x, e4)

        # Only the products entering the formula are constructed
        R1_R2 = R1*R2
        R1_R3 = R1*R3
        R3_R4 = R3*R4

        # Variance cross
        var_R1_R2 = torch.var(R1_R2)

        # Co-variances with one shared element
        cov_R1R2_R1R3 = torch.cov(torch.column_stack((R1_R2, R1_R3)).T)[0,1]

        # Covariances with four different terms
        cov_R1R2_R3R4 = torch.cov(torch.column_stack((R1_R2, R3_R4)).T)[0,1]

        var_L = (1/(grid_T*(grid_N - 1)))*(var_R1_R2 + 2*(grid_N - 2)*(cov_R1R2_R1R3) + 2*((grid_N*(grid_N - 1)/4) - grid_N + 3/2)*(cov_R1R2_R3R4))

    return var_L

In [None]:
def calculate_variance_loss_fast_2(params, model, nb_draws, grid_M, grid_N):
    """
    Calculate variance of the loss using proposition appendix
    Use the four shocks to calculate more accurate values

    Same formula as with a single pair of products, but:
      - the variance term is the average of var(Ri*Rj) over the 6 pairs
      - the covariance term with one shared residual is the average over
        5 combinations of products
      - the covariance term without shared residual uses (R1*R2, R3*R4)

    Return: variance of the loss on the grid
    """
    T_grid = torch.tensor(grid_M*grid_N/2)
    N_grid = torch.tensor(grid_N)

    def _cov(u, v):
        # off-diagonal element of the 2x2 covariance matrix of (u, v)
        return torch.cov(torch.column_stack((u, v)).T)[0,1]

    # pairs of residuals (0-based) and combinations sharing one residual
    pairs = ((0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3))
    shared = (((0, 1), (0, 2)),
              ((0, 1), (0, 3)),
              ((0, 1), (1, 2)),
              ((0, 1), (1, 3)),
              ((0, 2), (2, 3)))

    with torch.no_grad():
        # States
        states = sim_states(params, nb_draws)
        # Shocks: four realizations, drawn one after the other
        shocks = [simulate_shocks(params, nb_draws) for _ in range(4)]
        # residuals for the same states under the 4 realizations of shocks
        resid = [Residuals_torch(model, params, states, shock) for shock in shocks]

        # cross products Ri*Rj
        prod = {(i, j): resid[i]*resid[j] for (i, j) in pairs}

        # Variance cross, averaged over the 6 pairs
        variances = [torch.var(prod[pair]) for pair in pairs]
        mean_var = (1/6)*sum(variances[1:], variances[0])

        # Co-variances with one shared element, averaged over 5 combinations
        covariances = [_cov(prod[left], prod[right]) for (left, right) in shared]
        mean_cov = (1/5)*sum(covariances[1:], covariances[0])

        # Covariances with four different terms
        cov_disjoint = _cov(prod[(0, 1)], prod[(2, 3)])

        scale = 1/(T_grid*(N_grid - 1))
        weight_disjoint = 2*((N_grid*(N_grid - 1)/4) - N_grid + 3/2)
        var_L = scale*(mean_var + 2*(N_grid - 2)*(mean_cov) + weight_disjoint*(cov_disjoint))

    return var_L

In [None]:
def Residuals_torch_2(model, params, s, e):
    """
    Residuals as a function of the state s and of the innovation e.

    The innovation is first mapped to exp(μ_e + σ_e*e), which is then passed
    to Residuals_torch together with the state.
    """
    log_shock = params.μ_e + params.σ_e*e
    return Residuals_torch(model, params, s, torch.exp(log_shock))
    
def calculate_N_star(params, model, mean_s, mean_e, var_cov_s, var_cov_e, T, grid_N):
    """
    Use formula N*. Assumption of joint normality.
    Also assume that mu_f^2 is close to 0

    The gradient of the residual (Residuals_torch_2) is taken at
    (mean_s, mean_e), once wrt the state and once wrt the innovation. With
    q_s = grad_s' var_cov_s grad_s and q_e = grad_e' var_cov_e grad_e:
        a = q_s^2,  b = 2 q_s q_e + q_e^2,  N* = 1 + sqrt(1 + b/a)
    The element of grid_N closest to N* is returned (grid_N is used in tensor
    arithmetic and indexed by a tensor). T is not used.
    """
    def _quadratic_form(grad, var_cov):
        return torch.matmul(grad.t(), torch.matmul(var_cov, grad))

    # NOTE(review): backward() presumably also accumulates gradients in the
    # model parameters, if they require grad -- confirm this is harmless

    ## Gradient wrt to space vector
    s_point = torch.tensor([[mean_s]], requires_grad=True)
    resid = Residuals_torch_2(model, params, s_point, torch.tensor([[mean_e]]))
    resid.backward(torch.ones_like(resid))
    grad_s = s_point.grad

    ## Gradient wrt to innovation vector
    e_point = torch.tensor([[mean_e]], requires_grad=True)
    resid = Residuals_torch_2(model, params, torch.tensor([[mean_s]]), e_point)
    resid.backward(torch.ones_like(resid))
    grad_e = e_point.grad

    ## calculate a and b
    q_s = _quadratic_form(grad_s, var_cov_s)
    q_e = _quadratic_form(grad_e, var_cov_e)
    a = q_s**2
    b = 2*q_s*q_e + q_e**2

    # ignore the integer constraint
    unconstrained = 1 + torch.sqrt(1 + (b/a))
    # find nearest corresponding element on grid
    nearest = torch.argmin(torch.abs(unconstrained - grid_N))
    return grid_N[nearest]

def calculate_propto_var(params, model, mean_s, mean_e, var_cov_s, var_cov_e, T, grid_N):
    """
    Calculate value proportional to variance of loss.
    Assumption of joint normality.
    Also assume that mu_f^2 is close to 0

    The gradient of the residual (Residuals_torch_2) is taken at
    (mean_s, mean_e), once wrt the state and once wrt the innovation. With
    q_s = grad_s' var_cov_s grad_s and q_e = grad_e' var_cov_e grad_e:
        a = q_s^2,  b = 2 q_s q_e + q_e^2
        vals = (1/T) * (a*(N + 2 + 1/(N - 1)) + b/(N - 1))   [formula (49)]
    evaluated for N on grid_N. The transpose of vals is returned.

    NOTE(review): the argument T is not used, params.T is read instead --
    confirm that this is intended.
    """
    def _quadratic_form(grad, var_cov):
        return torch.matmul(grad.t(), torch.matmul(var_cov, grad))

    # NOTE(review): backward() presumably also accumulates gradients in the
    # model parameters, if they require grad -- confirm this is harmless

    ## Gradient wrt to space vector
    s_point = torch.tensor([[mean_s]], requires_grad=True)
    resid = Residuals_torch_2(model, params, s_point, torch.tensor([[mean_e]]))
    resid.backward(torch.ones_like(resid))
    grad_s = s_point.grad

    ## Gradient wrt to innovation vector
    e_point = torch.tensor([[mean_e]], requires_grad=True)
    resid = Residuals_torch_2(model, params, torch.tensor([[mean_s]]), e_point)
    resid.backward(torch.ones_like(resid))
    grad_e = e_point.grad

    ## calculate a and b
    q_s = _quadratic_form(grad_s, var_cov_s)
    q_e = _quadratic_form(grad_e, var_cov_e)
    a = q_s**2
    b = 2*q_s*q_e + q_e**2

    # formula (49):
    N_grid = torch.tensor(grid_N)
    inv_N_minus_1 = 1/(N_grid - 1)
    vals = (1/params.T)*(a * (N_grid + 2 + inv_N_minus_1) + inv_N_minus_1*b)
    return vals.t()

In [None]:
def numpy_flat(a):
    """
    Function to flatten a (possibly nested) list

    The input is converted to a numpy array; its elements are returned in a
    flat Python list, as numpy scalars.
    """
    flattened = np.array(a).reshape(-1)
    return [element for element in flattened]

In [None]:
def getSystemInfo():
    """
    Describe the machine running the code.

    Returns a JSON string with the platform (name, release, version),
    architecture, hostname, ip address, mac address, processor and the
    total RAM rounded to the nearest GB.
    If any of the queries fails, the exception is logged and None is returned.
    """
    try:
        info = {
            'platform': platform.system(),
            'platform-release': platform.release(),
            'platform-version': platform.version(),
            'architecture': platform.machine(),
            'hostname': socket.gethostname(),
            'ip-address': socket.gethostbyname(socket.gethostname()),
            # node id as 12 hex digits, grouped two by two
            'mac-address': ':'.join(re.findall('..', '%012x' % uuid.getnode())),
            'processor': platform.processor(),
            # total memory in bytes -> GB
            'ram': str(round(psutil.virtual_memory().total / (1024.0 **3)))+" GB",
        }
        return json.dumps(info)
    except Exception as error:
        logging.exception(error)
        return None