# Functions for OLG

Create functions used when solving an OLG Model with the [bc-MC Operator](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4476122)

In [None]:
import numpy as np
import pandas as pd
import quantecon as qe
from interpolation import interp
from quantecon.optimize import brentq
from numba import njit, float64
from numba.experimental import jitclass
import Tasmanian # sparse grids

import random
import scipy.stats
import chaospy  ## for quadrature
from itertools import product
import os

import time
from math import sqrt
import seaborn as sns; sns.set()
from tqdm import tqdm as tqdm         # tqdm is a nice library to visualize ongoing loops
import datetime
# the following lines are used for indicative typing
from typing import Tuple
class Vector:
    """Empty placeholder class, used only as an indicative type annotation."""
    pass
from scipy.stats import norm
import Tasmanian
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader, Dataset

# To create copies of NN
import copy
import matplotlib.ticker as mtick
# To use sparse kronecker product
from scipy import sparse
from torchcontrib.optim import SWA

import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Pick the device used for training: the GPU when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

# Define model
# Outputs:
# 1. the share of cash-in-hand consumed
# 2. the Lagrange multiplier h
class NeuralNetwork(nn.Module):
    """Fully connected network: 5 inputs -> 32 -> 32 -> 2 outputs, with ReLU activations."""

    def __init__(self):
        super().__init__()
        # Kept as an attribute, although forward() does not apply it.
        self.flatten = nn.Flatten()
        width = 32
        layers = [
            nn.Linear(5, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, 2),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw output of the linear/ReLU stack evaluated at x."""
        return self.linear_relu_stack(x)

In [None]:
def show_params(params, limited=True):
    """
    Print the main hyper-parameters stored in params, one per line.

    Input:
    params: object exposing lr, nb_epochs, W_expanded, M, N, MN, T, optimizer and use_scheduler
    limited: not used by this function
    """
    # (label, accessor) pairs; each attribute is read only when its line is printed
    entries = (
        ("learning rate", lambda p: p.lr),
        ("nb epochs", lambda p: p.nb_epochs),
        ("W_expanded.shape", lambda p: p.W_expanded.shape),
        ("M", lambda p: p.M),
        ("N", lambda p: p.N),
        ("MN", lambda p: p.MN),
        ("T", lambda p: p.T),
        ("optimizer_chosen", lambda p: p.optimizer),
        ("use_scheduler", lambda p: p.use_scheduler),
    )
    for label, read in entries:
        print(f"{label}: {read(params)}")


In [None]:
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """
    Convert a scipy sparse matrix to a torch sparse COO tensor (float32).

    Input:
    sparse_mx: a scipy sparse matrix (any format providing .tocoo())

    Return: torch sparse COO tensor with the same shape and non-zero entries
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # torch expects a (2, nnz) int64 array: first row = row indices, second row = column indices
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse.FloatTensor(...) is the deprecated legacy constructor;
    # torch.sparse_coo_tensor is its documented replacement.
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))

# Root mean squared error
class RMSELoss(nn.Module):
    """Loss module returning the square root of the mean squared error."""

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return sqrt(MSE(yhat, y))."""
        mean_squared_error = self.mse(yhat, y)
        return mean_squared_error.sqrt()
    
# Quadrature rule (Gaussian by default)
# See: https://chaospy.readthedocs.io/en/master/api/chaospy.generate_quadrature.html
def dist(order, distribution, rule = "gaussian", sp=True):
    """
    Thin wrapper around chaospy.generate_quadrature.

    Input:
    order: quadrature order, passed through to chaospy
    distribution: a chaospy distribution
    rule: name of the quadrature rule
    sp: forwarded as the `sparse` argument

    Return: the (nodes, weights) pair produced by chaospy
    """
    nodes, weights = chaospy.generate_quadrature(order, distribution, rule=rule, sparse=sp)
    return nodes, weights

def create_W_expanded_matrix(M, N, rep):
    """
    Build the sparse torch matrix W_expanded.

    U is the (N, N) strictly upper triangular matrix of ones (zeros on and below
    the diagonal). U is repeated M times along the diagonal of a block-diagonal
    matrix D, and D is itself repeated rep times along the diagonal of the result.

    Input:
    M: number of U blocks inside D
    N: size of U
    rep: number of D blocks in the final matrix

    Return: torch sparse tensor of shape (rep*M*N, rep*M*N)
    """
    # k=1 keeps only the entries strictly above the diagonal
    strict_upper = sparse.csr_matrix(np.triu(np.ones((N, N)), k=1))

    # Block-diagonal matrix with M copies of U: I_M (x) U
    eye_M = sparse.csr_matrix(np.eye(M, M))
    block_diag = sparse.kron(eye_M, strict_upper)

    # Larger block-diagonal matrix with rep copies of D: I_rep (x) D
    eye_rep = sparse.csr_matrix(np.eye(rep, rep))
    stacked = sparse.kron(eye_rep, block_diag)

    return sparse_mx_to_torch_sparse_tensor(stacked)

def min_FB(a, b):
    """Return a + b - sqrt(a**2 + b**2), computed with `tf.sqrt`."""
    # NOTE(review): `tf` is not imported in the visible part of this file;
    # calling this function requires it to be defined elsewhere.
    return a+b-tf.sqrt(a**2+b**2)


def min_FB_torch(a, b):
    """Return a + b - sqrt(a**2 + b**2), computed with `torch.sqrt`."""
    return a+b-torch.sqrt(a**2+b**2)

## Data management with Pytorch

In [None]:
def sim_states(dataloader_replacement):
    """
    Draw one batch of state vectors, using stored data to approximate the density.

    Input:
    dataloader_replacement: a pytorch data loader (with replacement)

    Return: the first batch produced by a fresh iterator over the loader
    """
    return next(iter(dataloader_replacement))
            
def simulate_shocks(params, len_series):
    """
    Draw independent series of tfp and delta shocks.

    Input:
    params: object exposing distribution_shocks, mean_tfp, std_tfp, mean_delta, std_delta
    len_series: number of draws for each series

    Return: (e_tfp, e_delta), two 1-D tensors of length len_series
    Raises an Exception if the distribution is not "Normal" or if any draw is negative.
    """
    if params.distribution_shocks != "Normal":
        raise Exception(f"{params.distribution_shocks} not implemented.")

    draw_shape = (len_series,)
    # tfp is drawn first, then delta
    e_tfp = torch.normal(mean=params.mean_tfp, std=params.std_tfp, size=draw_shape)
    e_delta = torch.normal(mean=params.mean_delta, std=params.std_delta, size=draw_shape)

    # Negative realizations are rejected
    has_negative = bool((e_tfp < 0.0).any()) or bool((e_delta < 0.0).any())
    if has_negative:
        raise Exception("Negative values happened for tfp or delta")
    return e_tfp, e_delta

class InfiniteSampler(torch.utils.data.Sampler):
    """Sampler that never stops: indices are drawn one at a time, with replacement."""

    def __init__(self, data_source):
        # Only the size of the data source is stored
        self.num_samples = len(data_source)

    def __iter__(self):
        """Yield uniformly drawn integer indices in [0, num_samples) forever."""
        while True:
            drawn = torch.randint(high=self.num_samples, size=(1,), dtype=torch.int64)
            yield int(drawn)

class MyDataset(Dataset):
    """Minimal Dataset that wraps an indexable container."""

    def __init__(self, data):
        # Container holding the observations
        self.data = data

    def __len__(self):
        """Number of stored observations."""
        n_obs = len(self.data)
        return n_obs

    def __getitem__(self, idx):
        """Observation stored at position idx."""
        item = self.data[idx]
        return item
    
def generate_data_debug(params, T, N):
    """
    Generate test data: random capital allocations plus simulated exogenous states.

    Input:
    params: a Params object (nb_agents and the shock parameters are read)
    T: number of observations (length of the simulation)
    N: dimension of each observation vector; not used by this function

    Return: tensor with T rows; columns are the capital holdings of each agent
    followed by the tfp and delta shocks
    """
    # Capital allocations: uniform draws between `low` and `high`
    # row: observation, column: agent
    low = 0.25
    high = 3.0
    capital = (low - high) * torch.rand(T, params.nb_agents) + high
    # The first agent holds no capital
    capital[:, 0] = 0

    # Exogenous states: one column for tfp, one for delta
    e_tfp, e_delta = simulate_shocks(params, T)
    exogenous = torch.stack((e_tfp, e_delta), dim=1)

    return torch.cat((capital, exogenous), dim=1)

def generate_dataloaders(observations, batch_size):
    """
    Build two dataloaders over the same observations.

    Input:
    observations: tensor of observations, one per row
    batch_size: number of observations per draw (M)

    Return: (dataloader, dataloader_with_replacement)
    dataloader: shuffled loader over a TensorDataset (draws without replacement)
    dataloader_with_replacement: loader over MyDataset driven by an InfiniteSampler
    """
    # Draws without replacement
    shuffled_loader = DataLoader(TensorDataset(observations), batch_size=batch_size, shuffle=True)

    # Draws with replacement: indices come from the InfiniteSampler
    indexed_data = MyDataset(observations)
    endless_loader = DataLoader(
        indexed_data,
        batch_size=batch_size,
        sampler=InfiniteSampler(indexed_data),
    )

    return shuffled_loader, endless_loader

def generate_data_and_dataloaders_debug(params, T, N, batch_size):
    """
    Generate random test data and wrap it in dataloaders.

    Input:
    params: a Params object
    T: number of observations; must be at least batch_size
    N: forwarded to generate_data_debug
    batch_size: number of observations per draw

    Return: (dataloader, dataloader_with_replacement)
    """
    if batch_size > T:
        raise Exception(f"T: {T} < batch_size: {batch_size}")
    data = generate_data_debug(params, T, N)
    return generate_dataloaders(data, batch_size)


## True model

In [None]:
def simulate_true_model(len_T, params, non_stochastic_ss = False):
    """
    Simulate the model with the savings rule a = wealth*params.mult_wealth.

    Input:
    len_T: number of periods kept (100 burn-in periods are simulated first and dropped)
    params: a Params object
    non_stochastic_ss: if true, tfp and delta are held at params.mean_tfp and params.mean_delta

    Return: tensor with len_T rows; columns are the capital holdings of each agent
    followed by tfp and delta
    """
    n_burn = 100
    horizon = len_T + n_burn
    nb_agents = params.nb_agents

    # Exogenous paths: random draws, or constants at their means
    if non_stochastic_ss == False:
        tfp_path, delta_path = simulate_shocks(params, horizon)
    else:
        tfp_path = params.mean_tfp*torch.ones(horizon)
        delta_path = params.mean_delta*torch.ones(horizon)

    # Capital holdings: in the first period every agent but the first holds 1
    holdings = torch.zeros((horizon, nb_agents))
    holdings[0, 1:] = 1
    # Exogenous labour supply: 1 for the first agent, 0 for the others
    labour = torch.zeros((horizon, nb_agents))
    labour[:, 0] = 1

    # Storage for aggregates, prices and individual choices
    agg_capital = torch.zeros(horizon)
    agg_labour = torch.zeros(horizon)
    rate = torch.zeros(horizon)
    pay = torch.zeros(horizon)
    cash = torch.zeros((horizon, nb_agents))
    savings = torch.zeros((horizon, nb_agents))
    consumption = torch.zeros((horizon, nb_agents))

    for t in range(horizon):
        # Aggregates implied by the holdings inherited from the previous period
        agg_capital[t] = holdings[t].sum()
        agg_labour[t] = labour[t].sum()

        # Factor prices
        rate[t] = interest_rate(agg_capital[t], agg_labour[t], delta_path[t], tfp_path[t], params)
        pay[t] = wage(agg_capital[t], agg_labour[t], tfp_path[t], params)

        # Wealth before the consumption decision
        cash[t] = holdings[t]*rate[t] + labour[t]*pay[t]

        # Savings choice, consumption is the remainder
        savings[t] = cash[t]*params.mult_wealth
        consumption[t] = cash[t] - savings[t]

        # Shift by one position: savings of agent i become next-period holdings of agent i+1
        if t + 1 < horizon:
            holdings[t + 1, 1:] = savings[t, :-1].clone()

    exogenous = torch.column_stack((tfp_path, delta_path))
    # Drop the burn-in rows
    return torch.cat((holdings[n_burn:, :], exogenous[n_burn:, :]), dim=1)
 
def generate_data_and_dataloaders_true(params, T, batch_size):
    """
    Simulate the true model and wrap the simulated observations in dataloaders.

    Input:
    params: a Params object
    T: length of the simulation; must be at least batch_size
    batch_size: number of observations per draw

    Return: (dataloader, dataloader_with_replacement)
    """
    if batch_size > T:
        raise Exception(f"T: {T} < batch_size: {batch_size}")
    simulated = simulate_true_model(T, params)
    return generate_dataloaders(simulated, batch_size)

# Simulation of length 1000, batch size = M
#d, d_replacement = generate_data_and_dataloaders_true(params, 10000, params.M)

def generate_n_batches(nb_batches, d_replacement):
    """
    Draw nb_batches batches from the dataloader and stack them vertically.

    Input:
    nb_batches: number of batches to draw; must be at least 1
    d_replacement: a pytorch data loader (with replacement)

    Return: the single batch when nb_batches == 1, otherwise the batches stacked row-wise
    Raises ValueError if nb_batches < 1.
    """
    if nb_batches < 1:
        # Previously this case failed with an UnboundLocalError on `state_vec`
        raise ValueError(f"nb_batches must be >= 1, got {nb_batches}")

    # Each draw takes the first batch of a fresh iterator (same as sim_states)
    batches = [next(iter(d_replacement)) for _ in range(nb_batches)]
    if len(batches) == 1:
        # Return a single batch untouched, as before
        return batches[0]
    # One stacking call instead of re-copying the accumulated tensor at every step
    return torch.vstack(batches)


## Generate data using the neural network

In [None]:
def simulate_current_model(neural_net, len_T, params, use_true_model = False, non_stochastic_ss = False):
    """
    Simulate the model using the current neural net (or the analytic rule).

    Input:
    neural_net: a pytorch neural network
    len_T: number of periods kept; int(len_T/10) burn-in periods are simulated first and dropped
    params: a Params object
    use_true_model: if true, use a = wealth*params.mult_wealth. Else, use the neural net
    non_stochastic_ss: if true, tfp and delta are held at params.mean_tfp and params.mean_delta

    Return: tensor with len_T rows; columns are the capital holdings of each agent
    followed by tfp and delta

    NOTE(review): the network is evaluated without torch.no_grad() here; confirm
    whether the returned observations are meant to carry gradient history.
    """
    n_burn = int(len_T/10) # 10% burn-in
    horizon = len_T + n_burn
    nb_agents = params.nb_agents

    # Exogenous paths: random draws, or constants at their means
    if non_stochastic_ss == False:
        tfp_path, delta_path = simulate_shocks(params, horizon)
    else:
        tfp_path = params.mean_tfp*torch.ones(horizon)
        delta_path = params.mean_delta*torch.ones(horizon)

    # Capital holdings: in the first period every agent but the first holds 1
    holdings = torch.zeros((horizon, nb_agents))
    holdings[0, 1:] = 1
    # Exogenous labour supply: 1 for the first agent, 0 for the others
    labour = torch.zeros((horizon, nb_agents))
    labour[:, 0] = 1

    # Storage for aggregates, prices and individual choices
    agg_capital = torch.zeros(horizon)
    agg_labour = torch.zeros(horizon)
    rate = torch.zeros(horizon)
    pay = torch.zeros(horizon)
    cash = torch.zeros((horizon, nb_agents))
    savings = torch.zeros((horizon, nb_agents))
    consumption = torch.zeros((horizon, nb_agents))

    for t in range(horizon):
        # Aggregates implied by the holdings inherited from the previous period
        agg_capital[t] = holdings[t].sum()
        agg_labour[t] = labour[t].sum()

        # Factor prices
        rate[t] = interest_rate(agg_capital[t], agg_labour[t], delta_path[t], tfp_path[t], params)
        pay[t] = wage(agg_capital[t], agg_labour[t], tfp_path[t], params)

        # Wealth before the consumption decision
        cash[t] = holdings[t]*rate[t] + labour[t]*pay[t]

        if use_true_model == True:
            # Analytic rule: save a fixed multiple of wealth
            savings[t] = cash[t]*params.mult_wealth
            consumption[t] = cash[t] - savings[t]
        else:
            # Consumption from the network; savings are the remainder
            consumption[t] = model_normalized(neural_net, cash[t].view(1, -1), params)
            savings[t] = cash[t] - consumption[t]

        # Shift by one position: savings of agent i become next-period holdings of agent i+1
        if t + 1 < horizon:
            holdings[t + 1, 1:] = savings[t, :-1].clone()

    exogenous = torch.column_stack((tfp_path, delta_path))
    # Drop the burn-in rows
    return torch.cat((holdings[n_burn:, :], exogenous[n_burn:, :]), dim=1)

def generate_data_and_dataloaders_current_model(neural_net, params, T, batch_size, use_true_model = False, non_stochastic_ss = False):
    """
    Simulate data with the current neural network and wrap it in dataloaders.

    Input:
    neural_net: a pytorch neural network
    params: a Params object
    T: length of the simulation; must be at least batch_size
    batch_size: number of observations per draw
    use_true_model: if true, use analytic solution. Else, use neural net
    non_stochastic_ss: if true, simulation with exo variables constant

    Return: (dataloader, dataloader_with_replacement)
    """
    if batch_size > T:
        raise Exception(f"T: {T} < batch_size: {batch_size}")
    simulated = simulate_current_model(neural_net, T, params, use_true_model, non_stochastic_ss)
    return generate_dataloaders(simulated, batch_size)

# Simulation of length 1000, batch size = M
#d, d_replacement = generate_data_and_dataloaders_current_model(model_bcMC, params, 1000, params.M)

## bc-MC Operator

For a single age category, the bc-MC operator is:

$$ \frac{1}{M} \frac{2}{N(N-1)} \sum_{m=1}^{M} \sum_{1\leq i < j \leq N} f(s_m, \epsilon_{m}^{(i)})f(s_m, \epsilon_{m}^{(j)})  $$


### To measure the accuracy of the bc-MC operator


#### Monte Carlo integration

We want to calculate the mean value of the Euler equation error (EEE):

$$EEE =  \frac{1}{c_t}(u^{\prime-1})\Big(\mathbf{E}_{\varepsilon}\big[{\beta u^{\prime}(c_{t+1}) r_{t+1}} \big]\Big) - 1$$

Let's first use Monte Carlo to approximate the integral with respect to the innovation vector. Let's first fix the value of the state vector to $s_m$. Conditional on this value, the expectation with respect to the innovation vector is approximated by:

$$ \mathbf{E}_{\varepsilon} g(s_m,  \epsilon) \approx \frac{1}{N} \sum_{i=1}^{N}  g(s_m,  \epsilon^{(i)}) $$

which can be vectorized as

$$ \mathbf{1}_N^{T} . \begin{pmatrix} g(s_m, \epsilon^{(1)}) \\ \vdots \\ g(s_m, \epsilon^{(N)}) \end{pmatrix} $$

where $\mathbf{1}_N^{T} = (\frac{1}{N}, \frac{1}{N}, ..., \frac{1}{N})$ is a $(1, N)$ row vector.

Now, for several draws of $s_m$, we can calculate conditional means as:

$$ \begin{pmatrix} \frac{1}{N} \sum_{i=1}^{N}  g(s_1,  \epsilon_1^{(i)}) \\ \vdots \\ \frac{1}{N} \sum_{i=1}^{N}  g(s_M,  \epsilon_M^{(i)}) \end{pmatrix} = \begin{pmatrix}
\mathbf{1}_N^{T} & \mathbf{0} & ... & \mathbf{0}\\
\mathbf{0} & \mathbf{1}_N^{T} & \mathbf{0} & \mathbf{0} \\
... & \mathbf{0} & ... & ... \\
\mathbf{0} & \mathbf{0} & ... & \mathbf{1}_N^{T}
\end{pmatrix}  \begin{pmatrix} g(s_1,  \epsilon_1^{(1)}) \\ \vdots \\ g(s_1,  \epsilon_1^{(N)}) \\ \vdots \\ g(s_M,  \epsilon_M^{(1)}) \\ \vdots \\ g(s_M,  \epsilon_M^{(N)}) \end{pmatrix}$$

This is what I do in the function `evaluate_accuracy_pytorch_MC` below.

In [None]:
def evaluate_accuracy_pytorch_MC(neural_net, n, n_Monte_Carlo, params, dataloader_replacement, debug=True):
    """
    Evaluate Euler equation errors, using Monte Carlo draws for the expectations.

    Input:
    neural_net: a pytorch neural network (only used when debug is False)
    n: number of draws for the current state
    n_Monte_Carlo: number of innovation draws for each current state
    params: a Params object
    dataloader_replacement: dataloader (with replacement) providing batches of state vectors
    debug: if True, use the rule a = wealth*params.mult_wealth instead of the neural net

    Return: numpy array of shape (n*(params.nb_agents - 1), 1). Errors are stacked
    agent by agent: the n states for the first agent, then the n states for the
    second agent, and so on.
    Raises ValueError if the dataloader yields fewer than n states.
    """
    with torch.no_grad():
        # Averaging operator, to calculate conditional means in one sparse product
        A = sparse.eye(n) #(n,n) identity matrix
        B = sparse.csr_matrix(np.ones(n_Monte_Carlo)/n_Monte_Carlo) #(1, n_Monte_Carlo) row vector
        # Sparse kronecker product: (n, n*n_Monte_Carlo) matrix, row m averages the draws of state m
        D = sparse.kron(A, B)
        # Repeat along the diagonal, once per Euler equation (nb_agents - 1 of them)
        rep = params.nb_agents - 1
        I_rep = sparse.csr_matrix(np.eye(rep, rep))
        D_repeated = sparse.kron(I_rep, D)
        # Convert to sparse tensor
        W = sparse_mx_to_torch_sparse_tensor(D_repeated)

        # State vector
        ## Number of batches needed to get at least n draws: ceil(n / M).
        ## int(n/M) drew too few states whenever n >= M was not a multiple of M.
        ## Assumes each batch holds params.M states -- TODO confirm against the caller.
        nb_draws = max(1, -(-n // params.M))
        state_vec = generate_n_batches(nb_draws, dataloader_replacement)
        if state_vec.shape[0] < n:
            raise ValueError(f"Only {state_vec.shape[0]} states drawn, but n = {n} are required")
        # Select right size
        state_vec = state_vec[:n,:]

        # Distribution of capital, then current values for tfp and delta
        h_matrix = state_vec[:,:-2]
        tfp_vec = state_vec[:, -2]
        delta_vec = state_vec[:, -1]

        ## Innovation vector
        # n_Monte_Carlo draws for each value today
        e_tfp, e_delta = simulate_shocks(params, n*n_Monte_Carlo)
        innovation_vec =  torch.column_stack((e_tfp, e_delta))

        ## Current period
        # infer sum of capital
        K_vec = torch.sum(h_matrix, 1)

        ## Exogenous labour supply: 1 for the first agent, 0 for the others
        l_matrix = torch.zeros_like(h_matrix)
        l_matrix[:, 0] = 1
        ## Total labour supply (useless calculations, but then we can generalize the code)
        L_vec = torch.sum(l_matrix, 1)

        r = interest_rate(K_vec, L_vec, delta_vec, tfp_vec, params)
        w = wage(K_vec, L_vec, tfp_vec, params)

        ## calculate wealth, before consumption decision made
        wealth = h_matrix*r.view(-1,1) + l_matrix*w.view(-1,1)

        ## Consumption current period
        if debug == False:
            c = model_normalized(neural_net, wealth, params)
            ## Infer capital decision
            a = wealth - c
        else:
            a = wealth*params.mult_wealth.view(1, -1)
            c = wealth - a

        ## Period t+1
        ## Holdings come from last period savings. But first generation has zero capital
        h_matrix_next = torch.zeros_like(a)
        # Shift by one position
        h_matrix_next[:,1:] = a[:,0:-1].clone()
        # Repeat each state n_Monte_Carlo times: shape (n*n_Monte_Carlo, A)
        h_matrix_next = h_matrix_next.repeat_interleave(n_Monte_Carlo, dim=0)

        # transitions of the exogenous processes
        ## No persistence here (but easy to change that)
        ## No need to repeat. Already right shape
        tfp_vec_next = innovation_vec[:, -2]
        delta_vec_next = innovation_vec[:, -1]

        K_vec_next = torch.sum(h_matrix_next, 1)

        ## Exogenous labour supply: 1 for the first agent, 0 for the others
        l_matrix_next = torch.zeros_like(h_matrix_next)
        l_matrix_next[:, 0] = 1
        ## Total labour supply (useless calculations, but then we can generalize the code)
        L_vec_next = torch.sum(l_matrix_next, 1)

        ## get factor prices (wages and interest rate)
        r_next = interest_rate(K_vec_next, L_vec_next, delta_vec_next, tfp_vec_next, params)
        w_next = wage(K_vec_next, L_vec_next, tfp_vec_next, params)

        ## calculate wealth, before consumption decision made
        wealth_next = h_matrix_next*r_next.view(-1,1) + l_matrix_next*w_next.view(-1,1)

        ## Consumption next period
        if debug == False:
            c_next = model_normalized(neural_net, wealth_next, params)
        else:
            a_next = wealth_next*params.mult_wealth.view(1, -1)
            c_next = wealth_next - a_next

        # Each column is the euler equation for one agent
        # rows are observations
        u_prime_next = params.u_prime(c_next)

        # Calculate (u'-1){beta E[r_{t+1} u'(c_{t+1})]}
        vals = u_prime_next[:, 1:params.nb_agents]*r_next.view(-1,1)
        # Reshape the (n*n_Monte_Carlo, nb_agents - 1) matrix to a single column:
        # first column, then second column, then third column, and so on
        vals_reshaped = vals.t().contiguous().view(-1, 1)

        u_prime_inverse = (params.beta*torch.sparse.mm(W, vals_reshaped))**(-1.0/params.gamma)

        # Euler equation error, with current consumption stacked in the same order
        c_reshaped = c[:, 0:params.nb_agents-1].t().contiguous().view(-1, 1)
        EEE = (u_prime_inverse/c_reshaped) - 1

    return EEE.numpy()

#### Gaussian quadrature

We can also use [Gaussian quadrature](https://en.wikipedia.org/wiki/Gaussian_quadrature) to approximate the integral with respect to the innovation vector. 
Once again, let's first fix the value of the state vector to $s_m$. Conditional on this value, the expectation with respect to the innovation vector is approximated by:

$$ \mathbf{E}_{\varepsilon} g(s_m,  \epsilon) \approx \sum_{i=1}^{N} w_{i}  g(s_m,  \epsilon^{(i)}) $$

If we set $w_i = \frac{1}{N}$ and if we use random and independent draws for $\epsilon^{(i)}$, we are back to the Monte Carlo case discussed above. This observation makes us realize that we can reuse the same vectorization scheme as above, with minor modifications. More specifically, conditional on a given value for $s_m$, the expectation with respect to the innovation vector is approximated by:

$$ \mathbf{E}_{\varepsilon} g(s_m,  \epsilon) \approx \sum_{i=1}^{N} w_i g(s_m,  \epsilon^{(i)}) $$

with $w_i$ Gaussian quadrature weights and $\epsilon^{(i)}$ the corresponding Gaussian quadrature nodes.

This can be vectorized as

$$ \mathbf{1}_{w}^{T} . \begin{pmatrix} g(s_m, \epsilon^{(1)}) \\ \vdots \\ g(s_m, \epsilon^{(N)}) \end{pmatrix} $$

where $\mathbf{1}_w^{T} = (w_1, w_2, ..., w_N)$ is a $(1, N)$ row vector.

Now, for several draws of $s_m$, we can calculate conditional means as:

$$ \begin{pmatrix} \sum_{i=1}^{N} w_i  g(s_1,  \epsilon^{(i)}) \\ \vdots \\ \sum_{i=1}^{N} w_i  g(s_M,  \epsilon^{(i)}) \end{pmatrix} = \begin{pmatrix}
\mathbf{1}_{w}^{T} & \mathbf{0} & ... & \mathbf{0}\\
\mathbf{0} & \mathbf{1}_{w}^{T} & \mathbf{0} & \mathbf{0} \\
... & \mathbf{0} & ... & ... \\
\mathbf{0} & \mathbf{0} & ... & \mathbf{1}_{w}^{T}
\end{pmatrix}  \begin{pmatrix} g(s_1,  \epsilon^{(1)}) \\ \vdots \\ g(s_1,  \epsilon^{(N)}) \\ \vdots \\ g(s_M,  \epsilon^{(1)}) \\ \vdots \\ g(s_M,  \epsilon^{(N)}) \end{pmatrix}$$

This is what I do in the function `evaluate_accuracy_pytorch_Gaussian` below.



In [None]:
def evaluate_accuracy_pytorch_Gaussian(neural_net, n, params, dataloader_replacement, debug=True):
    """
    Evaluate Euler equation errors, using Gaussian quadrature for the expectations.
    New draws of the state vector are used at each call.

    Input:
    neural_net: a pytorch neural network (only used when debug is False)
    n: number of draws for the current state
    params: a Params object (quadrature weights and nodes are read from
        params.weights and params.nodes_torch)
    dataloader_replacement: dataloader (with replacement) providing batches of state vectors
    debug: if True, use the rule a = wealth*params.mult_wealth instead of the neural net

    Return: numpy array of shape (n*(params.nb_agents - 1), 1). Errors are stacked
    agent by agent: the n states for the first agent, then the n states for the
    second agent, and so on.
    Raises ValueError if the dataloader yields fewer than n states.
    """
    with torch.no_grad():
        # Number of quadrature nodes, used to repeat vectors
        n_nodes = len(params.weights)

        # Weighting operator, to calculate conditional expectations in one sparse product
        A = sparse.eye(n)
        B = sparse.csr_matrix(params.weights)
        # Sparse kronecker product: row m applies the quadrature weights to the nodes of state m
        D = sparse.kron(A, B)

        # Repeat along the diagonal, once per Euler equation (nb_agents - 1 of them)
        rep = params.nb_agents - 1
        I_rep = sparse.csr_matrix(np.eye(rep, rep))
        D_repeated = sparse.kron(I_rep, D)
        # Convert to sparse tensor
        W = sparse_mx_to_torch_sparse_tensor(D_repeated)

        # State vector
        ## Number of batches needed to get at least n draws: ceil(n / M).
        ## int(n/M) drew too few states whenever n >= M was not a multiple of M.
        ## Assumes each batch holds params.M states -- TODO confirm against the caller.
        nb_draws = max(1, -(-n // params.M))
        state_vec = generate_n_batches(nb_draws, dataloader_replacement)
        if state_vec.shape[0] < n:
            raise ValueError(f"Only {state_vec.shape[0]} states drawn, but n = {n} are required")
        # Select right size
        state_vec = state_vec[:n,:]

        # Distribution of capital, then current values for tfp and delta
        h_matrix = state_vec[:,:-2]
        tfp_vec = state_vec[:, -2]
        delta_vec = state_vec[:, -1]

        ## Innovation vector
        ## Gaussian quadrature nodes, tiled n times to match the number of state draws
        innovation_vec =  params.nodes_torch.float().repeat(n, 1)

        ## Current period
        # infer sum of capital
        K_vec = torch.sum(h_matrix, 1)

        ## Exogenous labour supply: 1 for the first agent, 0 for the others
        l_matrix = torch.zeros_like(h_matrix)
        l_matrix[:, 0] = 1
        ## Total labour supply (useless calculations, but then we can generalize the code)
        L_vec = torch.sum(l_matrix, 1)

        r = interest_rate(K_vec, L_vec, delta_vec, tfp_vec, params)
        w = wage(K_vec, L_vec, tfp_vec, params)

        ## calculate wealth, before consumption decision made
        wealth = h_matrix*r.view(-1,1) + l_matrix*w.view(-1,1)

        ## Consumption current period
        if debug == False:
            c = model_normalized(neural_net, wealth, params)
            ## Infer capital decision
            a = wealth - c
        else:
            a = wealth*params.mult_wealth.view(1, -1)
            c = wealth - a

        ## Period t+1
        ## Holdings come from last period savings. But first generation has zero capital
        h_matrix_next = torch.zeros_like(a)
        # Shift by one position
        h_matrix_next[:,1:] = a[:,0:-1].clone()
        # Repeat each state once per node: shape (n*n_nodes, A)
        h_matrix_next = h_matrix_next.repeat_interleave(n_nodes, dim=0)

        # transitions of the exogenous processes
        ## No persistence here (but easy to change that)
        ## No need to repeat. Already right shape
        tfp_vec_next = innovation_vec[:, -2]
        delta_vec_next = innovation_vec[:, -1]

        K_vec_next = torch.sum(h_matrix_next, 1)

        ## Exogenous labour supply: 1 for the first agent, 0 for the others
        l_matrix_next = torch.zeros_like(h_matrix_next)
        l_matrix_next[:, 0] = 1
        ## Total labour supply (useless calculations, but then we can generalize the code)
        L_vec_next = torch.sum(l_matrix_next, 1)

        ## get factor prices (wages and interest rate)
        r_next = interest_rate(K_vec_next, L_vec_next, delta_vec_next, tfp_vec_next, params)
        w_next = wage(K_vec_next, L_vec_next, tfp_vec_next, params)

        ## calculate wealth, before consumption decision made
        wealth_next = h_matrix_next*r_next.view(-1,1) + l_matrix_next*w_next.view(-1,1)

        ## Consumption next period
        if debug == False:
            c_next = model_normalized(neural_net, wealth_next, params)
        else:
            a_next = wealth_next*params.mult_wealth.view(1, -1)
            c_next = wealth_next - a_next

        # Each column is the euler equation for one agent
        # rows are observations
        u_prime_next = params.u_prime(c_next)

        # Calculate (u'-1){beta E[r_{t+1} u'(c_{t+1})]}
        vals = u_prime_next[:, 1:params.nb_agents]*r_next.view(-1,1)
        # Reshape the (n*n_nodes, nb_agents - 1) matrix to a single column:
        # first column, then second column, then third column, and so on
        vals_reshaped = vals.t().contiguous().view(-1, 1)

        u_prime_inverse = (params.beta*torch.sparse.mm(W, vals_reshaped))**(-1.0/params.gamma)

        # Euler equation error, with current consumption stacked in the same order
        c_reshaped = c[:, 0:params.nb_agents-1].t().contiguous().view(-1, 1)
        EEE = (u_prime_inverse/c_reshaped) - 1

    return EEE.numpy()

In [None]:
def compute_grad_norm(parameters, norm_type=2.0):
    """
    Compute norm over gradients of model parameters.

    :param parameters:
        the model parameters for gradient norm calculation. Iterable of
        Tensors or single Tensor. Entries that are None or have no gradient
        are ignored.
    :param norm_type:
        type of p-norm to use. float("inf") gives the largest absolute
        gradient entry.

    :returns:
        the computed gradient norm as a Python float (0.0 when no parameter
        has a gradient)
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    norm_type = float(norm_type)

    # One p-norm per parameter gradient
    grad_norms = [
        p.grad.detach().norm(norm_type).item()
        for p in parameters
        if p is not None and p.grad is not None
    ]

    if norm_type == float("inf"):
        # The generic formula would evaluate x ** inf and then ** 0, which does
        # not give the max norm; take the maximum over parameters instead.
        return max(grad_norms, default=0.0)

    total_norm = 0
    for param_norm in grad_norms:
        total_norm += param_norm ** norm_type
    return total_norm ** (1.0 / norm_type)

In [None]:
def create_optimizer(model, optimizer_name, lr, momentum):
    """
    Create a torch optimizer over the parameters of model.

    Input:
    model: a pytorch module
    optimizer_name: one of "Adam", "SGD", "SGD-momentum", "Adadelta", "RMSprop"
    lr: learning rate
    momentum: momentum, only used by "SGD-momentum"

    Return: the optimizer
    Raises NameError if optimizer_name is not recognized.
    """
    # (name, constructor) pairs; only the matching constructor is called
    builders = (
        ("Adam", lambda: torch.optim.Adam(model.parameters(), lr=lr)),
        ("SGD", lambda: torch.optim.SGD(model.parameters(), lr)),
        ("SGD-momentum", lambda: torch.optim.SGD(model.parameters(), lr, momentum)),
        ("Adadelta", lambda: torch.optim.Adadelta(model.parameters(), lr)),
        ("RMSprop", lambda: torch.optim.RMSprop(model.parameters(), lr)),
    )
    for known_name, build in builders:
        if optimizer_name == known_name:
            return build()
    raise NameError(f"optimizer {optimizer_name} unknown")


## Variance estimation functions

In [None]:
# Given a model, estimate the mean and variance of the loss by brute force:
# evaluate the loss nb_draws_loss times and take sample moments.
def calculate_variance_loss_model(model, params, nb_draws_loss):
    """
    Input:
    model: a pytorch neural network
    params: a Params object, forwarded to Ξ_torch_MC
    nb_draws_loss: number of realizations of the loss function

    Return: (var_loss, mean_loss), the sample variance and mean of the realizations

    The model is switched to eval mode for the evaluations and set to train mode afterwards.
    """
    model.eval()
    with torch.no_grad():
        realizations = torch.zeros(nb_draws_loss)
        for draw in range(nb_draws_loss):
            realizations[draw] = Ξ_torch_MC(model, params)
        var_loss = torch.var(realizations)
        mean_loss = torch.mean(realizations)
    model.train()
    return var_loss, mean_loss

#Given a model, calculate the current variance of the loss
# NOTE: the definition below is disabled. It is wrapped in a module-level string
# literal, so calculate_variance_loss_model_grid is never defined at runtime.
"""
def calculate_variance_loss_model_grid(model, params, nb_draws_loss, grid_N, grid_M):
    var_loss = torch.zeros(len(grid_N)) #to store results
    # Loop over choice of N and M
    for (ind, (N_chosen, M_chosen)) in enumerate(zip(grid_N, grid_M)):
        # Change M and N
        params_local = MyParams(int(N_chosen), int(M_chosen), params.lr, params.pre_train_model,
                  params.nb_epochs, params.bc, params.order_gauss,
                  params.σ_shocks, params.use_Sobol, params.optimizer, 
                  params.dim_p, params.grid_depth, 
                  params.nb_refinements, params.surplus_threshold, 
                  "params", params.n_points_w, params.n_points_grid,
                  params.w1, params.w2)

        var, mean = calculate_variance_loss_model(model, params_local, nb_draws_loss)
        var_loss[ind] = var
    return var_loss
"""

In [None]:
def calculate_variance_gaussian(params, neural_net, nb_draws, d_replacement, grid_M, grid_N, debug=False, tol = torch.tensor([1e-6])):
    """
    Variance of the loss on a grid of (M, N) when the joint gaussian assumption holds.

    Residuals are evaluated on one common set of states under two separately
    drawn innovation vectors, which gives estimates of
        var(f(s_m, e^i_m))   and   cov(f(s_m, e^i_m), f(s_m, e^j_m)).
    These are then plugged into a closed-form expression for the variance of
    the loss, evaluated for every entry of the grid.

    Return: variance loss function on grid, var(resid), cov(resid)
    """
    # T = M*N/2 at every grid point; N is needed as a tensor in the formula below
    T_grid = torch.tensor(grid_M*grid_N/2)
    N_grid = torch.tensor(grid_N)

    # Number of batches requested from generate_n_batches
    n_batches = params.M if nb_draws < params.M else int(nb_draws/params.M)

    with torch.no_grad():
        # States shared by both sets of residuals
        states = generate_n_batches(n_batches, d_replacement)

        # Two innovation vectors, each a (tfp, delta) pair stacked column-wise.
        # Both are drawn before any residual is computed.
        innovations = []
        for _ in range(2):
            e_tfp, e_delta = simulate_shocks(params, nb_draws)
            innovations.append(torch.column_stack((e_tfp, e_delta)))

        # Residuals at the same states under each innovation vector
        resid_a, resid_b = [
            Residuals_torch(neural_net, states, shock, params, debug, tol)
            for shock in innovations
        ]

        # NOTE: the moments are pooled over all residuals; an age-specific
        # variant (moments by age group) is not computed here.

        # Mean and variance: equal-weight average over the two draws
        mean_val = 0.5*torch.mean(resid_a) + 0.5*torch.mean(resid_b)
        var_val = 0.5*torch.var(resid_a) + 0.5*torch.var(resid_b)

        # Covariance between the two sets of residuals
        cov_val = torch.cov(torch.column_stack((resid_a, resid_b)).T)[0,1]

        # Closed-form variance of the loss, split into its three additive pieces
        scale = 1/(T_grid*(N_grid - 1))
        cov_part = (N_grid**2 - 3*N_grid + 3)*(cov_val**2)
        var_part = (2*(N_grid - 2)*cov_val + var_val)*var_val
        mean_part = 2*(N_grid - 1)*(var_val + (N_grid - 1)*cov_val)*(mean_val**2)
        var_L = scale*(cov_part + var_part + mean_part)

        return var_L, var_val, cov_val
    
def calculate_variance_loss_fast(params, neural_net, nb_draws, d_replacement, grid_M, grid_N, debug=False, tol = torch.tensor([1e-6])):
    """
    Calculate variance of the loss using proposition appendix

    Residuals are evaluated on one common set of states under four separately
    drawn innovation vectors (R1..R4). The variance of the loss on the grid is
    then assembled from three sample moments of pairwise products:
        var(R1*R2),
        cov(R1*R2, R1*R3)   (products sharing one residual),
        cov(R1*R2, R3*R4)   (products sharing no residual).

    Parameters
    ----------
    params : object exposing `M`; also forwarded to simulate_shocks and Residuals_torch
    neural_net : model forwarded to Residuals_torch
    nb_draws : number of draws requested from simulate_shocks
    d_replacement : forwarded to generate_n_batches
    grid_M, grid_N : grids of M and N values (combined elementwise as M*N/2)
    debug, tol : forwarded unchanged to Residuals_torch

    Returns
    -------
    var_L : variance of the loss for every entry of the grid
    """
    grid_T = torch.tensor(grid_M*grid_N/2)
    grid_N = torch.tensor(grid_N)

    # Calculate nb of batches required to get the right number of draws
    if nb_draws < params.M:
        nb_batches_var = params.M
    else:
        nb_batches_var = int(nb_draws/params.M)

    # Calculate variance and covariance
    with torch.no_grad():
        # state vector
        state_vec = generate_n_batches(nb_batches_var, d_replacement)

        # four innovation vectors, each a (tfp, delta) pair stacked column-wise;
        # all of them are drawn before any residual is evaluated
        innovation_vecs = []
        for _ in range(4):
            e_tfp, e_delta = simulate_shocks(params, nb_draws)
            innovation_vecs.append(torch.column_stack((e_tfp, e_delta)))

        # residuals at the same states under the 4 realizations of shocks
        R1, R2, R3, R4 = [
            Residuals_torch(neural_net, state_vec, innovation_vec, params, debug, tol)
            for innovation_vec in innovation_vecs
        ]

        # Only the three products that enter the formula are built
        R1_R2 = R1*R2
        R1_R3 = R1*R3
        R3_R4 = R3*R4

        # Variance cross
        var_R1_R2 = torch.var(R1_R2)

        # Co-variances with one shared element
        cov_R1R2_R1R3 = torch.cov(torch.column_stack((R1_R2, R1_R3)).T)[0,1]

        # Covariances with four different terms
        cov_R1R2_R3R4 = torch.cov(torch.column_stack((R1_R2, R3_R4)).T)[0,1]

        var_L = (1/(grid_T*(grid_N - 1)))*(var_R1_R2 + 2*(grid_N - 2)*(cov_R1R2_R1R3) + 2*((grid_N*(grid_N - 1)/4) - grid_N + 3/2)*(cov_R1R2_R3R4))

    return var_L

## Logging and other utilities

In [None]:
def numpy_flat(a):
    """
    Return every element of `a` in a single flat list.

    `a` is converted with np.array first, so nested lists, arrays and scalars
    are accepted; elements are listed in row-major order and keep their NumPy
    scalar types.
    """
    one_dim = np.array(a).reshape(-1)
    return [element for element in one_dim]

In [None]:
def getSystemInfo():
    """
    Collect basic information about the machine running the code.

    On success, returns a JSON string with the platform name, release and
    version, the architecture, hostname, IP address, MAC address, processor
    and total RAM (rounded to whole GB). If anything fails while collecting,
    the exception is logged with logging.exception and not re-raised.
    """
    try:
        # Entries are evaluated top to bottom, in the order they appear in the output
        info = {
            'platform': platform.system(),
            'platform-release': platform.release(),
            'platform-version': platform.version(),
            'architecture': platform.machine(),
            'hostname': socket.gethostname(),
            'ip-address': socket.gethostbyname(socket.gethostname()),
            # node id as 12 hex digits, grouped in pairs separated by ':'
            'mac-address': ':'.join(re.findall('..', '%012x' % uuid.getnode())),
            'processor': platform.processor(),
            # total memory in bytes -> GB
            'ram': str(round(psutil.virtual_memory().total / (1024.0 **3)))+" GB",
        }
        return json.dumps(info)
    except Exception as e:
        logging.exception(e)