In [2]:
## Preamble: Package Loading
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import itertools as iter
import math 
import sys

<h2> Panel Selection and Control: Monte Carlo Data Generating Process  </h2> 

<h3> 1.0 DGP Description </h3>

Here I decribe the manner in which the data used in the monte carlo simulation is generated. 

<h3> 1.1 Error, Instrument, and Exogenous Variable Generation </h3>

Let; $n_{tp} \equiv T$ be the total number of time periods, $n_{end} \equiv p_1$ be the number of endogneous regressors included in the primary regression, $n_{exo} \equiv p_2$ be the number of exogenous regressors included in the primary regression, and $n_{tinst} \equiv w$, $ n_{cinst} \equiv w_j$ be the total number of available instruments and the number of instruments relevant to each crossection respectively. Now let,

$$
\begin{align*} 
\rho_{er} &= \begin{bmatrix} \rho_{er,1} & \rho_{er,2} & \cdots & \rho_{er,n_{end}} \end{bmatrix} \\
\rho_{inst} &= \begin{bmatrix} \rho_{inst,1} & \rho_{inst,2} & \cdots & \rho_{inst,n_{inst}-1} \end{bmatrix}\\
\rho_{ex} &= \begin{bmatrix} \rho_{ex,1} & \rho_{ex,2} & \cdots & \rho_{ex,n_{ex}-1} \end{bmatrix}  
\end{align*}
$$

So that I can define the covariance matrices for *each* cross section as follows 

$$
\begin{align*}
v_{er} &= \begin{bmatrix} 
1 & \rho_{er,1} & \rho_{er,2} & \cdots & \rho_{er,n_{end}} \\
\rho_{er,1} & 1  & \rho_{er,1} &\cdots & \rho_{er,n_{end}-1} \\
\rho_{er,2} & \rho_{er,1} & 1 & \cdots & \rho_{er,n_{end}-2} \\
\vdots & &&\ddots&  \\
 \rho_{er,n_{end}} & \rho_{er,n_{end}-1} & \rho_{er,n_{end}-2} & \cdots &  1 
\end{bmatrix}
%
\hspace{1cm} 
%
v_{inst} = \begin{bmatrix} 
1 & \rho_{inst,1} & \rho_{inst,2} & \cdots & \rho_{inst,n_{tinst}-1} \\
\rho_{inst,1} & 1  & \rho_{inst,1} &\cdots & \rho_{inst,n_{tinst}-2} \\
\rho_{inst,2} & \rho_{tinst,1} & 1 & \cdots & \rho_{inst,n_{tinst}-3} \\
\vdots & &&\ddots&  \\
 \rho_{inst,n_{tinst}-1} & \rho_{inst,n_{tinst}-2} & \rho_{inst,n_{tinst}-3} & \cdots &  1 
\end{bmatrix}
%
\\[15pt]
%
v_{ex} &= \begin{bmatrix} 
1 & \rho_{ex,1} & \rho_{ex,2} & \cdots & \rho_{ex,n_{ex}-1} \\
\rho_{ex,1} & 1  & \rho_{ex,1} &\cdots & \rho_{ex,n_{ex}-2} \\
\rho_{ex,2} & \rho_{ex,1} & 1 & \cdots & \rho_{ex,n_{ex}-3} \\
\vdots & &&\ddots&  \\
 \rho_{ex,n_{ex}-1} & \rho_{ex,n_{ex}-2} & \rho_{ex,n_{ex}-2} & \cdots &  1 
\end{bmatrix}
\end{align*} 
$$

As a result we can construct the covariance matrices for all cross sections,

$$
\begin{align*}
V_{er} &= 
\begin{bmatrix}
v_{er} & \mathbf{0}_{(n_{end}+1 \times n_{end}+1)} & \cdots & \mathbf{0}_{(n_{end}+1 \times n_{end}+1)}  \\
\mathbf{0}_{(n_{end}+1 \times n_{end}+1)} & v_{er} & \cdots & \mathbf{0}_{(n_{end}+1 \times n_{end}+1)}  \\
\vdots & \vdots & \ddots & \vdots \\
\mathbf{0}_{(n_{end}+1 \times n_{end}+1)} & \mathbf{0}_{(n_{end}+1 \times n_{end}+1)} & \cdots & v_{er}
\end{bmatrix} 
%
\hspace{1cm}
%
V_{inst} = 
\begin{bmatrix}
v_{inst} & \mathbf{0}_{(n_{tinst} \times n_{tinst})} & \cdots & \mathbf{0}_{(n_{tinst} \times n_{tinst})}  \\
\mathbf{0}_{(n_{tinst} \times n_{tinst})} & v_{inst} & \cdots & \mathbf{0}_{(n_{tinst} \times n_{tinst})}  \\
\vdots & \vdots & \ddots & \vdots \\
\mathbf{0}_{(n_{tinst} \times n_{tinst})} & \mathbf{0}_{(n_{tinst} \times n_{tinst})} & \cdots & v_{inst}
\end{bmatrix} \\[15pt]
%
V_{ex} & = 
\begin{bmatrix}
v_{ex} & \mathbf{0}_{(n_{ex} \times n_{ex})} & \cdots & \mathbf{0}_{(n_{ex} \times n_{ex})}  \\
\mathbf{0}_{(n_{ex} \times n_{ex})} & v_{ex} & \cdots & \mathbf{0}_{(n_{ex} \times n_{ex})}  \\
\vdots & \vdots & \ddots & \vdots \\
\mathbf{0}_{(n_{ex} \times n_{ex})} & \mathbf{0}_{(n_{ex} \times n_{ex})} & \cdots & v_{ex}
\end{bmatrix} 
\end{align*}
$$


<h3> 2.0 Block Diagonal Matrix Function </h3>

In order to facilitate construction of the block diagonal matrices discussed above, I define a following function

In [3]:
def blkdiag(mat,nb):
    """
INPUTS
mat     Square Matrix which will form the block in a block diagonal matrix
nb      Number of diagonal block in output matrix 

OUTPUT
v       Block diagonal matrix of dimension ( nb*mat.shape[0] x nb*mat.shape[0] )
    """
    # Initializing the varcov matrix for all crosssections
    v = np.hstack((mat,np.zeros((mat.shape[0],(nb-1)*(mat.shape[1])))))
    # Registry matrix used in following loops
    vreg = np.eye(nb-1)
    for j in np.arange(nb-1):
        # Initializing current block of rows 
        pv = np.zeros((mat.shape[0],mat.shape[0]))
        # Horiz Stacking either zeros or var_err depending on ve_reg[j,i]
        for i in np.arange(nb-1):
            if vreg[j,i] == 1: # Stack var_err onto pv_err
                pv = np.hstack((pv,mat))
            if vreg[j,i] == 0: # Stack zeros onto pv_err
                pv = np.hstack((pv,np.zeros((mat.shape[0],mat.shape[0]))))
        # Vertically stacking block rows on top of one another
        v = np.vstack((v,pv))
    return v

<h3> 2.1 Block Diagonal Matrix Function Demonstration </h3>

In [4]:
blkdiag(np.ones((3,3)),4)

array([[ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.]])

<h3> 3.0 DGP Generation Code </h3>

What follows is the function used to generate all monte carol simulation data sets. 

In [5]:
def psc_dgp(inpt_d): 
    '''
PURPOSE: 
Generate Draws from the dgp detailed in penner(2018) 'Panel Selection and Control'

INPUTS: 
inpt_d      Dictionary with the following keyword items
 r_seed      Integer random number generator seed
 nds         Number of generated data sets (int)
 ntp         Number of time periods (int)
 ncs         Number of cross sections (int)
 n_end       Number of endogenous variables in primary regression (int)
 n_exo       Number of exogenous variables in primary regression (int)
 t_inst      Number of total instruments available to all cross sections (int)
 c_inst      Number of valid instrument per cross section (int < t_inst )
 frc         Indicator for forcing control function to be other than correlation based
 sec_pan     Indicator for whether secondary equation is panel (=1) or not (=0)
 ex_vpro     List of exogenous regr covariances where cov(Z21,i , Z21,(i+j)) = ex_vpro[j-1] 
 inst_vpro   List of instrument covariances where cov(Wi , W(i+j)) = inst_vpro[j-1] 
 err_vpro    List of error covariances where cov(V1,i , V1,(i+j)) = err_vpro[j-1] 
 
OUPUTS: 
data_sets                      List with the following elements
 data_sets[0][0]                  dictionary inpt_d  returned
 data_sets[0][1]['Derr_nms']      list of column names for each error term df
 data_sets[0][1]['Dins_nms']      list of column names for each instrument df
 data_sets[0][1]['Dlng_nms']      list of column names for each long df
 data_sets[i][0]['err_df']        ith error term data array in list form
 data_sets[i][0]['prim_df']       ith primary regression varibles data array in list form
 data_sets[i][0]['inst_df]        ith instruments data array in list form    
    '''
    # Extracting all input variables from inpt_d dictionary
    r_seed = inpt_d['r_seed']
    nds = inpt_d['nds']
    ntp = inpt_d['ntp']
    ncs = inpt_d['ncs']
    n_end = inpt_d['n_end']
    n_exo = inpt_d['n_exo']
    t_inst = inpt_d['t_inst']
    c_inst = inpt_d['c_inst']
    frc = inpt_d['frc']
    sec_pan = inpt_d['sec_pan']
    ex_vpro = inpt_d['ex_vpro']
    inst_vpro = inpt_d['inst_vpro']
    err_vpro = inpt_d['err_vpro']
    np.random.seed([r_seed])

    ## Coefficients on instruments in secondary equation
    if sec_pan == 0:
        # Not panel so coefficients have seperate coeff vector of length c_inst
        icoeffs_reg = list(iter.product([-1,1],repeat = c_inst))
        # All permutation of ncs pairs of n_end coeff vectors on t_inst instruments  
        picfs = list(iter.permutations(range(0,len(icoeffs_reg)),n_end))
        # Rand choosing 1 (if panel) or ncs (if !panel) coeff for ex regress in secondary reg.
        icfs = [ picfs[i] for i in np.random.randint(len(picfs),size = ncs)]
    else: 
        # Is panel so coefficients have common vector of length t_inst
        icoeffs_reg = list(iter.product([-1,1],repeat = t_inst))
        # All permutation of ncs pairs of n_end coeff vectors on t_inst instruments 
        if len(icoeffs_reg) < 99:
            picfs = list(iter.permutations(range(0,len(icoeffs_reg)),n_end))
            # Rand choosing 1 (if panel) coeff for ex regress in secondary reg.
            icfs = [ picfs[i] for i in np.random.randint(len(picfs),size =1)]*ncs
        else:
            icfs = [tuple(np.random.randint(len(icoeffs_reg),size =n_end))]*ncs

    ## Assignment of the relvant instruments to each cross section.
    # Registry of instrument assignments
    insts_reg = list(iter.combinations(np.arange(1,t_inst+1),c_inst))
    # List of which instruments (col #'s) are relevant for each crossection 
    icr = [insts_reg[np.random.randint(len(insts_reg))] for i in range(ncs)]

    ## Coefficients on exogenous variables in secondary equations       
    # Collection of all Coefficients on Exogenous Variables in secondary eqns
    excoeffs_reg = list(iter.product([-1,1],repeat = n_exo))
    # All permutation of combinations of ncs coeff vectors on  for ex regressors 
    pxcfs = list(iter.permutations(range(0,len(excoeffs_reg)),n_end))
    # Rand choosing 1 (if panel) or ncs (if !panel) coeff for ex regress in secondary reg.
    if sec_pan == 0:
        # Not panel so rand choosing ncs collections of n_exo reg numbers from pxcfs
        xcfs = [ pxcfs[i] for i in np.random.randint(len(pxcfs),size = ncs)]
    else:
        # Is panel so need only 1 collection of n_exo reg numbers from pxcfs & duplicating
        xcfs = [ pxcfs[i] for i in np.random.randint(len(pxcfs),size = 1)]*ncs

    ## Generation of the coefficient matrix for secondary regression.             
    # Initializing Coefficient Matrix            
    coeff = np.zeros((n_end,ncs,t_inst+n_exo))
    for j in range(n_end):
        for i in range(ncs):
            for k in range(n_exo):
                coeff[j,i,k] = excoeffs_reg[xcfs[i][j]][k]
            if sec_pan == 0:
                for l in range(c_inst):
                    k = icr[i][l]
                    coeff[j,i,k+n_exo-1] = icoeffs_reg[icfs[i][j]][l]   
            else: 
                for l in icr[i]:
                    coeff[j,i,l+n_exo-1] = icoeffs_reg[icfs[i][j]][l-1] 

    ## Joint Distribution of Exogenous regressors
    # Vector of Means (=0)
    mu_ex = np.zeros(n_exo)
    # Diagonal matrix of variaces (=1)
    var_ex = np.eye(n_exo)
    # Variance Covariance Matrix Generation or EACH crossection
    for i in np.arange(len(ex_vpro)):
        var_ex = (var_ex + ex_vpro[i]*np.eye(n_exo,k=i+1)
                             + ex_vpro[i]*np.eye(n_exo,k=-(i+1))) 

    ## Joint Distribution of Instruments for all cross sections
    # Vector of Means (=0)
    mu_inst = np.zeros(t_inst)
    # Diagonal Matrix of Variaces (=1)
    var_inst = np.eye(t_inst)
    # Variance Covariance Matrix Generation
    for i in np.arange(len(inst_vpro)):
        var_inst = (var_inst + inst_vpro[i]*np.eye(t_inst,k=i+1) 
                             + inst_vpro[i]*np.eye(t_inst,k=-(i+1)))

    ## Joint Distribution of Error Terms for EACH crossection
    # Vector of means
    mu_err = np.zeros(n_end+1)
    # Diagonal Matrix of Variances 
    var_err = np.eye(n_end+1)
    # Variance Covariance Matrix Generation
    if frc == 0 : 
        # Var Cov matrix for correlated errors ==> additive linear control functions 
        for i in np.arange(len(err_vpro)):
            var_err = (var_err + err_vpro[i]*np.eye(n_end+1,k=i+1) 
                                 + err_vpro[i]*np.eye(n_end+1,k=-(i+1)))
    else: 
        # Error not explicitly correlated ==> have to force control functions.
        var_err = np.eye(n_end+1)

    # Error term mean vector for ALL Crossections
    Mu_err = np.tile(mu_err,ncs)        
    # Error term variance covariance matrix for ALL Crossections
    V_err = blkdiag(var_err,ncs)

    # Exogenous regressor mean vector for ALL Crossections
    Mu_ex = np.tile(mu_ex,ncs) 
    # Exogenous regressor variance covariance matrix for ALL Crossections
    V_ex = blkdiag(var_ex,ncs)

    ## Variable Name Generation
    # exogenous variable name generation 
    #          [Z21,1 , Z21,2 ......, Z22,1 , Z22,2 , ..... ]        
    ex_nms = [''.join(['Z2',str(i),',',str(j)]) 
              for i in list(range(1,ncs+1)) 
              for j in list(range(1,n_exo+1))]
    # instruments names generation 
    #          [W1 , W2 , .... ]
    inst_nms = [''.join(['W',str(i)]) for i in list(range(1,t_inst+1))]
    # Error terms names generation
    #          [V1,1 , V1,2 , .... ,e1 , V2,1 , V2,2 , ...... e2 , ......]
    err_nm1 = ['e' if val == n_end+1 else 'V' for val in  list(range(1,n_end+2))*ncs]
    err_nm2 = [ str(i) for y in range(1,ncs+1) for i in iter.repeat(y,n_end+1)]
    err_nm3 = ['' if val == n_end+1 else ''.join([',',str(val)]) 
               for val in list(range(1,n_end+2))*ncs]
    err_nm  = [''.join([err_nm1[i],err_nm2[i],err_nm3[i]]) for i in range(len(err_nm1))]

    # Initializing the data sets list
    data_sets = []

    for k in range(nds):
        ## Variable Generation
        time = np.arange(1,ntp+1).reshape(ntp,1)
        # Exogenous Regressor Generation
        Ex = np.random.multivariate_normal(Mu_ex,V_ex,ntp)
        Ex = np.hstack((time,Ex))
        # Instruments Generation
        Inst = np.random.multivariate_normal(mu_inst,var_inst,ntp)
        Inst = np.hstack((time,Inst))
        # Error Terms Generation 
        Err = np.random.multivariate_normal(Mu_err,V_err,ntp)
        Err = np.hstack((time,Err))
        ## Data Frame Generation 
        Ex_df = pd.DataFrame(Ex,columns = ['t'] + ex_nms)
        Inst_df = pd.DataFrame(Inst, columns = ['t'] + inst_nms)
        Err_df = pd.DataFrame(Err,columns = ['t'] + err_nm)

        ## Generating Endogenous (primary) regressors
        for j in range(n_end):
            for i in range(ncs):
                # Regular expression for the relevant exogenous regressors
                ex_pat = ''.join(['^Z2',str(i+1),','])
                # Regular expression for the relevant error term. 
                err_pat = ''.join(['V',str(i+1),',',str(j+1)])
                # Extracting exog regresors converting to numpy array
                pe1 = pd.concat([Ex_df.filter(regex = ex_pat),Inst_df.iloc[:,1:]], axis = 1).values
                # Extracting error variable and converting to numpy array
                pe2 = Err_df.filter(regex = err_pat).values
                # Calculating the endogenous primary regressor
                pe = pe1.dot(coeff[j,i,:]).reshape(pe1.shape[0],1) + pe2
                # Constructing the appropriate name for the endo regressor
                end_nm = ''.join(['Z1',str(i+1),',',str(j+1)])
                if j == 0 and i == 0:
                    # Initializing the endog df with first calculated regressor
                    End_df = pd.DataFrame(pe,columns = [end_nm])
                else:
                    # Adding calculated endog regressor onto df
                    End_df[end_nm] = pe

        ## Generation primary regression coefficient vector
        # Common Primary Coeff Vector
        p_commoncf = np.array(([1,-1]*10)[:n_end+n_exo]).reshape(n_end+n_exo,1)
        # Fixed Effect for each crossection
        fe = [ 1+x/2 for x in np.arange(0,ncs)]

        ## Generation of primary regressand
        for i in range(ncs):
            # Regular expression for the relevant endogenous regressors
            en_pat = ''.join(['^Z1',str(i+1),','])
            # Regular expression for the relevant exogenous regressors
            ex_pat = ''.join(['^Z2',str(i+1),','])
            # Name of apporpriate primary error term
            er_nm = ''.join(['e',str(i+1)])
            # Extracting appropriate regressor for primary equation
            pr3 = pd.concat([End_df.filter(regex = en_pat),
                             Ex_df.filter(regex = ex_pat)], axis = 1).values
            # Extracting appropriate error term
            pr4 = Err_df[er_nm].values.reshape(Err_df.shape[0],1)
            # Generating primary regressand
            if frc == 0:
                # if no forcing no need to include control functions explicity
                pr = fe[i] + pr3.dot(p_commoncf)+pr4
            else:
                # need to include control functions explicitly
                # (inprogress)
                pass
            # Constructing the appropriate name for the endo regressor
            pr_nm = ''.join(['Y',str(i+1)])
            if i == 0:
                # Initializing the regressand df 
                Pr_df = pd.DataFrame(pr,columns = [pr_nm])   
            else:
                # Adding generated regressand to df
                Pr_df[pr_nm] = pr

        ## Converting Data To Long Panel Type
        for i in range(ncs):
            # Initializing temporary df
            pL = None
            # Columns Names for endogeneous regressors
            Z1_nm = [ ''.join(['Z1',',',str(j)]) for j in range(1,n_end+1)]
            # Columns Names for exogenous regressors
            Z2_nm = [ ''.join(['Z2',',',str(j)]) for j in range(1,n_exo+1)]
            # Adding regressand columns to pL
            pL = pd.DataFrame(Pr_df[''.join(['Y',str(i+1)])].values,columns = ['Y'])
            # Adding endog regressors to pL
            pL = pd.concat([pL,pd.DataFrame(
                             End_df.filter(regex = ''.join(['^Z1',str(i+1),','])).values
                             ,columns = Z1_nm)],axis = 1)
            # Adding exog regressors to pL
            pL = pd.concat([pL,pd.DataFrame(
                             Ex_df.filter(regex = ''.join(['^Z2',str(i+1),','])).values
                             ,columns = Z2_nm)],axis = 1)
            # Adding the crossection variable
            pL['crs'] = i+1
            # Adding the time component variable
            pL['t'] = pd.DataFrame(np.arange(1,ntp+1).reshape(ntp,1))
            if i == 0 :
                # Initializing Data_long
                Data_long = pL
            else:
                # Adding pL to the bottom of Data_long
                Data_long = pd.concat([Data_long,pL], axis = 0)

        # Sorting Data_long by column name      
        Data_long = Data_long[list(Data_long.columns)[-2:] + list(Data_long.columns)[:-2]]

        # Extract names in Df_s only once
        if k == 0:
            # Names for export data sets
            Dlng_nms = list(Data_long.columns)
            Dins_nms = list(Inst_df.columns)
            Derr_nms = list(Err_df.columns)
            nms_cfs = {}
            nms_cfs['Dlng_nms'] = Dlng_nms
            nms_cfs['Dins_nms'] = Dins_nms
            nms_cfs['Derr_nms'] = Derr_nms
            nms_cfs['coeff'] = coeff.tolist()
            nms_cfs['var_inst'] = var_inst.tolist()
            nms_cfs['V_ex'] = V_ex.tolist()
            nms_cfs['V_err'] = V_err.tolist() 
            data_sets.append([inpt_d , nms_cfs])

        # Adding constructed data sets to data_sets list
        c = {}
        c['err_df'] = np.array(Err_df).tolist()
        c['prim_df'] = np.array(Data_long).tolist()
        c['inst_df'] = np.array(Inst_df).tolist()
        data_sets.append([c])
        
    return data_sets

<h3> DGP Inputs </h3>

In [6]:
%%time
# Setting a Seed
r_seed = 199
# Number of Time Periods
ntp = 10
# Number of Cross-Sections
ncs = 4
# Number of Endogenous Variables in Primary Equation
n_end = 3
# Number of Endogenous Variables in Primary Equation
n_exo = 2
# Total Number of Instruments
t_inst = 10
# Number of Instruments per Crossectiona
c_inst = 8
# Indicator for whether to force additive non linear cntrl function. 1 = yes 
frc = 0
# Indicator whether the secondary regression data is panel or not. 
sec_pan = 1
# number of data sets
nds = 1000
# Vector of exog off diagonal covariances i.e. cov(Z2t_l,Z2t_(l+j)) = ex_vpro[j-1]
ex_vpro = [0.5]
# Vector of inst off diagonal covariances i.e. cov(Wt_l,Wt_(l+j)) = inst_vpro[j-1]
inst_vpro = [0.5 , 0.25]
# Vector of error off diagonal covariances i.e. cov(Wt_l,Wt_(l+j)) = inst_vpro[j-1]
err_vpro = [0.8 , 0.36 ]
# Construction of the input dictionary
inpt_d = {'r_seed':r_seed,'nds': nds, 'ntp':ntp , 'ncs': ncs , 'n_end':n_end,
             'n_exo': n_exo, 't_inst':t_inst , 'c_inst': c_inst,
             'frc':frc, 'sec_pan': sec_pan, 'ex_vpro': ex_vpro,
             'inst_vpro': inst_vpro, 'err_vpro':err_vpro }
# Function call
psc_data = psc_dgp(inpt_d)

CPU times: user 1min 29s, sys: 1.76 s, total: 1min 30s
Wall time: 48 s


<h3> DGP JSON encoding and saving </h3>

In [7]:
#output_filename = 'pscdata_5_17_1.json'
#with open(output_filename, 'w') as f_obj:
# json.dump(psc_data, f_obj)

<h3> Data set meta data dictionary </h3>

In [8]:
psc_data[0][0]

{'c_inst': 8,
 'err_vpro': [0.8, 0.36],
 'ex_vpro': [0.5],
 'frc': 0,
 'inst_vpro': [0.5, 0.25],
 'n_end': 3,
 'n_exo': 2,
 'ncs': 4,
 'nds': 1000,
 'ntp': 10,
 'r_seed': 199,
 'sec_pan': 1,
 't_inst': 10}

<h3> Error term variance covariance matrix </h3>

<h3> Coefficient vector for regression of $Z_{1j}$ on $[Z_{2j} , W ]$ </h3>

In [9]:
c1 = [''.join(['a11,',str(i)]) for i in range(1,n_exo+1)]
for i in range(1,t_inst+1):
    c1.append(''.join(['a12,',str(i)]))

pd.DataFrame(np.array(psc_data[0][1]['coeff'][0]),columns = c1)

Unnamed: 0,"a11,1","a11,2","a12,1","a12,2","a12,3","a12,4","a12,5","a12,6","a12,7","a12,8","a12,9","a12,10"
0,-1.0,1.0,-1.0,1.0,1.0,1.0,0.0,1.0,-1.0,-1.0,0.0,1.0
1,-1.0,1.0,-1.0,0.0,0.0,1.0,-1.0,1.0,-1.0,-1.0,-1.0,1.0
2,-1.0,1.0,-1.0,0.0,1.0,1.0,-1.0,1.0,-1.0,-1.0,-1.0,0.0
3,-1.0,1.0,-1.0,1.0,1.0,0.0,-1.0,1.0,-1.0,0.0,-1.0,1.0


<h3> Coefficient vector for regression of $Z_{2j}$ on $[Z_{2j} , W ]$ </h3>

In [10]:
c2 = [''.join(['a21,',str(i)]) for i in range(1,n_exo+1)]
for i in range(1,t_inst+1):
    c2.append(''.join(['a22,',str(i)]))

pd.DataFrame(np.array(psc_data[0][1]['coeff'][1]),columns = c2)

Unnamed: 0,"a21,1","a21,2","a22,1","a22,2","a22,3","a22,4","a22,5","a22,6","a22,7","a22,8","a22,9","a22,10"
0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0
1,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0
2,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,0.0
3,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,1.0


In [11]:
np.array(psc_data[0][1]['V_err']).shape

(16, 16)

<h3> Instruments variance covariance matrix </h3>

In [12]:
np.array(psc_data[0][1]['var_inst'])

array([[ 1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,
         0.25],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ]])

<h3> Exogenous regressors variance covariance </h3>

In [13]:
np.array(psc_data[0][1]['V_ex']).shape

(8, 8)

<h3> Long panel of primary regression variables </h3>

In [14]:
#Reconstructed error term df
pd.DataFrame(np.array(psc_data[1][0]['prim_df']), columns = psc_data[0][1]['Dlng_nms'])

Unnamed: 0,crs,t,Y,"Z1,1","Z1,2","Z1,3","Z2,1","Z2,2"
0,1.0,1.0,3.50515,-3.198027,-5.148302,1.252921,1.191989,0.59253
1,1.0,2.0,3.716243,2.934104,1.037307,-1.671159,-0.472168,0.603311
2,1.0,3.0,4.351773,3.665093,-1.803665,-1.555457,0.127461,0.633591
3,1.0,4.0,3.178635,-0.512513,0.989708,1.097196,-0.621726,0.525024
4,1.0,5.0,21.991051,4.240302,-10.560328,6.248514,0.477654,1.042266
5,1.0,6.0,7.016897,-2.103318,-4.2104,3.185124,-1.148958,-1.488057
6,1.0,7.0,10.792104,1.653458,-3.924259,3.925782,-0.57375,0.094258
7,1.0,8.0,-8.78824,-0.701966,6.08313,-2.656891,0.62354,-0.417547
8,1.0,9.0,-6.327398,0.346419,1.897748,-5.33581,-0.083849,1.050506
9,1.0,10.0,10.47463,2.885425,0.064253,5.621979,0.646907,0.173529


<h3> Instruments data frame </h3>

In [15]:
#Reconstructed instrument df 
pd.DataFrame(np.array(psc_data[1][0]['inst_df']), columns = psc_data[0][1]['Dins_nms'])

Unnamed: 0,t,W1,W2,W3,W4,W5,W6,W7,W8,W9,W10
0,1.0,-1.097072,-1.294587,-0.784817,-0.08181,0.03006,-0.237676,0.297739,-1.18258,-1.515786,-1.797995
1,2.0,-0.910355,0.131095,-0.413752,2.163026,-1.047012,0.622015,-0.203973,0.544062,-0.281678,0.896179
2,3.0,-0.485875,-0.703691,1.065028,0.942312,1.977462,-0.309367,-2.056033,-0.226159,-0.232351,0.868076
3,4.0,0.901882,-0.766641,-0.878492,-0.399663,-0.506998,-0.226604,0.451554,-0.652288,-0.206968,0.896127
4,5.0,-1.415944,0.041724,-0.230805,-0.328809,-1.188298,-2.392957,-3.96078,-2.720006,-2.056456,0.076719
5,6.0,0.165394,-0.403599,-0.299638,-0.054435,0.328413,0.329082,1.074923,-1.740731,-0.484493,-1.151339
6,7.0,-0.290031,-0.293182,-1.223609,-0.481709,-0.126403,-0.329762,-0.133029,-1.638266,-0.34347,-1.186645
7,8.0,-0.089977,-0.080953,0.603242,0.54353,-0.315348,0.396449,0.438532,1.593711,0.936514,2.094613
8,9.0,-0.475299,-0.108072,0.779757,0.088956,0.021188,-0.724288,0.846953,1.084052,2.016628,0.094568
9,10.0,0.102052,0.618713,0.25682,0.551595,0.078519,-0.515403,-0.889415,-1.469384,-1.170668,-0.066825


<h3> Error term data frame </h3>

In [16]:
#Reconstructed error term df
pd.DataFrame(np.array(psc_data[1][0]['err_df']), columns = psc_data[0][1]['Derr_nms'])

Unnamed: 0,t,"V1,1","V1,2","V1,3",e1,"V2,1","V2,2","V2,3",e2,"V3,1","V3,2","V3,3",e3,"V4,1","V4,2","V4,3",e4
0,1.0,-0.383596,-0.754023,-0.578398,-0.098588,0.123381,-1.535627,-1.840897,-0.694376,0.181513,-0.313618,-0.223749,0.339851,1.654685,1.799467,1.28883,0.294243
1,2.0,-2.110203,-1.922131,-0.534917,1.415127,-0.516824,-1.149022,-0.886479,0.188024,1.318691,1.045885,0.185297,-0.499512,1.149023,0.11759,-1.494695,-2.457055
2,3.0,-1.471461,-1.659008,-1.460528,-1.067658,-0.210845,-0.443997,-0.699777,-0.898123,-2.155053,-2.482466,-1.681826,-0.638174,0.920401,0.283716,-0.487619,-0.923602
3,4.0,0.417157,1.760533,2.098667,1.436909,-1.086545,-0.859645,0.645572,2.113842,0.477524,-0.441164,-0.253875,0.783266,-1.461752,-1.15909,-0.937432,-1.172035
4,5.0,-1.58691,-1.14939,-0.711674,-0.622704,-1.329282,-0.912439,0.287622,1.111821,0.121185,-0.886741,-1.391542,-0.787546,0.553217,1.165308,1.192606,0.526032
5,6.0,-0.684704,0.506957,1.350963,1.06379,-1.089992,-1.512047,-0.973471,0.109881,-0.062634,-0.204503,-0.396725,-0.402296,0.171588,-1.334096,-1.786598,-1.406957
6,7.0,2.439031,2.131467,1.1474,-0.379403,-1.796945,-1.621144,-0.606196,0.615397,0.311207,0.828932,0.752252,0.469771,-0.622187,-0.120592,0.482674,0.347553
7,8.0,-1.275494,0.377988,1.05597,0.694835,-0.21742,-0.56013,-0.651177,-0.494658,0.482911,0.60286,0.551417,0.421026,0.708505,-0.745546,-1.474417,-0.752249
8,9.0,0.53685,-0.655534,-1.625999,-1.574613,0.751552,0.110438,0.072373,0.948517,-0.490491,0.326896,1.26465,1.673082,-1.237598,-1.630733,-1.784118,-1.209904
9,10.0,0.257156,0.655663,1.192035,1.504857,-1.48355,-1.162026,-0.212813,0.344378,2.970732,2.237661,0.751652,-0.342688,0.638374,0.611771,0.135478,-0.399419


In [17]:
# input_filename = 'pscdata_5_17_1.json'
# with open(input_filename) as f_obj: 
#     data_in = json.load(f_obj)