In [1]:
## Preamble: Package Loading
import itertools as iter
import json
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import gridspec
from mpl_toolkits.mplot3d import Axes3D

<h3> Block Diagonal Matrix Function </h3>

In [2]:
def blkdiag(mat,nb):
    """
Build a block diagonal matrix consisting of nb copies of mat.

INPUTS
mat     2-D array (square or rectangular) repeated along the diagonal
nb      Number of diagonal blocks in output matrix (int >= 1)

OUTPUT
v       Block diagonal matrix of dimension ( nb*mat.shape[0] x nb*mat.shape[1] ).
        Entries outside the diagonal blocks are zero; the result is at least
        float64 (integer input is promoted, complex input stays complex).

RAISES
ValueError   if mat is not 2-D or nb is smaller than 1
    """
    mat = np.asarray(mat)
    if mat.ndim != 2:
        raise ValueError('mat must be a 2-D array')
    if nb < 1:
        raise ValueError('nb must be at least 1')
    n_row, n_col = mat.shape
    # Allocate the full matrix once instead of re-stacking it block by block
    v = np.zeros((nb*n_row, nb*n_col), dtype=np.result_type(mat.dtype, np.float64))
    for b in range(nb):
        # Copy mat into the b-th diagonal slot
        v[b*n_row:(b+1)*n_row, b*n_col:(b+1)*n_col] = mat
    return v

<h3> Block Diagonal Matrix Function Demonstration </h3>

In [83]:
# Demo: four 3x3 blocks of ones placed along the diagonal of a 12x12 matrix
blkdiag(np.ones((3,3)),4)

array([[ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.]])

<h2> DGP Generation Code </h2>

In [84]:
%%time

def _n_permutations(n_items, n_pick):
    """Number of ordered selections of n_pick distinct items out of n_items."""
    if n_pick > n_items:
        return 0
    total = 1
    for factor in range(n_items - n_pick + 1, n_items + 1):
        total *= factor
    return total


def _nth_permutation(n_items, n_pick, index):
    """
    Return element `index` of itertools.permutations(range(n_items), n_pick)
    (lexicographic order) without building the full list of permutations.
    """
    pool = list(range(n_items))
    index = int(index)
    picked = []
    for pos in range(n_pick):
        # Number of permutations that share the same entry at position pos
        block = _n_permutations(n_items - pos - 1, n_pick - pos - 1)
        which, index = divmod(index, block)
        picked.append(pool.pop(which))
    return tuple(picked)


def _draw_coeff_sets(n_vectors, n_end, ncs, panel):
    """
    Randomly choose, for each cross section, an ordered set of n_end distinct
    coefficient-vector indices out of n_vectors. A panel shares one draw
    across all ncs cross sections; otherwise every cross section gets its own.
    """
    n_sets = _n_permutations(n_vectors, n_end)
    if panel:
        draws = np.random.randint(n_sets, size = 1)
        return [_nth_permutation(n_vectors, n_end, draws[0])]*ncs
    draws = np.random.randint(n_sets, size = ncs)
    return [_nth_permutation(n_vectors, n_end, d) for d in draws]


def _banded_cov(dim, off_diag):
    """Unit-variance covariance matrix with off_diag[j-1] on the +/- j-th diagonals."""
    cov = np.eye(dim)
    for lag, value in enumerate(off_diag, start = 1):
        cov = cov + value*np.eye(dim, k = lag) + value*np.eye(dim, k = -lag)
    return cov


def psc_dgp(inpt_d): 
    '''
PURPOSE: 
Generate Draws from the dgp detailed in penner(2018) 'Panel Selection and Control'

INPUTS: 
inpt_d      Dictionary with the following keyword items
 r_seed      Integer random number generator seed
 nds         Number of generated data sets (int)
 ntp         Number of time periods (int)
 ncs         Number of cross sections (int)
 n_end       Number of endogenous variables in primary regression (int)
 n_exo       Number of exogenous variables in primary regression (int)
 t_inst      Number of total instruments available to all cross sections (int)
 c_inst      Number of valid instrument per cross section (int < t_inst )
 frc         Indicator for forcing control function to be other than correlation based
             (only frc = 0 is implemented)
 sec_pan     Indicator for whether secondary equation is panel (=1) or not (=0)
 ex_vpro     List of exogenous regr covariances where cov(Z21,i , Z21,(i+j)) = ex_vpro[j-1] 
 inst_vpro   List of instrument covariances where cov(Wi , W(i+j)) = inst_vpro[j-1] 
 err_vpro    List of error covariances where cov(V1,i , V1,(i+j)) = err_vpro[j-1] 

OUTPUTS: 
data_sets                      List with the following elements
 data_sets[0][0]                  dictionary inpt_d  returned
 data_sets[0][1]['Dlng_nms']      list of column names for each long df
 data_sets[0][1]['Dins_nms']      list of column names for each instrument df
 data_sets[0][1]['Derr_nms']      list of column names for each error term df
 data_sets[0][1]['coeff']         secondary equation coefficients, nested list of
                                  shape (n_end , ncs , n_exo + t_inst)
 data_sets[0][1]['var_inst']      instrument variance covariance matrix
 data_sets[0][1]['V_ex']          exogenous regressor variance covariance matrix
 data_sets[0][1]['V_err']         error term variance covariance matrix
 data_sets[i][0]['err_df']        ith error term data array in list form
 data_sets[i][0]['prim_df']       ith primary regression variables data array in list form
 data_sets[i][0]['inst_df']       ith instruments data array in list form
(the list is empty when nds = 0)

RAISES:
NotImplementedError   if frc != 0 (forced control functions are not available yet)
    '''
    # Extracting all input variables from inpt_d dictionary
    r_seed = inpt_d['r_seed']
    nds = inpt_d['nds']
    ntp = inpt_d['ntp']
    ncs = inpt_d['ncs']
    n_end = inpt_d['n_end']
    n_exo = inpt_d['n_exo']
    t_inst = inpt_d['t_inst']
    c_inst = inpt_d['c_inst']
    frc = inpt_d['frc']
    sec_pan = inpt_d['sec_pan']
    ex_vpro = inpt_d['ex_vpro']
    inst_vpro = inpt_d['inst_vpro']
    err_vpro = inpt_d['err_vpro']

    if frc != 0:
        # The forced (non correlation based) control functions were never written;
        # fail loudly instead of hitting an undefined regressand further down.
        raise NotImplementedError('psc_dgp: forced control functions (frc != 0) '
                                  'are not implemented')

    np.random.seed([r_seed])
    panel = sec_pan != 0

    ## Coefficients on instruments in secondary equation
    # Panel: one common vector of length t_inst. Not panel: every cross section
    # has its own vector of length c_inst (one entry per relevant instrument).
    icoeffs_reg = list(iter.product([-1,1],repeat = t_inst if panel else c_inst))
    # For each cross section an ordered set of n_end distinct vectors from icoeffs_reg
    icfs = _draw_coeff_sets(len(icoeffs_reg), n_end, ncs, panel)

    ## Assignment of the relevant instruments to each cross section.
    # Registry of instrument assignments
    insts_reg = list(iter.combinations(np.arange(1,t_inst+1),c_inst))
    # Which instruments (1-based col #'s, ascending) are relevant for each cross section
    icr = [insts_reg[np.random.randint(len(insts_reg))] for i in range(ncs)]

    ## Coefficients on exogenous variables in secondary equations       
    excoeffs_reg = list(iter.product([-1,1],repeat = n_exo))
    # For each cross section an ordered set of n_end distinct vectors from excoeffs_reg
    xcfs = _draw_coeff_sets(len(excoeffs_reg), n_end, ncs, panel)

    ## Generation of the coefficient matrix for secondary regression.             
    # Layout of the last axis: [n_exo exogenous coeffs , t_inst instrument coeffs]
    coeff = np.zeros((n_end,ncs,t_inst+n_exo))
    for j in range(n_end):
        for i in range(ncs):
            coeff[j,i,:n_exo] = excoeffs_reg[xcfs[i][j]]
            inst_vec = icoeffs_reg[icfs[i][j]]
            for pos, k in enumerate(icr[i]):
                # Panel vectors are indexed by instrument number; non panel vectors
                # only hold c_inst entries, so index by position within icr[i].
                coeff[j,i,k+n_exo-1] = inst_vec[k-1] if panel else inst_vec[pos]

    ## Joint distributions (zero means, unit variances, banded covariances)
    # Exogenous regressors of EACH cross section
    var_ex = _banded_cov(n_exo, ex_vpro)
    # Instruments shared by all cross sections
    mu_inst = np.zeros(t_inst)
    var_inst = _banded_cov(t_inst, inst_vpro)
    # Error terms of EACH cross section: correlated errors ==> additive linear
    # control functions (the only case reachable while frc must be 0)
    n_err = n_end+1
    var_err = _banded_cov(n_err, err_vpro)

    # Error term mean vector / variance covariance matrix for ALL cross sections
    Mu_err = np.zeros(ncs*n_err)
    V_err = blkdiag(var_err,ncs)
    # Exogenous regressor mean vector / variance covariance matrix for ALL cross sections
    Mu_ex = np.zeros(ncs*n_exo)
    V_ex = blkdiag(var_ex,ncs)

    ## Variable Name Generation
    # instruments names  [W1 , W2 , .... ]
    inst_nms = [''.join(['W',str(i)]) for i in range(1,t_inst+1)]
    # Error terms names  [V1,1 , V1,2 , .... ,e1 , V2,1 , V2,2 , ...... e2 , ......]
    err_nm = [''.join(['e',str(cs)]) if val == n_err
              else ''.join(['V',str(cs),',',str(val)])
              for cs in range(1,ncs+1)
              for val in range(1,n_err+1)]
    # Long panel names  [crs , t , Y , Z1,1 ... Z1,n_end , Z2,1 ... Z2,n_exo]
    Dlng_nms = (['crs','t','Y']
                + [''.join(['Z1',',',str(j)]) for j in range(1,n_end+1)]
                + [''.join(['Z2',',',str(j)]) for j in range(1,n_exo+1)])

    # Meta data exported once in front of the generated data sets
    nms_cfs = {}
    nms_cfs['Dlng_nms'] = Dlng_nms
    nms_cfs['Dins_nms'] = inst_nms
    nms_cfs['Derr_nms'] = err_nm
    nms_cfs['coeff'] = coeff.tolist()
    nms_cfs['var_inst'] = var_inst.tolist()
    nms_cfs['V_ex'] = V_ex.tolist()
    nms_cfs['V_err'] = V_err.tolist() 

    ## Primary regression coefficient vector and fixed effects
    # Common Primary Coeff Vector: alternating 1 , -1 over [endog , exog] regressors
    p_commoncf = np.resize([1,-1], n_end+n_exo).reshape(n_end+n_exo,1)
    # Fixed Effect for each cross section
    fe = [ 1+x/2 for x in np.arange(0,ncs)]
    # Time index column shared by every cross section
    t_col = np.arange(1,ntp+1,dtype = float).reshape(ntp,1)

    # Initializing the data sets list
    data_sets = []

    for k in range(nds):
        ## Variable Generation (draw order kept: exogenous, instruments, errors)
        Ex = np.random.multivariate_normal(Mu_ex,V_ex,ntp)
        Inst = np.random.multivariate_normal(mu_inst,var_inst,ntp)
        Err = np.random.multivariate_normal(Mu_err,V_err,ntp)

        ## One long-panel block per cross section
        blocks = []
        for i in range(ncs):
            # Columns belonging to cross section i (selected by position, so
            # cross section 1 can never pick up columns of cross section 10, 11, ...)
            ex_i = Ex[:,i*n_exo:(i+1)*n_exo]
            err_i = Err[:,i*n_err:(i+1)*n_err]
            # Endogenous regressors: Z1 = [Z2 , W] a' + V
            end_i = np.hstack((ex_i,Inst)).dot(coeff[:,i,:].T) + err_i[:,:n_end]
            # Primary regressand: Y = fe + [Z1 , Z2] b + e
            y_i = fe[i] + np.hstack((end_i,ex_i)).dot(p_commoncf) + err_i[:,n_end:]
            crs_col = np.full((ntp,1), i+1.0)
            blocks.append(np.hstack((crs_col,t_col,y_i,end_i,ex_i)))
        # Stacking the cross sections on top of one another
        Data_long = np.vstack(blocks)

        # Meta data goes in front of the first data set only
        if k == 0:
            data_sets.append([inpt_d , nms_cfs])

        # Adding constructed data sets to data_sets list
        c = {}
        c['err_df'] = Err.tolist()
        c['prim_df'] = Data_long.tolist()
        c['inst_df'] = Inst.tolist()
        data_sets.append([c])
        
    return data_sets



CPU times: user 15 µs, sys: 0 ns, total: 15 µs
Wall time: 20 µs


<h3> DGP Inputs </h3>

In [82]:
%%time
# Setting a Seed
r_seed = 100
# Number of Time Periods
ntp = 100
# Number of Cross-Sections
ncs = 3
# Number of Endogenous Variables in Primary Equation
n_end = 2
# Number of Exogenous Variables in Primary Equation
n_exo = 2
# Total Number of Instruments
t_inst = 10
# Number of Instruments per Cross-Section
c_inst = 3
# Indicator for whether to force additive non linear cntrl function. 1 = yes 
frc = 0
# Indicator whether the secondary regression data is panel or not. 
sec_pan = 1
# number of data sets
nds = 100 
# Vector of exog off diagonal covariances i.e. cov(Z2t_l,Z2t_(l+j)) = ex_vpro[j-1]
ex_vpro = [0.5]
# Vector of inst off diagonal covariances i.e. cov(Wt_l,Wt_(l+j)) = inst_vpro[j-1]
inst_vpro = [0.5 , 0.25]
# Vector of error off diagonal covariances within a cross-section, i.e. the
# covariance of error terms j positions apart = err_vpro[j-1]
err_vpro = [0.8 , 0.36 ]
# Construction of the input dictionary
inpt_test = {'r_seed':r_seed,'nds': nds, 'ntp':ntp , 'ncs': ncs , 'n_end':n_end,
             'n_exo': n_exo, 't_inst':t_inst , 'c_inst': c_inst,
             'frc':frc, 'sec_pan': sec_pan, 'ex_vpro': ex_vpro,
             'inst_vpro': inst_vpro, 'err_vpro':err_vpro }
# Function call: generates the meta data entry followed by nds data sets
psc_data = psc_dgp(inpt_test)

CPU times: user 3.87 s, sys: 94.2 ms, total: 3.96 s
Wall time: 4.17 s


<h3> DGP JSON encoding and saving </h3>

In [70]:
# Serialise the generated data sets (nested lists / dicts) and write them to disk
output_filename = 'pscdata_5_17_1.json'
with open(output_filename, 'w') as f_obj:
    f_obj.write(json.dumps(psc_data))

<h3> Data set meta data dictionary </h3>

In [71]:
# First element of the meta data entry: the input dictionary handed to psc_dgp
psc_data[0][0]

{'c_inst': 3,
 'err_vpro': [0.8, 0.36],
 'ex_vpro': [0.5],
 'frc': 0,
 'inst_vpro': [0.5, 0.25],
 'n_end': 2,
 'n_exo': 2,
 'ncs': 3,
 'nds': 100,
 'ntp': 100,
 'r_seed': 100,
 'sec_pan': 1,
 't_inst': 10}

<h3> Error term variance covariance matrix </h3>

In [75]:
# Block diagonal error covariance: one (n_end+1) x (n_end+1) block per cross section
np.array(psc_data[0][1]['V_err'])

array([[ 1.  ,  0.8 ,  0.36,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.8 ,  1.  ,  0.8 ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.36,  0.8 ,  1.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  1.  ,  0.8 ,  0.36,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.8 ,  1.  ,  0.8 ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.36,  0.8 ,  1.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  1.  ,  0.8 ,  0.36],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.8 ,  1.  ,  0.8 ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.36,  0.8 ,  1.  ]])

<h3> Instruments variance covariance matrix </h3>

In [80]:
# Banded t_inst x t_inst covariance of the instruments (off diagonals from inst_vpro)
np.array(psc_data[0][1]['var_inst'])

array([[ 1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,  0.25,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ,
         0.25],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ,  0.5 ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.25,  0.5 ,  1.  ]])

<h3> Exogenous regressors variance covariance </h3>

In [77]:
# Block diagonal exogenous regressor covariance: one n_exo x n_exo block per cross section
np.array(psc_data[0][1]['V_ex'])

array([[ 1. ,  0.5,  0. ,  0. ,  0. ,  0. ],
       [ 0.5,  1. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  1. ,  0.5,  0. ,  0. ],
       [ 0. ,  0. ,  0.5,  1. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  1. ,  0.5],
       [ 0. ,  0. ,  0. ,  0. ,  0.5,  1. ]])

<h3> Coefficient vectors (one row per cross section $j$) for the regression of the first endogenous regressor $Z_{1j,1}$ on $[Z_{2j} , W ]$ </h3>

In [35]:
# Column labels: exogenous-regressor coefficients first, then one per instrument
c1 = ['a11,' + str(pos) for pos in range(1, n_exo + 1)]
c1 += ['a12,' + str(pos) for pos in range(1, t_inst + 1)]

# Rows are cross sections; coeff[0] belongs to the first endogenous regressor
pd.DataFrame(np.array(psc_data[0][1]['coeff'][0]), columns=c1)

Unnamed: 0,"a11,1","a11,2","a12,1","a12,2","a12,3","a12,4","a12,5","a12,6","a12,7","a12,8","a12,9","a12,10"
0,-1.0,-1.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1,-1.0,-1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,1.0,-1.0,0.0


<h3> Coefficient vectors (one row per cross section $j$) for the regression of the second endogenous regressor $Z_{1j,2}$ on $[Z_{2j} , W ]$ </h3>

In [37]:
# Column labels: exogenous-regressor coefficients first, then one per instrument
c2 = ['a21,' + str(pos) for pos in range(1, n_exo + 1)]
c2 += ['a22,' + str(pos) for pos in range(1, t_inst + 1)]

# Rows are cross sections; coeff[1] belongs to the second endogenous regressor
pd.DataFrame(np.array(psc_data[0][1]['coeff'][1]), columns=c2)

Unnamed: 0,"a21,1","a21,2","a22,1","a22,2","a22,3","a22,4","a22,5","a22,6","a22,7","a22,8","a22,9","a22,10"
0,-1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,-1.0,0.0,0.0
1,-1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,-1.0,0.0


<h3> Long panel of primary regression variables </h3>

In [18]:
# Reconstructed long panel of primary regression variables (first data set).
# Column names come from the meta data entry of psc_data itself, so the cell
# does not depend on a variable left over from an earlier kernel session.
pd.DataFrame(np.array(psc_data[1][0]['prim_df']), columns = psc_data[0][1]['Dlng_nms'])

Unnamed: 0,crs,t,Y,"Z1,1","Z1,2","Z2,1","Z2,2"
0,1.0,1.0,5.144284,-1.758586,-3.548433,1.427814,-0.761867
1,1.0,2.0,3.086426,-0.110062,-1.490349,-0.730024,-1.425261
2,1.0,3.0,6.249838,0.137885,-4.125386,0.150004,-2.172780
3,1.0,4.0,2.848146,1.086099,-2.612141,-1.105009,-0.626228
4,1.0,5.0,-2.366212,-0.256083,3.528184,0.174369,0.210931
5,1.0,6.0,-1.273242,-0.045520,4.392996,-0.747565,-0.376139
6,1.0,7.0,1.957284,0.565151,0.785354,-0.319287,-0.462957
7,1.0,8.0,-3.187221,-2.190069,0.352942,0.892933,1.757172
8,1.0,9.0,-1.539407,-2.897991,0.269763,1.556317,1.474453
9,1.0,10.0,-3.889355,-1.406344,0.693342,-0.799856,0.211932


<h3> Instruments data frame </h3>

In [67]:
# Reconstructed instrument df: draws of the first data set (psc_data[1]) relabelled
# with the instrument column names stored in the meta data entry (psc_data[0][1])
pd.DataFrame(np.array(psc_data[1][0]['inst_df']), columns = psc_data[0][1]['Dins_nms'])

Unnamed: 0,W1,W2,W3,W4,W5,W6,W7,W8,W9,W10
0,0.272181,0.270168,0.579911,-1.948801,-1.010046,-0.366367,-0.814283,0.642567,-1.289345,0.384794
1,0.109600,0.702940,0.095724,-0.927411,-0.609211,-1.804080,-0.590295,0.041874,1.409375,0.961940
2,1.971291,0.946372,-0.275964,0.156784,-0.344993,0.218743,0.437485,0.780898,-0.032221,-0.156474
3,0.316509,-0.212618,0.201315,0.267991,0.120956,0.193019,1.747461,0.985616,0.100748,-0.011749
4,-0.736308,0.236463,1.104826,1.591240,0.611725,-0.145081,-1.692722,-1.682642,-1.303430,0.899336
5,-0.656441,1.580822,1.247956,1.485829,-0.629308,-0.526520,0.558086,-0.485831,0.390152,0.547973
6,-0.264349,0.255498,0.961210,-0.007102,-0.315309,0.807189,0.343323,-0.042235,1.045294,0.990015
7,-0.224601,-1.380548,-0.595376,0.016685,0.517338,1.112730,0.655615,-1.026728,0.414387,-0.234669
8,-1.304212,-1.829790,-1.002594,-0.861247,0.070109,-0.519253,-1.149247,-2.034131,0.269250,1.192879
9,0.105855,0.900984,1.877764,1.249162,0.536998,1.198282,-0.536542,-0.107344,-0.408782,0.323295


<h3> Error term data frame </h3>

In [17]:
# Reconstructed error term df (first data set).
# Column names come from the meta data entry of psc_data itself, so the cell
# does not depend on a variable left over from an earlier kernel session.
pd.DataFrame(np.array(psc_data[1][0]['err_df']), columns = psc_data[0][1]['Derr_nms'])

Unnamed: 0,"V1,1","V1,2",e1,"V2,1","V2,2",e2,"V3,1","V3,2",e3
0,-0.454992,0.023692,0.164756,-0.278605,0.052930,0.812293,-0.068311,-0.065040,-0.591919
1,-0.995069,-0.846967,0.010902,2.180212,2.146608,1.764463,-0.286440,-0.044739,0.195620
2,-1.374425,-1.623082,-1.336217,-1.031773,-0.168045,1.252521,0.379672,0.477493,0.641718
3,-1.964329,-2.013645,-1.371311,-0.553386,-1.396935,-2.132793,-0.798070,-0.721129,-0.285098
4,1.436597,0.960791,0.454618,0.750139,0.765863,0.644991,-1.313612,-0.929120,-0.231429
5,1.526737,2.584224,2.536700,0.165280,1.377728,1.936921,1.101445,0.973993,0.483350
6,0.395949,0.946600,1.033817,-0.304216,-0.540009,0.036058,-0.637555,-0.167954,1.047788
7,-0.411122,-0.674814,-0.779971,0.296721,1.006580,2.130766,0.825289,0.873211,0.539137
8,0.267009,0.077177,0.546483,0.787462,0.721170,0.146775,0.120676,0.232241,0.445547
9,-1.522938,-1.863771,-1.777882,0.691886,1.112515,1.200164,-1.062392,-1.070893,-0.582829


In [26]:
# Read the saved JSON file back and decode it into nested lists / dicts
input_filename = 'pscdata_5_17_1.json'
with open(input_filename) as f_obj:
    data_in = json.loads(f_obj.read())