In [4]:
## Preamble: Package Loading
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import itertools as iter
import math

<h4> Block Diagonal Matrix Function </h4>

In [5]:
def blkdiag(mat,nb):
    """
    Build a block diagonal matrix by repeating one block along the diagonal.

    INPUTS
    mat     2-D array (square or rectangular) used as every diagonal block
    nb      Number of diagonal blocks in the output matrix (integer >= 1)

    OUTPUT
    v       Block diagonal matrix of dimension
            ( nb*mat.shape[0] x nb*mat.shape[1] ); all off-diagonal blocks
            are zero. The result is floating point (complex if mat is complex).

    RAISES
    ValueError  if mat is not 2-D or if nb < 1
    """
    mat = np.asarray(mat)
    if mat.ndim != 2:
        raise ValueError("mat must be a 2-D array")
    if nb < 1:
        raise ValueError("nb must be a positive integer")
    n_row, n_col = mat.shape
    # Promote to (at least) float64, as stacking mat next to float zeros would
    out_dtype = np.result_type(mat.dtype, np.float64)
    # Allocate the full result once, then drop each copy of mat on the diagonal
    v = np.zeros((nb*n_row, nb*n_col), dtype=out_dtype)
    for b in range(nb):
        v[b*n_row:(b+1)*n_row, b*n_col:(b+1)*n_col] = mat
    return v

#### Block Diagonal Matrix Function Demonstration

In [6]:
# Demonstration: four copies of a 3x3 block of ones placed along the diagonal
blkdiag(np.ones((3,3)),4)

array([[ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.]])

<h3> DGP Inputs </h3>

In [64]:
# Setting a Seed (fixes the draws made by the np.random calls in the cells below)
np.random.seed([10])

# Number of Time Periods
ntp = 10
# Number of Cross-Sections
ncs = 3
# Number of Endogenous Variables in Primary Equation
n_end = 2
# Number of Exogenous Variables in Primary Equation
n_exo = 2
# Total Number of Instruments
t_inst = 10
# Number of Instruments per Cross-Section
c_inst = 3
# Indicator for whether to force additive non linear cntrl function. 1 = yes 
# (the forced case is still marked as in progress in the generation code)
frc = 0
# Indicator whether the secondary regression data is panel or not. 
# 1 = panel (one coefficient draw shared by all cross-sections), 0 = not panel
sec_pan = 1
# Number of data sets generated by the generation loop
nds = 10 

# Vector of exog off diagonal covariances i.e. cov(Z2t_l,Z2t_(l+j)) = ex_vpro[j-1]
ex_vpro = [0.5]
# Vector of inst off diagonal covariances i.e. cov(Wt_l,Wt_(l+j)) = inst_vpro[j-1]
inst_vpro = [0.5 , 0.25 ]
# Vector of error off diagonal covariances: within a cross-section, error terms
# that are j positions apart in (V_1,...,V_n_end,e) have covariance err_vpro[j-1]
err_vpro = [0.8 , 0.36 ]

<h2> DGP Generation Code </h2>

In [67]:
%%time

## Helper: banded variance covariance matrix
def _band_cov(dim, off_diag):
    """
    Return a (dim x dim) matrix with ones on the main diagonal and
    off_diag[j-1] on the j-th super- and sub-diagonal (zero elsewhere).
    """
    cov = np.eye(dim)
    for lag, val in enumerate(off_diag, start=1):
        cov = cov + val*np.eye(dim,k=lag) + val*np.eye(dim,k=-lag)
    return cov

## Coefficients on instruments in secondary equation
# NOTE: the np.random calls in this cell are kept in their original order so
# that the draws obtained under a fixed seed do not change.
if sec_pan == 0:
    # Not panel: registry of all +/-1 sign vectors of length c_inst
    icoeffs_reg = list(iter.product([-1,1],repeat = c_inst))
    # All ordered selections of n_end distinct registry rows (one row per endog variable)
    picfs = list(iter.permutations(range(0,len(icoeffs_reg)),n_end))
    # Independently choosing one selection for each of the ncs cross-sections
    icfs = [ picfs[i] for i in np.random.randint(len(picfs),size = ncs)]
else:
    # Is panel: registry of all +/-1 sign vectors of length t_inst
    icoeffs_reg = list(iter.product([-1,1],repeat = t_inst))
    # All ordered selections of n_end distinct registry rows (one row per endog variable)
    picfs = list(iter.permutations(range(0,len(icoeffs_reg)),n_end))
    # Choosing a single selection and sharing it across all ncs cross-sections
    icfs = [ picfs[i] for i in np.random.randint(len(picfs),size =1)]*ncs

## Assignment of the relevant instruments to each cross-section.
# Registry of instrument assignments (all c_inst-subsets of instrument numbers 1..t_inst)
insts_reg = list(iter.combinations(np.arange(1,t_inst+1),c_inst))
# List of which instruments (1-based col #'s) are relevant for each cross-section
icr = [insts_reg[np.random.randint(len(insts_reg))] for i in range(ncs)]

## Coefficients on exogenous variables in secondary equations
# Registry of all +/-1 sign vectors of length n_exo
excoeffs_reg = list(iter.product([-1,1],repeat = n_exo))
# All ordered selections of n_end distinct registry rows (one row per endog variable)
pxcfs = list(iter.permutations(range(0,len(excoeffs_reg)),n_end))
if sec_pan == 0:
    # Not panel so choosing a selection independently for each cross-section
    xcfs = [ pxcfs[i] for i in np.random.randint(len(pxcfs),size = ncs)]
else:
    # Is panel so a single selection is drawn and duplicated for every cross-section
    xcfs = [ pxcfs[i] for i in np.random.randint(len(pxcfs),size = 1)]*ncs

## Generation of the coefficient matrix for secondary regression.
# coeff[j,i,:] is the coefficient vector of endog variable j in cross-section i.
# Columns 0..n_exo-1 belong to the exogenous regressors, the remaining t_inst
# columns to the instruments W1..Wt_inst (zero for irrelevant instruments).
coeff = np.zeros((n_end,ncs,t_inst+n_exo))
for j in range(n_end):
    for i in range(ncs):
        for k in range(n_exo):
            coeff[j,i,k] = excoeffs_reg[xcfs[i][j]][k]
        for k in range(c_inst):
            # icr holds 1-based instrument numbers, hence the -1; only the first
            # c_inst entries of the chosen sign vector are used.
            coeff[j,i,icr[i][k]+n_exo-1] = icoeffs_reg[icfs[i][j]][k]

## Joint Distribution of Exogenous regressors (for EACH cross-section)
# Vector of Means (=0)
mu_ex = np.zeros(n_exo)
# Unit variances with ex_vpro on the off diagonals
var_ex = _band_cov(n_exo, ex_vpro)

## Joint Distribution of Instruments for all cross-sections
# Vector of Means (=0)
mu_inst = np.zeros(t_inst)
# Unit variances with inst_vpro on the off diagonals
var_inst = _band_cov(t_inst, inst_vpro)

## Joint Distribution of Error Terms for EACH cross-section
# Vector of means
mu_err = np.zeros(n_end+1)
if frc == 0:
    # Var Cov matrix for correlated errors ==> additive linear control functions
    var_err = _band_cov(n_end+1, err_vpro)
else:
    # Error not explicitly correlated ==> have to force control functions.
    var_err = np.eye(n_end+1)

# Error term mean vector for ALL cross-sections
Mu_err = np.tile(mu_err,ncs)
# Error term variance covariance matrix for ALL cross-sections
V_err = blkdiag(var_err,ncs)

# Exogenous regressor mean vector for ALL cross-sections
Mu_ex = np.tile(mu_ex,ncs)
# Exogenous regressor variance covariance matrix for ALL cross-sections
V_ex = blkdiag(var_ex,ncs)

## Variable Name Generation
# exogenous variable name generation
#          [Z21,1 , Z21,2 ......, Z22,1 , Z22,2 , ..... ]
ex_nms = [''.join(['Z2',str(i),',',str(j)]) for i in range(1,ncs+1) for j in range(1,n_exo+1)]
# instruments names generation
#          [W1 , W2 , .... ]
inst_nms = [''.join(['W',str(i)]) for i in range(1,t_inst+1)]
# Error terms names generation
#          [V1,1 , V1,2 , .... ,e1 , V2,1 , V2,2 , ...... e2 , ......]
# Prefix: 'V' for the n_end secondary errors, 'e' for the primary error
err_nm1 = ['e' if val == n_end+1 else 'V' for val in  list(range(1,n_end+2))*ncs]
# Cross-section number, repeated once per error term of that cross-section.
# (Must run over the ncs cross-sections so that it lines up with err_nm1.)
err_nm2 = [ str(i) for y in range(1,ncs+1) for i in iter.repeat(y,n_end+1)]
# Suffix: ',<endog variable number>' for the secondary errors, nothing for 'e'
err_nm3 = ['' if val == n_end+1 else ''.join([',',str(val)]) for val in list(range(1,n_end+2))*ncs]
err_nm  = [''.join([err_nm1[i],err_nm2[i],err_nm3[i]]) for i in range(len(err_nm1))]



## Quantities that are identical for every generated data set
# Common Primary Coeff Vector: alternating +1/-1 of length n_end+n_exo
p_commoncf = np.array([(-1)**m for m in range(n_end+n_exo)]).reshape(n_end+n_exo,1)
# Fixed Effect for each cross-section
fe = [ 1+x/2 for x in np.arange(0,ncs)]
# Column names for endogenous regressors in the long panel
Z1_nm = [ ''.join(['Z1',',',str(j)]) for j in range(1,n_end+1)]
# Column names for exogenous regressors in the long panel
Z2_nm = [ ''.join(['Z2',',',str(j)]) for j in range(1,n_exo+1)]

# Each pass generates one complete data set; the data frames are overwritten on
# every pass, so only the last data set remains once the loop has finished.
for k in range(nds):
    ## Variable Generation
    # Exogenous Regressor Generation
    Ex = np.random.multivariate_normal(Mu_ex,V_ex,ntp)
    # Instruments Generation
    Inst = np.random.multivariate_normal(mu_inst,var_inst,ntp)
    # Error Terms Generation
    Err = np.random.multivariate_normal(Mu_err,V_err,ntp)

    ## Data Frame Generation
    Ex_df = pd.DataFrame(Ex,columns = ex_nms)
    Inst_df = pd.DataFrame(Inst, columns = inst_nms)
    Err_df = pd.DataFrame(Err,columns = err_nm)

    ## Generating Endogenous (primary) regressors
    end_nms = []
    end_vals = []
    for j in range(n_end):
        for i in range(ncs):
            # Regular expression for the relevant exogenous regressors. The
            # trailing comma stops cross-section 1 from also matching 10, 11, ...
            ex_pat = '^Z2{},'.format(i+1)
            # Exact name of the relevant secondary error term
            err_col = 'V{},{}'.format(i+1,j+1)
            # Exog regressors of this cross-section followed by all instruments
            pe1 = pd.concat([Ex_df.filter(regex = ex_pat),Inst_df], axis = 1).values
            # Calculating the endogenous primary regressor
            end_vals.append(pe1.dot(coeff[j,i,:]) + Err_df[err_col].values)
            # Constructing the appropriate name for the endo regressor
            end_nms.append('Z1{},{}'.format(i+1,j+1))
    End_df = pd.DataFrame(np.column_stack(end_vals),columns = end_nms)

    ## Generation of primary regressand
    pr_nms = []
    pr_vals = []
    for i in range(ncs):
        # Regular expressions for the relevant endogenous / exogenous regressors
        en_pat = '^Z1{},'.format(i+1)
        ex_pat = '^Z2{},'.format(i+1)
        # Name of appropriate primary error term
        er_nm = ''.join(['e',str(i+1)])
        # Extracting appropriate regressors for primary equation
        pr3 = pd.concat([End_df.filter(regex = en_pat),Ex_df.filter(regex = ex_pat)], axis = 1).values
        # Extracting appropriate error term
        pr4 = Err_df[er_nm].values.reshape(Err_df.shape[0],1)
        # Generating primary regressand
        if frc == 0:
            # if no forcing no need to include control functions explicitly
            pr = fe[i] + pr3.dot(p_commoncf)+pr4
        else:
            # Explicit control functions are not written yet. Fail loudly rather
            # than continue with an undefined (or stale) regressand.
            raise NotImplementedError("frc != 0 (forced control functions) is not implemented")
        pr_nms.append(''.join(['Y',str(i+1)]))
        pr_vals.append(pr)
    Pr_df = pd.DataFrame(np.column_stack(pr_vals),columns = pr_nms)

    ## Converting Data To Long Panel Type
    long_parts = []
    for i in range(ncs):
        # Regressand column for this cross-section
        pL = pd.DataFrame(Pr_df[''.join(['Y',str(i+1)])].values,columns = ['Y'])
        # Adding endog regressors to pL
        pL = pd.concat([pL,pd.DataFrame(End_df.filter(regex = '^Z1{},'.format(i+1)).values
                                        ,columns = Z1_nm)],axis = 1)
        # Adding exog regressors to pL
        pL = pd.concat([pL,pd.DataFrame(Ex_df.filter(regex = '^Z2{},'.format(i+1)).values
                                        ,columns = Z2_nm)],axis = 1)
        # Adding the cross-section variable
        pL['crs'] = i+1
        # Adding the time component variable
        pL['t'] = np.arange(1,ntp+1)
        long_parts.append(pL)
    # Stacking the cross-sections on top of one another (row labels restart at 0
    # for every cross-section)
    Data_long = pd.concat(long_parts, axis = 0)

    # Moving the identifier columns ('crs','t') to the front
    Data_long = Data_long[list(Data_long.columns)[-2:] + list(Data_long.columns)[:-2]]

    # Extract names in Df_s only once
    if k == 0:
        # Names for export data sets
        Dlng_nms = list(Data_long.columns)
        Dins_nms = list(Inst_df.columns)
        Derr_nms = list(Err_df.columns)
        
    


CPU times: user 352 ms, sys: 30.6 ms, total: 383 ms
Wall time: 383 ms


In [52]:
Data_long

Unnamed: 0,crs,t,Y,"Z1,1","Z1,2","Z2,1","Z2,2"
0,1,1,0.99426,2.322905,0.61048,-0.798539,0.25106
1,1,2,-1.067928,-1.988221,0.255169,0.104994,-0.106327
2,1,3,-1.093921,-2.843405,-0.647992,0.196805,0.177864
3,1,4,3.561068,3.969038,3.17953,1.238475,-0.988961
4,1,5,1.326278,-0.545791,0.423686,1.279394,0.319818
5,1,6,-0.023311,-1.027015,-0.956078,0.079022,1.563763
6,1,7,3.448681,-0.394221,-2.69418,-0.039582,-0.166033
7,1,8,0.243074,-0.637442,0.294954,0.073013,0.770699
8,1,9,4.370519,3.463006,0.325492,-1.439839,-1.414885
9,1,10,-0.357142,-4.724371,-4.304291,1.163125,2.079946


In [68]:
[Dlng_nms,Dins_nms,Derr_nms]

[['crs', 't', 'Y', 'Z1,1', 'Z1,2', 'Z2,1', 'Z2,2'],
 ['W1', 'W2', 'W3', 'W4', 'W5', 'W6', 'W7', 'W8', 'W9', 'W10'],
 ['V1,1', 'V1,2', 'e1', 'V2,1', 'V2,2', 'e2', 'V3,1', 'V3,2', 'e3']]

In [23]:
Dins_nms

['W1', 'W2', 'W3', 'W4', 'W5', 'W6', 'W7', 'W8', 'W9', 'W10']

In [24]:
Derr_nms

['V1,1', 'V1,2', 'e1', 'V2,1', 'V2,2', 'e2', 'V3,1', 'V3,2', 'e3']

In [207]:
Inst_df

Unnamed: 0,W1,W2,W3,W4,W5,W6,W7,W8,W9,W10
0,-0.336831,0.363675,-0.219602,-0.514095,-1.761057,-1.719165,-1.838557,-2.405004,-1.581108,-0.062666
1,-1.190555,1.395981,0.63139,0.556478,0.790438,0.130878,-1.203316,-0.391544,-0.001022,0.744871
2,-1.066021,0.133978,0.664962,-0.320977,0.535553,-0.692756,-0.506288,-0.242951,1.379805,2.544652
3,-0.381072,-0.551566,-2.425389,-1.821401,0.299511,-0.065049,0.499591,-0.078486,-0.813943,-0.352481
4,1.877648,1.163436,-0.683579,0.162539,-0.047819,0.628357,-0.688901,-2.540504,-0.352744,0.071824
5,0.18793,0.565109,2.050468,1.831618,0.098676,-0.106102,0.572099,1.576602,1.779447,1.059104
6,1.118027,1.729392,2.338271,0.89817,0.883344,0.899368,0.359707,-0.993909,0.794074,2.07252
7,1.609428,0.735214,-0.720846,-0.539955,0.95955,-0.278857,-1.152211,-1.272111,-1.72974,0.796578
8,-0.759029,-0.223474,-0.96335,-0.417483,0.062446,-1.638526,-1.502468,-0.583206,-0.294837,0.402358
9,1.287655,0.778355,0.153482,0.039271,-1.747644,-1.277662,-1.368861,-0.760211,0.194257,-0.775516
