In [1]:
## Preamble: Package Loading
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import itertools as iter
import math

<h4> Block Diagonal Matrix Function </h4>

In [2]:
def blkdiag(mat,nb):
    """
    Build a block diagonal matrix made of `nb` copies of `mat`.

    INPUTS
    mat     2-D array-like (typically a square var-cov matrix) that forms each
            diagonal block of the output
    nb      Number of diagonal blocks in the output matrix (integer >= 1)

    OUTPUT
    v       Block diagonal matrix of dimension
            ( nb*mat.shape[0] x nb*mat.shape[1] ) holding `mat` in every
            diagonal block and zeros everywhere else. The result is at least
            float64, as it is when stacking `mat` next to np.zeros blocks.

    RAISES
    ValueError  if `mat` is not 2-D or if nb < 1
    """
    mat = np.asarray(mat)
    if mat.ndim != 2:
        raise ValueError("mat must be a 2-D array")
    if nb < 1:
        raise ValueError("nb must be a positive integer")
    n_rows, n_cols = mat.shape
    # Allocate the full result once instead of repeatedly h/v-stacking pieces
    v = np.zeros((nb*n_rows, nb*n_cols), dtype=np.result_type(mat.dtype, np.float64))
    for b in range(nb):
        # Copy the block into the b-th diagonal position
        v[b*n_rows:(b+1)*n_rows, b*n_cols:(b+1)*n_cols] = mat
    return v

#### Block Diagonal Matrix Function Demonstration

In [3]:
# Demonstration: four 3x3 blocks of ones along the diagonal of a 12x12 matrix
blkdiag(np.ones((3,3)),4)

array([[ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.]])

<h3> DGP Inputs </h3>

In [4]:
# Setting a Seed (seeds numpy's global random generator used by every draw below)
np.random.seed([10])

# Number of Time Periods
ntp = 10
# Number of Cross-Sections
ncs = 3
# Number of Endogenous Variables in Primary Equation
n_end = 2
# Number of Exogenous Variables in Primary Equation
n_exo = 2
# Total Number of Instruments
t_inst = 10
# Number of Instruments per Crossection
c_inst = 3
# Indicator for whether to force additive non linear cntrl function. 1 = yes
# (when 0, the error terms are given the off diagonal covariances in err_vpro)
frc = 0

# Vector of exog off diagonal covariances i.e. cov(Z2t_l,Z2t_(l+j)) = ex_vpro[j-1]
ex_vpro = [0.5]
# Vector of inst off diagonal covariances i.e. cov(Wt_l,Wt_(l+j)) = inst_vpro[j-1]
inst_vpro = [0.5 , 0.25 ]
# Vector of error off diagonal covariances within a crossection, i.e. the
# covariance between the l-th and (l+j)-th error term = err_vpro[j-1]
err_vpro = [0.8 , 0.36 ]

<h2> DGP Generation Code </h2>

In [206]:
%%time
# Registry of every +/-1 coefficient vector that can be placed on a
# crossection's c_inst instruments
icoeffs_reg = list(iter.product([-1, 1], repeat=c_inst))
# Registry of every +/-1 coefficient vector for the n_exo exogenous variables
excoeffs_reg = list(iter.product([-1, 1], repeat=n_exo))
# Registry of every way of picking c_inst instrument labels out of 1..t_inst
insts_reg = list(iter.combinations(np.arange(1, t_inst + 1), c_inst))

# Instrument labels used by each crossection (one random draw per crossection)
icr = [insts_reg[np.random.randint(len(insts_reg))] for _ in range(ncs)]

# Ordered selections of n_end distinct instrument-coefficient vectors;
# one selection is drawn per crossection
picfs = list(iter.permutations(range(len(icoeffs_reg)), n_end))
icfs = [picfs[draw] for draw in np.random.randint(len(picfs), size=ncs)]

# Ordered selections of n_end distinct exogenous-coefficient vectors;
# one selection is drawn per crossection
pxcfs = list(iter.permutations(range(len(excoeffs_reg)), n_end))
xcfs = [pxcfs[draw] for draw in np.random.randint(len(pxcfs), size=ncs)]

# Coefficient array indexed [endog variable, crossection, regressor]; the first
# n_exo slots hold exogenous coefficients, the remaining t_inst slots hold
# instrument coefficients (zero for instruments a crossection does not use)
coeff = np.zeros((n_end, ncs, t_inst + n_exo))
for j, i in iter.product(range(n_end), range(ncs)):
    # Exogenous coefficients occupy the leading n_exo positions
    coeff[j, i, :n_exo] = excoeffs_reg[xcfs[i][j]]
    # Instrument labels are 1-based, hence the -1 when offsetting past the
    # exogenous positions
    for label, value in zip(icr[i], icoeffs_reg[icfs[i][j]]):
        coeff[j, i, label + n_exo - 1] = value
            
def _banded_cov(dim, offdiag):
    """Unit-variance (dim x dim) matrix whose j-th off diagonals equal offdiag[j-1]."""
    cov = np.eye(dim)
    for lag, value in enumerate(offdiag, start=1):
        cov = cov + value*np.eye(dim, k=lag) + value*np.eye(dim, k=-lag)
    return cov

## Joint distribution of the exogenous regressors
# Zero means and banded covariance for a single crossection
mu_ex = np.zeros(n_exo)
var_ex = _banded_cov(n_exo, ex_vpro)
# Stacked mean vector and block diagonal covariance for ALL crossections
Mu_ex = np.tile(mu_ex, ncs)
V_ex = blkdiag(var_ex, ncs)

## Joint distribution of the instruments (shared by all crossections)
mu_inst = np.zeros(t_inst)
var_inst = _banded_cov(t_inst, inst_vpro)

## Joint distribution of the error terms
# Zero means for a single crossection
mu_err = np.zeros(n_end + 1)
if frc == 0:
    # Correlated errors ==> additive linear control functions
    var_err = _banded_cov(n_end + 1, err_vpro)
else:
    # Errors are left uncorrelated with unit variance
    var_err = np.eye(n_end + 1)
# Stacked mean vector and block diagonal covariance for ALL crossections
Mu_err = np.tile(mu_err, ncs)
V_err = blkdiag(var_err, ncs)

## Variable generation: ntp joint draws each, in the order Ex, Inst, Err
Ex = np.random.multivariate_normal(Mu_ex, V_ex, ntp)
Inst = np.random.multivariate_normal(mu_inst, var_inst, ntp)
Err = np.random.multivariate_normal(Mu_err, V_err, ntp)

## Variable Name Generation
# Exogenous regressor names 'Z2<crossection>,<regressor>', crossection-major
ex_nms = [''.join(['Z2',str(i),',',str(j)]) for i in range(1,ncs+1) for j in range(1,n_exo+1)]

# Instrument names 'W<instrument>'
inst_nms = [''.join(['W',str(i)]) for i in range(1,t_inst+1)]

# Error term names. Within crossection i the n_end+1 errors are named
# 'V<i>,1' ... 'V<i>,<n_end>' followed by the primary equation error 'e<i>'.
# Prefix: 'V' for the first n_end errors, 'e' for the last one
err_nm1 = ['e' if val == n_end+1 else 'V' for val in list(range(1,n_end+2))*ncs]
# Crossection number, repeated once per error term of that crossection.
# This must iterate over the ncs crossections (not over n_end+1), otherwise the
# list is too short whenever ncs > n_end+1.
err_nm2 = [str(crs) for crs in range(1,ncs+1) for _ in range(n_end+1)]
# Suffix: ',<k>' for the k-th 'V' error, nothing for the 'e' error
err_nm3 = ['' if val == n_end+1 else ''.join([',',str(val)]) for val in list(range(1,n_end+2))*ncs]
# All three component lists have length ncs*(n_end+1)
err_nm  = [''.join(parts) for parts in zip(err_nm1,err_nm2,err_nm3)]

## Optional Data Frame Generation
Ex_df = pd.DataFrame(Ex,columns = ex_nms)
Inst_df = pd.DataFrame(Inst, columns = inst_nms)
Err_df = pd.DataFrame(Err,columns = err_nm)

## Generating Endogenous (primary) regressors
# Each regressor 'Z1<crossection>,<j>' is a linear combination (weights in
# coeff[j,i,:]) of that crossection's exogenous regressors and all instruments,
# plus the matching error term 'V<crossection>,<j>'.
end_cols = []
end_nms = []
for j in range(n_end):
    for i in range(ncs):
        # Regular expression for the relevant exogenous regressors; the trailing
        # comma stops crossection 1 from also matching crossections 10, 11, ...
        ex_pat = ''.join(['^Z2',str(i+1),','])
        # Regular expression for the relevant error term; fully anchored so
        # that e.g. 'V1,1' cannot also match 'V1,10'
        err_pat = ''.join(['^V',str(i+1),',',str(j+1),'$'])
        # Exogenous regressors followed by all instruments, as a numpy array
        pe1 = pd.concat([Ex_df.filter(regex = ex_pat),Inst_df], axis = 1).values
        # Error variable as a column array
        pe2 = Err_df.filter(regex = err_pat).values
        # Calculating the endogenous primary regressor
        pe = pe1.dot(coeff[j,i,:]).reshape(pe1.shape[0],1) + pe2
        # Constructing the appropriate name for the endo regressor
        end_nm = ''.join(['Z1',str(i+1),',',str(j+1)])
        end_cols.append(pe)
        end_nms.append(end_nm)

# Assemble the data frame once, keeping the generation order of the columns
End_df = pd.DataFrame(np.hstack(end_cols),columns = end_nms)

# Common Primary Coeff Vector: alternating 1,-1,1,... with one entry per
# primary regressor (built for any number of regressors, not only up to 20)
n_prim = n_end+n_exo
p_commoncf = np.array([1 if k % 2 == 0 else -1 for k in range(n_prim)]).reshape(n_prim,1)
# Fixed Effect for each crossection: 1, 1.5, 2, ...
fe = [ 1+x/2 for x in np.arange(0,ncs)]

## Generation of primary regressand
# Y<i> = fixed effect + [endogenous, exogenous regressors of crossection i]
#        times the common coefficient vector + primary error e<i>
pr_cols = []
pr_nms = []
for i in range(ncs):
    # Regular expressions for the relevant endogenous / exogenous regressors;
    # the trailing comma stops crossection 1 from also matching 10, 11, ...
    en_pat = ''.join(['^Z1',str(i+1),','])
    ex_pat = ''.join(['^Z2',str(i+1),','])
    # Name of appropriate primary error term
    er_nm = ''.join(['e',str(i+1)])
    # Extracting appropriate regressors for primary equation
    pr3 = pd.concat([End_df.filter(regex = en_pat),Ex_df.filter(regex = ex_pat)], axis = 1).values
    # Extracting appropriate error term as a column array
    pr4 = Err_df[er_nm].values.reshape(Err_df.shape[0],1)
    # Generating primary regressand
    pr = fe[i] + pr3.dot(p_commoncf)+pr4
    # Constructing the appropriate name for the regressand
    pr_nm = ''.join(['Y',str(i+1)])
    pr_cols.append(pr)
    pr_nms.append(pr_nm)

# Assemble the regressand data frame once, one column per crossection
Pr_df = pd.DataFrame(np.hstack(pr_cols),columns = pr_nms)

# Constructing full (wide) data frame: regressands, endogenous regressors,
# exogenous regressors, instruments
Full_df = pd.concat([Pr_df,End_df,Ex_df,Inst_df],axis = 1)

## Converting Data To Long Panel Type
# Column names for endogenous regressors (one per endogenous variable)
Z1_nm = [ ''.join(['Z1',',',str(j)]) for j in range(1,n_end+1)]
# Column names for exogenous regressors (one per exogenous variable; sizing
# this with n_end breaks as soon as n_exo != n_end)
Z2_nm = [ ''.join(['Z2',',',str(j)]) for j in range(1,n_exo+1)]

panels = []
for i in range(ncs):
    # Regressand of crossection i+1
    pL = pd.DataFrame(Full_df[''.join(['Y',str(i+1)])].values,columns = ['Y'])
    # Regressors of crossection i+1; the trailing comma in each pattern stops
    # crossection 1 from also matching crossections 10, 11, ...
    z1_vals = Full_df.filter(regex = ''.join(['^Z1',str(i+1),','])).values
    z2_vals = Full_df.filter(regex = ''.join(['^Z2',str(i+1),','])).values
    pL = pd.concat([pL,
                    pd.DataFrame(z1_vals,columns = Z1_nm),
                    pd.DataFrame(z2_vals,columns = Z2_nm)],axis = 1)
    # Adding the crossection variable
    pL['crs'] = i+1
    # Adding the time component variable (1..ntp)
    pL['t'] = np.arange(1,ntp+1)
    panels.append(pL)

# Stacking all crossections in one pass; as before, each crossection keeps its
# own 0..ntp-1 row labels
Data_long = pd.concat(panels, axis = 0)

# Moving the identifier columns (crs, t) to the front
Data_long = Data_long[list(Data_long.columns)[-2:] + list(Data_long.columns)[:-2]]


CPU times: user 27.3 ms, sys: 44 µs, total: 27.3 ms
Wall time: 27.4 ms


In [205]:
# Display the generated long-format panel (columns: crs, t, Y, Z1,*, Z2,*)
Data_long

Unnamed: 0,crs,t,Y,"Z1,1","Z1,2","Z2,1","Z2,2"
0,1,1,0.411392,1.49902,1.467108,0.28974,0.097556
1,1,2,2.211925,2.19285,1.993913,0.843663,0.031877
2,1,3,5.070628,0.087205,-3.000579,-0.618488,-1.190888
3,1,4,-3.29819,-1.193794,1.625526,-0.436003,0.06696
4,1,5,1.66701,2.245576,0.984216,-0.828782,0.700467
5,1,6,5.436114,4.152523,0.296751,0.672411,0.883835
6,1,7,0.917096,2.571581,4.099342,2.137784,1.069043
7,1,8,3.563742,-0.055058,-2.67218,0.207906,-0.483857
8,1,9,-1.65915,-1.347085,0.544211,-1.347863,0.226386
9,1,10,3.396231,3.708844,0.8668,0.69351,0.83767
