In [49]:
## Preamble: Package Loading
import numpy as np
import ipywidgets as ipw
from IPython.display import display,display_html
import matplotlib.pyplot as plt
from matplotlib import gridspec
import pandas as pd
import json
import kernel as kr
import psc_dbl_sumdisp as psd 

In [52]:
%%html
<style>
 /* Disabled rule, kept for reference (it would hide every unselected cell
    that follows the selected one):
    .cell.selected~.unselected { display: none; } */
 /* Hide the input area of every unselected code cell. */
 .cell.code_cell.unselected .input { display: none; }
</style>

# Selection of Heterogeneous Instruments in Partially Linear Fixed Effects Panel Regression
##  Monte Carlo Results
### By: Eric Penner

<h2> Summary </h2>

The following notebook contains results of a Monte Carlo exercise conducted on the estimator detailed in 'psc.ipynb' and 'psc_proposal.pdf', using data sets generated by 'psc_dgp.ipynb' (see that notebook for details of the DGP). 

Important features of each of the following trials are presented here

* In all data sets the endogenous variables $Z_1$ have been generated by secondary equations which are panel fixed effects type, corresponding to sections 3.3 and 3.4 of 'psc_dgp.ipynb'. 


* All estimates have been generated with the knowledge that the secondary equations are panel type (i.e. the estimation of the secondary equations is properly specified). 


* The number of datasets used from each component of each trial is 'nds = 500'

In [47]:
# Result files for each trial. The position in the outer list is the trial
# index used by every display cell below; line_nms holds the matching legend
# labels in the same order.
inpt_filenames = [
    # Trial 1.1: 0
    ['pscout_9_4_1347.json', 'pscout_9_4_1505.json', 'pscout_9_4_1913.json', 'pscout_9_4_1816.json'],
    # Trial 1.2: 1
    ['pscout_9_4_1232.json', 'pscout_9_4_1837.json', 'pscout_9_4_1969.json', 'pscout_9_4_1655.json'],
    # Trial 1.3: 2
    ['pscout_9_4_1636.json', 'pscout_9_4_1191.json', 'pscout_9_4_1510.json'],
    # Trial 1.4: 3
    ['pscout_9_4_1816.json', 'pscout_9_4_1655.json', 'pscout_9_4_1510.json', 'pscout_9_5_1624.json'],
    # Trial 2.0: 4
    ['pscout_9_5_1624.json', 'pscout_9_6_1331.json', 'pscout_9_6_1606.json'],
    # Trial 3.0: 5
    ['pscout_9_5_1624.json', 'pscout_9_6_1537.json', 'pscout_9_6_1725.json'],
]

line_nms = [
    ['Oracle', 'Known', 'Unknown', 'Lasso'],                  # Trial 1.1: 0
    ['Oracle', 'Known', 'Unknown', 'Lasso'],                  # Trial 1.2: 1
    ['Oracle', 'Known', 'Lasso'],                             # Trial 1.3: 2
    ['t_inst=15', 't_inst=30', 't_inst=45', 't_inst=100'],    # Trial 1.4: 3
    ['ncs = 10', 'ncs = 25', 'ncs = 40'],                     # Trial 2.0: 4
    ['ntp = 50', 'ntp = 100', 'ntp = 150'],                   # Trial 3.0: 5
]

In [48]:
# Load every results file: res_out[k][i] is whatever psd.psc_load returns for
# file i of trial k. The cells below unpack it positionally (components 0..4).
res_out = [[psd.psc_load(fname) for fname in inpt_filenames[k]]
           for k in range(len(line_nms))]

# Component 0: the estimator input dictionary (it carries 'input_filename').
estin_dcts = [[res[0] for res in trial] for trial in res_out]

# The DGP summary file name is the data file name with 'pscdata' swapped
# for 'pscsum'.
dgp_sum_filenames = [[dct['input_filename'].replace('pscdata', 'pscsum') for dct in trial]
                     for trial in estin_dcts]

# Load the DGP summary that belongs to each results file.
dgp_dicts = [[psd.pscsum_load(fname) for fname in trial]
             for trial in dgp_sum_filenames]

# Component 0 of each DGP summary: the DGP input dictionary.
dgpin_dcts = [[summ[0] for summ in trial] for trial in dgp_dicts]

# Estimator inputs and DGP inputs in one dictionary per file; on a key clash
# the DGP value wins because it is unpacked last.
merged_dcts = [[{**est, **dgp} for est, dgp in zip(est_trial, dgp_trial)]
               for est_trial, dgp_trial in zip(estin_dcts, dgpin_dcts)]

# Components 1 and 2 of each DGP summary: the coefficients stored with the DGP.
true_bcoeffs = [[summ[1] for summ in trial] for trial in dgp_dicts]
true_acoeffs = [[summ[2] for summ in trial] for trial in dgp_dicts]

# Components 1-4 of each result, passed in pairs to psd.cfs_dsp below.
bcoeff = [[res[1] for res in trial] for trial in res_out]
btables = [[res[2] for res in trial] for trial in res_out]
acoeff = [[res[3] for res in trial] for trial in res_out]
atables = [[res[4] for res in trial] for trial in res_out]

<a id='index'></a>

    
<h2> Index </h2>
<ul>
    <li> <a href='#trial_1'> Trial Set 1:  Estimator comparison varying the total number of instruments</a> <br>
        <br>
    <ul> 
        <li> <a href='#trial_11'> Trial Set 1.1:  Estimator Comparison when $t_{inst} = 15$ </a> <br>
        <br>
        <li> <a href='#trial_12'> Trial Set 1.2:  Estimator Comparison when $t_{inst} = 30$ </a> <br>
        <br>
        <li> <a href='#trial_13'> Trial Set 1.3:  Estimator Comparison when $t_{inst} = 45$ </a> <br>
        <br>
        <li> <a href='#trial_14'> Trial Set 1.4:   Lasso Comparison where $t_{inst} \in \{15,30,45,100\}$ </a> <br>
        <br>
    </ul> 
     <li> <a href='#trial_2'> Trial Set 2.0: Properties of Lasso Estimator, Increasing Number of Cross Sections </a> <br>
       <br>
     <li> <a href='#trial_3'> Trial Set 3.0: Properties of Lasso Estimator, Increasing Number of Time Periods </a> <br>
       <br>
<!--    <ul> 
    <li> <a href='#trial_21'> Trial Set 2.1: Varying the number of Cross Sections, Known Subset </a> <br>
    <br>
    <li> <a href='#trial_22'> Trial Set 2.2: Varying the number of Cross Sections, Lasso </a> <br>
    <br>
    </ul>
    <li> <a href='#trial_3'> Trial Set 3:  Known Subset vs. Unknown Subset vs. Lasso with $t_{inst} = 5$</a> <br>
              <br>
    <li> <a href='#trial_4'> Trial Set 4: Known Subset vs. Unknown Subset vs. Lasso with $t_{inst} = 10$</a> <br>
        <br>
    <li> <a href='#trial_5'> Trial Set 5: Known Subset vs. Unknown Subset vs. Lasso with $t_{inst} = 20$</a> <br>
        <br>
    <li> <a href='#trial_6'> Trial Set 6: Two Instruments per Cross Section: Unknown Subset vs. Lasso </a> <br><br>
    <ul>
        <li> <a href='#trial_61'>  Trial Set 6.1:  Unknown Subset vs. Lasso, $ncs = 15,\;\; t_{inst} = 30$<br>
            <br>
        <li> <a href='#trial_62'>  Trial Set 6.2: Unknown Subset vs. Lasso, $ncs = 25,\;\; t_{inst} = 50$<br> 
            <br>
        <li> <a href='#trial_63'>   Trial Set 6.3: Unknown Subset vs. Lasso, $ncs = 35,\;\; t_{inst} = 70$<br> <br>
        <li> <a href='#trial_64'>   Trial Set 6.4: Lasso Comparison <br> <br>
    </ul> 
    <li> <a href='#trial_7'> Trial Set 7: Five Instruments per Cross Section: Unknown Subset vs. Lasso </a> <br><br>
    <ul>
        <li> <a href='#trial_71'>  Trial Set 7.1:  Unknown Subset vs. Lasso, $ncs = 10,\;\; t_{inst} = 50$<br>
            <br>
        <li> <a href='#trial_72'>  Trial Set 7.2: Unknown Subset vs. Lasso, $ncs = 20,\;\; t_{inst} = 100$<br> 
            <br>
        <li> <a href='#trial_73'>   Trial Set 7.3: Unknown Subset vs. Lasso, $ncs = 30,\;\; t_{inst} = 150$<br><br>
        <li> <a href='#trial_74'>   Trial Set 7.4: Lasso Comparison <br> <br>
    </ul>  -->
</ul>


<h2> Monte Carlo Data Generating Process </h2>
<h3> Equivalences and Covariances</h3>

*Let* 
<ul> 
     <li> $n_{tp} \equiv T$ be the total number of time periods  </li> <br>
     <li> $n_{end} \equiv p_1$ be the number of endogenous regressors </li> <br>
     <li>  $n_{exo} \equiv p_2$ be the number of exogenous regressors  </li> <br>
     <li>  $n_{tinst} \equiv w$ be the total number of available instruments </li> <br>
     <li> $ n_{cinst} \equiv w_j$ be the number of instruments relevant to each cross section </li> <br>
</ul>
    
    
and 
    
$$
\begin{align*} 
\rho_{er} &= \begin{bmatrix} \rho_{er,1} & \rho_{er,2} & \cdots & \rho_{er,n_{end}} \end{bmatrix} \\[10pt]
\rho_{inst} &= \begin{bmatrix} \rho_{inst,1} & \rho_{inst,2} & \cdots & \rho_{inst,n_{tinst}-1} \end{bmatrix}\\[10pt]
\rho_{ex} &= \begin{bmatrix} \rho_{ex,1} & \rho_{ex,2} & \cdots & \rho_{ex,n_{exo}-1} \end{bmatrix}  
\end{align*}
$$


be vectors of covariances.

*****

<h2> Monte Carlo Data Generating Process </h2>
<h3> Error Covariance Matrix </h3>


For each cross section

$$
\begin{align*}
V_{er} &= \begin{bmatrix} 
1 & \rho_{er,1} & \rho_{er,2} & \cdots & \rho_{er,n_{end}} \\[10pt]
\rho_{er,1} & 1  & \rho_{er,1} &\cdots & \rho_{er,n_{end}-1} \\[10pt]
\rho_{er,2} & \rho_{er,1} & 1 & \cdots & \rho_{er,n_{end}-2} \\[10pt]
\vdots & &&\ddots&  \\[10pt]
 \rho_{er,n_{end}} & \rho_{er,n_{end}-1} & \rho_{er,n_{end}-2} & \cdots &  1 
\end{bmatrix}
\end{align*}
$$

For all cross sections

$$
\begin{align*}
CV_{er} &= 
\begin{bmatrix}
V_{er} & \mathbf{0}_{(n_{end}+1) \times (n_{end}+1)} & \cdots & \mathbf{0}_{(n_{end}+1) \times (n_{end}+1)}  \\[10pt]
\mathbf{0}_{(n_{end}+1) \times (n_{end}+1)} & V_{er} & \cdots & \mathbf{0}_{(n_{end}+1) \times (n_{end}+1)}  \\[10pt]
\vdots & \vdots & \ddots & \vdots \\[10pt]
\mathbf{0}_{(n_{end}+1) \times (n_{end}+1)} & \mathbf{0}_{(n_{end}+1) \times (n_{end}+1)} & \cdots & V_{er}
\end{bmatrix} 
\end{align*}
$$

******

<h2> Monte Carlo Data Generating Process </h2>
<h3> Exogenous Variable Covariance Matrix </h3>

For each cross section

$$ 
\begin{align*}
V_{ex} &= \begin{bmatrix} 
1 & \rho_{ex,1} & \rho_{ex,2} & \cdots & \rho_{ex,n_{exo}-1} \\[10pt]
\rho_{ex,1} & 1  & \rho_{ex,1} &\cdots & \rho_{ex,n_{exo}-2} \\[10pt]
\rho_{ex,2} & \rho_{ex,1} & 1 & \cdots & \rho_{ex,n_{exo}-3} \\[10pt]
\vdots & &&\ddots&  \\[10pt]
 \rho_{ex,n_{exo}-1} & \rho_{ex,n_{exo}-2} & \rho_{ex,n_{exo}-3} & \cdots &  1 
\end{bmatrix}
\end{align*} 
$$

For all cross sections

$$
\begin{align*}
CV_{ex}  = 
\begin{bmatrix}
V_{ex} & \mathbf{0}_{(n_{exo} \times n_{exo})} & \cdots & \mathbf{0}_{(n_{exo} \times n_{exo})}  \\[10pt]
\mathbf{0}_{(n_{exo} \times n_{exo})} & V_{ex} & \cdots & \mathbf{0}_{(n_{exo} \times n_{exo})}  \\[10pt]
\vdots & \vdots & \ddots & \vdots \\[10pt]
\mathbf{0}_{(n_{exo} \times n_{exo})} & \mathbf{0}_{(n_{exo} \times n_{exo})} & \cdots & V_{ex}
\end{bmatrix} 
\end{align*}
$$
******

<h2> Monte Carlo Data Generating Process </h2>
<h3> Common Instrument Covariance Matrix </h3>


$$
V_{inst} = \begin{bmatrix} 
1 & \rho_{inst,1} & \rho_{inst,2} & \cdots & \rho_{inst,n_{tinst}-1} \\[10pt]
\rho_{inst,1} & 1  & \rho_{inst,1} &\cdots & \rho_{inst,n_{tinst}-2} \\[10pt]
\rho_{inst,2} & \rho_{inst,1} & 1 & \cdots & \rho_{inst,n_{tinst}-3} \\[10pt]
\vdots & &&\ddots&  \\[10pt]
 \rho_{inst,n_{tinst}-1} & \rho_{inst,n_{tinst}-2} & \rho_{inst,n_{tinst}-3} & \cdots &  1 
\end{bmatrix}
%
$$
<h3> Exogenous Variable Generation </h3>

Let 
$$ 
\begin{align*} 
Z_{2jt} &= \begin{bmatrix} Z_{2jt,1} & Z_{2jt,2} & \cdots & Z_{2jt,n_{exo}} \end{bmatrix}' \\[10pt]  
W_t &= \begin{bmatrix} W_{t,1} & W_{t,2} & \cdots & W_{t,n_{tinst}} \end{bmatrix}' \\[10pt]
\tilde{V}_{jt} &= \begin{bmatrix} V_{jt,1} & V_{jt,2}& \cdots & V_{jt,n_{end}} & \varepsilon_{jt} \end{bmatrix}' 
\end{align*} 
$$

Then consider, $ W_{t} \sim N(\mathbf{0}_{n_{tinst} \times 1}, V_{inst})$

$$
\begin{bmatrix} Z_{21t}' & Z_{22t}' & \cdots & Z_{2n_{cs}t}' \end{bmatrix}' \sim N(\mathbf{0}_{n_{cs} \cdot n_{exo} \times 1}, CV_{ex})
\hspace{1cm} \text{ and } \hspace{1cm} 
\begin{bmatrix} \tilde{V}_{1t}' & \tilde{V}_{2t}' & \cdots & \tilde{V}_{n_{cs},t}' \end{bmatrix}' \sim N(\mathbf{0}_{n_{cs} \cdot (n_{end} +1) \times 1}, CV_{er})
$$

*****

<h2> Monte Carlo Data Generating Process </h2>
<h3> Endogenous Variable Generation </h3>

<ul>
    <li> Randomly draw $\alpha_{1d} \in [-1,1]\times [-1,1] $ for each $d \in \{1,2,\cdots,n_{end} \}$          </li> <br>
    <li> Randomly draw $\alpha_{2d} \in [-1,1]^{n_{exo}+n_{tinst}} $ for each $d \in \{1,2,\cdots,n_{end} \}$    </li><br>
    <li> For each $j\in \{1,2,\cdots , n_{cs}\}$, randomly draw one of the $\mathcal{C}^{n_{tinst}}_{n_{cinst}}$ ways that you can choose $n_{cinst}$ instruments from the $n_{tinst}$ total instruments, i.e. a set of $n_{cinst}$ integers  </li><br>
    <li> Map that set of integers to a binary vector $m_j$ indicating the integers drawn above.  </li><br>
    <li> Let $M_j = \text{diag}(m_j)$, and generate the following </li><br>
    
    
    $$ Z_{1jt,d} =  \alpha_{0jd} + Z_{2jt}' \alpha_{1d} + W_{t}'M_j \alpha_{2d} + V_{jt,d} \hspace{1cm} \text{ where } \hspace{1cm} \alpha_{0jd} = 1/2+j/2 $$ 
</ul>

<h3> Primary Regressand Variable Generation </h3>

<ul> 
    <li>  Draw the coefficient vector $\beta_1 \in [-1,1]^{n_{exo} +n_{end}}$, and generate the following   </li><br>
$$ Y_{jt} = [\; Z_{1jt}' \;\; Z_{2jt}' \;] \beta_1 + e_j + \varepsilon_{jt} \;\;\;\; \text{ where } \;\;\;\;  e_{j} = 1+j/2  $$
</ul>
*****

<h3> Variable Description Table </h3>

A number of variables are used below, here are their descriptions. Refer back to 'psc.ipynb' or 'psc_dgp.ipynb' for more details.

Variable Name  |  Description  
--|--
k_H| Kernel number used for H function Estimation  
c_H |  Plug in bandwidth constant for H function Estimation
k_mvd  | Kernel number used for multivariate d>2 density estimation
c_mvd|  Plug in bandwidth constant for multivariate d>2 density estimation
k_uvd  |  Kernel number used for bivariate density  estimation 
c_uvd |  Plug in bandwidth used for bivariate density estimation
dep_nm|  Variable name of the dependent variable
en_nm |  Variable names of each endogenous variable
ex_nm |  Variable names of each exogenous variable
in_nm |  Variable names of instruments relevant to each cross section
err_vpro|  Vector of covariances used to construct the error cov matrix
ex_vpro|  Vector of covariances used to construct the exog variable cov matrix
inst_vpro | Vector of covariances used to construct the instrument cov matrix
frc |  Indicator for whether the functional form of control function is forced
input_filename|  Filename of dataset used to generate the results. 
kwnsub  | Indicator for whether the subset of instruments relevant to each cross section is known
n_end  |  Number of endogenous variables 
n_exo|  Number of exogenous variables
ncs  |  Number cross sections
nds  |  Number of dgp data sets
ntp |  Number of time periods
orcl |  Indicator for whether residuals $V$ are observed (=1) or not
r_seed|  Random number generator seed used to generate the data set
sec_pan|  Indicator for whether the secondary eqn data is panel or not
c_inst  |  Number of instruments relevant to each cross section   
t_inst|  Total number of instruments
inc | List of instruments relevant to at least one cross section
tin  |  Variable name of the time period index
cin  |  Variable name of the cross section index 
lasso | Indicator for lasso estimation
alph | lasso penalty value
epsil | Threshold for averaging "non zero" coefficients

<a id='trial_1'></a>

<h2> Trial Set 1: Estimator comparison by varying the total number of instruments </h2>

<a id='sl1'></a>
<a id='trial_11'></a>

<h3> Trial Set 1.1: Estimator Comparison when $t_{inst} = 15$ </h3> 

Here we examine the sampling distribution of $\hat{\beta}_1$, and $ \hat{\alpha}_{1} $.

* Number of Cross Sections: 10


* Number of Endogenous Regressors: 1


* Number of Exogenous Regressors: 1


* Total Number of Instruments: 15


* Number of Instruments Relevant to Each Cross Section: 3


<a href='#index'>Index </a>,<a href='#sl3'> Next </a>,<a href='#sl1'> Back </a>
<a id='sl2'></a>

<h3> Trial Set 1.1: Merged DGP and Estimator Function Input Dictionary Comparison </h3> 

Here I have merged together the dictionaries used to generate both the underlying dataset and the results (you will see the file name for this data set below) and the dictionary used to produce the estimates based on that data below. 

In [41]:
# Trial 1.1 (trial index 0): show the merged estimator-input / DGP-input dictionaries.
psd.indict_dsp(merged_dcts[0],1)

<a href='#index'>Index </a>,<a href='#sl4'> Next </a>,<a href='#sl2'> Back </a>
<a id='sl3'></a>

<h3> Trial Set 1.1: True Secondary Equation Coefficients Comparison </h3> 

Here I interactively display the coefficient vectors $\alpha_{1jd}$ used to generate the data set (by row indicating cross section and equation) corresponding to the position its file name appears in 'inpt_filenames[0]' above. Here they should also be identical across data sets. 

**Note:** 

1.) Since 'sec_pan = 1' above, the secondary equations are panel type, so all nonzero coefficients in a column should be identical. 

2.) A zero coefficient in the following matrix means that the instrument it multiplies is not relevant to that cross section. 

3.) In accordance with the description above they should be identical across results data sets.

4.) The density of the secondary regression coefficient matrix is **25%**


In [55]:
# Trial 1.1 (trial index 0): show the true secondary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_acoeffs[0],2)

<a href='#index'>Index </a>,<a href='#sl5'> Next </a>,<a href='#sl3'> Back </a>
<a id='sl4'></a>

<h3> Trial Set 1.1: Secondary Function Coefficient Estimates </h3>

Here I interactively show the sampling distribution of the elements of $\hat{\alpha}_{dj}$.  

In [57]:
# Trial 1.1 (trial index 0): show the secondary-equation coefficient estimates, labelled with line_nms[0].
display(psd.cfs_dsp(acoeff[0],atables[0],2,5,line_nms[0]))

<h3> Trial Set 1.1: Comments on Secondary Function Coefficient Estimates </h3>
    
<ul> 
       <li> Here you can see that, as we would expect, the known subset estimator generates unbiased estimates, the unknown subset estimator generates badly biased estimates, and the lasso estimator generates estimates whose bias roughly splits the difference between the previous two.
       <br>   
       <li> Note that the properties of the lasso estimator are what you would expect from a consistent estimator, where the variance of the sampling distribution of each coefficient is inversely proportional to the number of cross sections that the instrument it multiplies is relevant to. <br>
       <br>
       <li> This presentation is really only a frame of reference as the sampling distribution of the estimates isn't of first order importance in this estimator. 
</ul>


<h3> Trial Set 1.1: True Primary Equations Coefficients Comparison </h3>

Here I interactively display the coefficient vector $\beta_1$ used to generate the data set corresponding to the position its file name appears in 'inpt_filenames[0]' above. Here they should be identical. 

In [58]:
# Trial 1.1 (trial index 0): show the true primary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_bcoeffs[0],1)

<a id='sl5'></a>
<h3> Trial Set 1.1: Primary Function Coefficient Estimates </h3>

Here I show the sampling distribution of the elements of $\hat{\beta}_1$.  

In [71]:
# Trial 1.1 (trial index 0): show the primary-equation coefficient estimates, labelled with line_nms[0].
display(psd.cfs_dsp(bcoeff[0],btables[0],1,5,line_nms[0],1))

<a href='#index'>Index </a>,<a href='#sl6'> Next </a>,<a href='#sl4'> Back </a>

<a id='trial_12'></a>
<a id='sl6'></a>
<h2> Trial Set 1.2: Estimator Comparison when $t_{inst} = 30$ </h2> 

Here we examine the sampling distribution of $\hat{\beta}_1, \hat{\alpha}_{1}$


<ul> 
    <li> Number of Cross Sections: 10 <br> <br>
    <li> Number of Time Periods: 50 <br> <br>
    <li> Number of Endogenous Regressors: 1    <br><br>
    <li> Number of Exogenous Regressors: 1    <br><br>
    <li> Total Number of Instruments: 30    <br><br>
    <li> Number of Instruments Relevant to Each Cross Section: 3    <br><br>
</ul>




<a href='#index'>Index </a>,<a href='#sl7'> Next </a>,<a href='#sl5'> Back </a>

<h3> Trial Set 1.2: Merged DGP and Estimator Function Input Dictionary Comparison </h3> 

Here I have merged together the dictionaries used to generate both the underlying dataset and the results (you will see the file name for this data set below) and the dictionary used to produce the estimates based on that data below. 

In [65]:
# Trial 1.2 (trial index 1): show the merged estimator-input / DGP-input dictionaries.
psd.indict_dsp(merged_dcts[1],1)

<a id='sl7'></a>
<h3> Trial Set 1.2: True Secondary Equation Coefficients Comparison </h3> 

Here I interactively display the coefficient vectors $\alpha_{1jd}$ used to generate the data set (by row indicating cross section and equation) corresponding to the position its file name appears in 'inpt_filenames[1]' above. Here they should also be identical across data sets. 

**Note:** 

1.) Since 'sec_pan = 1' above, the secondary equations are panel type, so all nonzero coefficients in a column should be identical. 

2.) A zero coefficient in the following matrix means that the instrument it multiplies is not relevant to that cross section. 

3.) In accordance with the description above they should be identical across results data sets.


4.) The density of the secondary regression coefficient matrix is **13%**


In [68]:
# Trial 1.2 (trial index 1): show the true secondary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_acoeffs[1],2)

<a href='#index'>Index </a>,<a href='#sl8'> Next </a>,<a href='#sl6'> Back </a>

<h3> Trial Set 1.2: Secondary Function Coefficient Estimates </h3>

Here I interactively show the sampling distribution of the elements of $\hat{\alpha}_{dj}$.  

In [12]:
# Trial 1.2 (trial index 1): show the secondary-equation coefficient estimates, labelled with line_nms[1].
display(psd.cfs_dsp(acoeff[1],atables[1],2,5,line_nms[1]))

<h3> Trial Set 1.2: Comments on Secondary Function Coefficient Estimates </h3>
    
<ul>
       <li> Same comments roughly as 1.1 
</ul>
    
* Same comments as Trial Set 1.1.

<h3> Trial Set 1.2: True Primary Equations Coefficients Comparison </h3>

Here I interactively display the coefficient vector $\beta_1$ used to generate the data set corresponding to the position its file name appears in 'inpt_filenames[1]' above. Here they should be identical. 

In [13]:
# Trial 1.2 (trial index 1): show the true primary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_bcoeffs[1],1)

<a id='sl8'></a>
<h3> Trial Set 1.2: Primary Function Coefficient Estimates </h3>

Here I show the sampling distribution of the elements of $\hat{\beta}_1$.  

In [73]:
# Trial 1.2 (trial index 1): show the primary-equation coefficient estimates, labelled with line_nms[1].
display(psd.cfs_dsp(bcoeff[1],btables[1],1,9,line_nms[1],1))

<a href='#index'>Index </a>,<a href='#sl9'> Next </a>,<a href='#sl7'> Back </a>

<h3> Trial Set 1.2: Comments on Primary Function Coefficient Estimates </h3>

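* **TODO:** comments on the primary function coefficient estimates for Trial Set 1.2 are still to be written.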

<a href='#index'> Back to Index </a>
<a id='trial_13'></a>

<a id='sl9'></a>
<h2>Trial Set 1.3: Estimator Comparison when $t_{inst} = 45$ </h2> 

Here we examine the sampling distribution of $\hat{\beta}_1, \hat{\alpha}_{1}$.

* Number of Cross Sections: 10


* Number of Endogenous Regressors: 1


* Number of Exogenous Regressors: 1


* Number of Time Periods: 50


* Total Number of Instruments: 45


* Number of Instruments Relevant to Each Cross Section: 3





<a href='#index'>Index </a>,<a href='#sl10'> Next </a>,<a href='#sl8'> Back </a>

<h3> Trial Set 1.3: Merged DGP and Estimator Function Input Dictionary Comparison </h3> 

Here I have merged together the dictionaries used to generate both the underlying dataset and the results (you will see the file name for this data set below) and the dictionary used to produce the estimates based on that data below. 

Below you will see a slider which can be used to summarize this merged dictionary corresponding to the position its file name appears in 'inpt_filenames[2]' above. 

In accordance with the trial description, the only differences that should exist are the estimator settings (e.g. orcl, kwnsub, lasso) and the file name of the data set used to generate the results. 

In [15]:
# Trial 1.3 (trial index 2): show the merged estimator-input / DGP-input dictionaries.
psd.indict_dsp(merged_dcts[2],1)

<a id = 'sl10'></a>
<h3> Trial Set 1.3: True Secondary Equation Coefficients Comparison </h3> 

Here I interactively display the coefficient vectors $\alpha_{1jd}$ used to generate the data set (by row indicating cross section and equation) corresponding to the position its file name appears in 'inpt_filenames[2]' above. Here they should also be identical across data sets. 

**Note:** 

1.) Since 'sec_pan = 1' above, the secondary equations are panel type, so all nonzero coefficients in a column should be identical. 

2.) A zero coefficient in the following matrix means that the instrument it multiplies is not relevant to that cross section. 

3.) In accordance with the description above they should be identical across results data sets.

4.) The density of the secondary regression coefficient matrix is **8%**


In [86]:
# Trial 1.3 (trial index 2): show the true secondary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_acoeffs[2],2)

<a href='#index'>Index </a>,<a href='#sl11'> Next </a>,<a href='#sl9'> Back </a>

<h3> Trial Set 1.3: Secondary Function Coefficient Estimates </h3>

Here I interactively show the sampling distribution of the elements of $\hat{\alpha}_{dj}$.  

In [17]:
# Trial 1.3 (trial index 2): show the secondary-equation coefficient estimates, labelled with line_nms[2].
display(psd.cfs_dsp(acoeff[2],atables[2],2,5,line_nms[2]))

<h3> Trial Set 1.3: Comments on Secondary Function Coefficient Estimates </h3>
<ul>
    <li> Due to the shrinkage inherent in the operation of the lasso estimator the bias of the coefficients is substantial and in nearly half the cases growing. However the variances of each are shrinking as the number of time periods grows. <br>
        <br>
</ul>

<h3> Trial Set 1.3: True Primary Equations Coefficients Comparison </h3>

Here I interactively display the coefficient vector $\beta_1$ used to generate the data set corresponding to the position its file name appears in 'inpt_filenames[2]' above. Here they should be identical. 

In [18]:
# Trial 1.3 (trial index 2): show the true primary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_bcoeffs[2],1)

<a id = 'sl11'></a>
<h3> Trial Set 1.3: Primary Function Coefficient Estimates </h3>

Here I show the sampling distribution of the elements of $\hat{\beta}_1$.  

In [19]:
# Trial 1.3 (trial index 2): show the primary-equation coefficient estimates, labelled with line_nms[2].
display(psd.cfs_dsp(bcoeff[2],btables[2],1,8.5,line_nms[2],1))

<a href='#index'>Index </a>,<a href='#sl12'> Next </a>,<a href='#sl10'> Back </a>

<h3> Trial Set 1.3: Comments on Primary Function Coefficient Estimates </h3>

<ul>
    <li> The behavior here is the same as the known subset estimation in Trial Set 1.1
        <br><br> 
</ul>

<a href='#index'> Back to Index </a>
<a id='trial_14'></a>

<a id = 'sl12'> </a>
<h2>Trial Set 1.4: Lasso Comparison where $t_{inst} \in \{15,30,45,100\}$ </h2> 

Here we examine the sampling distribution of $\hat{\beta}_1, \hat{\alpha}_{1}$.

* Number of Cross Sections: 10


* Number of Endogenous Regressors: 1


* Number of Exogenous Regressors: 1


* Number of time periods: 50


* Number of Instruments Relevant to Each Cross Section: 3



<a href='#index'>Index </a>,<a href='#sl13'> Next </a>,<a href='#sl11'> Back </a>

<h3> Trial Set 1.4: Merged DGP and Estimator Function Input Dictionary Comparison </h3> 

Here I have merged together the dictionaries used to generate both the underlying dataset and the results (you will see the file name for this data set below) and the dictionary used to produce the estimates based on that data below. 

Below you will see a slider which can be used to summarize this merged dictionary corresponding to the position its file name appears in 'inpt_filenames[3]' above. 

In accordance with the trial description, the only differences that should exist are the total number of instruments (t_inst) and the file name of the data set used to generate the results. 

In [20]:
# Trial 1.4 (trial index 3): show the merged estimator-input / DGP-input dictionaries.
psd.indict_dsp(merged_dcts[3],1)

<a id = 'sl13'> </a>
<h3> Trial Set 1.4: True Secondary Equation Coefficients Comparison </h3> 

Here I interactively display the coefficient vectors $\alpha_{1jd}$ used to generate the data set (by row indicating cross section and equation) corresponding to the position its file name appears in 'inpt_filenames[3]' above. Here they should also be identical across data sets. 

**Note:** 

1.) Since 'sec_pan = 1' above, the secondary equations are panel type, so all nonzero coefficients in a column should be identical. 

2.) A zero coefficient in the following matrix means that the instrument it multiplies is not relevant to that cross section. 

3.) In accordance with the description above they should be identical across results data sets.

4.) The densities of the secondary regression coefficient matrices are **26%,13%,8%,3%**


In [88]:
# Trial 1.4 (trial index 3): show the true secondary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_acoeffs[3],2)

<a href='#index'>Index </a>,<a href='#sl14'> Next </a>,<a href='#sl12'> Back </a>

<h3> Trial Set 1.4: Secondary Function Coefficient Estimates </h3>

Here I interactively show the sampling distribution of the elements of $\hat{\alpha}_{dj}$.  

In [22]:
# Trial 1.4 (trial index 3): show the secondary-equation coefficient estimates, labelled with line_nms[3].
display(psd.cfs_dsp(acoeff[3],atables[3],2,5,line_nms[3]))

<h3> Trial Set 1.4: Comments on Secondary Function Coefficient Estimates </h3>
<ul>
    <li> Due to the shrinkage inherent in the operation of the lasso estimator the bias of the coefficients is substantial and in nearly half the cases growing. However the variances of each are shrinking as the number of time periods grows. <br>
        <br>
</ul>

<h3> Trial Set 1.4: True Primary Equations Coefficients Comparison </h3>

Here I interactively display the coefficient vector $\beta_1$ used to generate the data set corresponding to the position its file name appears in 'inpt_filenames[3]' above. Here they should be identical. 

In [23]:
# Trial 1.4 (trial index 3): show the true primary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_bcoeffs[3],1)

<a id ='sl14'> </a>

<h3> Trial Set 1.4: Primary Function Coefficient Estimates </h3>

Here I show the sampling distribution of the elements of $\hat{\beta}_1$.  

In [24]:
# Trial 1.4 (trial index 3): show the primary-equation coefficient estimates, labelled with line_nms[3].
display(psd.cfs_dsp(bcoeff[3],btables[3],1,12,line_nms[3],1))

<a href='#index'>Index </a>,<a href='#sl15'> Next </a>,<a href='#sl13'>Back </a>

<h3> Trial Set 1.4: Comments on Primary Function Coefficient Estimates </h3>

<ul>
    <li> The behavior here is the same as the known subset estimation in Trial Set 1.1
        <br><br> 
</ul>

<a href='#index'> Back to Index </a>
<a id='trial_2'></a>

<a id = 'sl15'> </a>

<h2>Trial Set 2.0:  Properties of Lasso Estimator, Increasing Number of Cross Sections  </h2> 

Here we examine the sampling distribution of $\hat{\beta}_1$, and $\hat{\alpha}_{1}$ as the number of cross sections increases.

* Number of Cross Sections: 10,25,40


* Number of Time Periods: 50


* Number of Endogenous Regressors: 1


* Number of Exogenous Regressors: 1


* Total Number of Instruments: 100


* Number of Instruments Relevant to Each Cross Section: 3



<a href='#index'>Index </a>,<a href='#sl16'> Next </a>,<a href='#sl14'>Back </a>

<h3> Trial Set 2.0: Merged DGP and Estimator Function Input Dictionary Comparison </h3> 

Here I have merged together the dictionaries used to generate both the underlying dataset and the results (you will see the file name for this data set below) and the dictionary used to produce the estimates based on that data below. 

Below you will see a slider which can be used to summarize this merged dictionary corresponding to the position its file name appears in 'inpt_filenames[4]' above. 

In accordance with the trial description, the only differences that should exist are the number of cross sections (ncs) and the file name of the data set used to generate the results. 

In [25]:
# Trial 2.0 (trial index 4): show the merged estimator-input / DGP-input dictionaries.
psd.indict_dsp(merged_dcts[4],1)

<h3> Trial Set 2.0: True Secondary Equation Coefficients Comparison </h3> 

Here I interactively display the coefficient vectors $\alpha_{1jd}$ used to generate the data set (by row indicating cross section and equation) corresponding to the position its file name appears in 'inpt_filenames[4]' above. Here they should also be identical across data sets. 

**Note:** 

1.) Since 'sec_pan = 1' above, the secondary equations are panel type, so all nonzero coefficients in a column should be identical. 

2.) A zero coefficient in the following matrix means that the instrument it multiplies is not relevant to that cross section. 


In [26]:
# Trial 2.0 (trial index 4): show the true secondary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_acoeffs[4],2)

<h3> Trial Set 2.0: Secondary Function Coefficient Estimates </h3>

Here I interactively show the sampling distribution of the elements of $\hat{\alpha}_{dj}$.  

In [27]:
# Trial 2.0 (trial index 4): show the secondary-equation coefficient estimates, labelled with line_nms[4].
display(psd.cfs_dsp(acoeff[4],atables[4],2,5,line_nms[4]))

<h3> Trial Set 2.0: Comments on Secondary Function Coefficient Estimates </h3>
<ul>
    <li> Due to the lack of stability in the lasso estimates, it's difficult to interpret these results. <br>
        <br>
</ul>

In [40]:
# Trial 2.0 (trial index 4): show the true primary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_bcoeffs[4],1)

<a id ='sl16'> </a>

<h3> Trial Set 2.0: Primary Function Coefficients </h3>

Here I show the sampling distribution of the elements of $\hat{\beta}_1$.  

In [29]:
# Trial 2.0 (trial index 4): show the primary-equation coefficient estimates, labelled with line_nms[4].
display(psd.cfs_dsp(bcoeff[4],btables[4],1,12,line_nms[4],1))

<a href='#index'>Index </a>,<a href='#sl17'> Next </a>,<a href='#sl15'>Back </a>

<h3> Trial Set 2.0: Comments on Primary Function Coefficient Estimates </h3>

<ul>
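    <li> <b>TODO:</b> comments on the primary function coefficient estimates for Trial Set 2.0 are still to be written.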
        <br><br> 
</ul>

<a href='#index'> Back to Index </a>
<a id='trial_3'></a>

<a id = 'sl17'> </a>

<h2>Trial Set 3.0: Properties of Lasso Estimator, Increasing Number of Time Periods </h2> 

Here we examine the sampling distribution of $\hat{\beta}_1, \hat{\alpha}_{1}$.

* Number of Cross Sections: 10


* Number of Time Periods: 50,100,150


* Number of Endogenous Regressors: 1


* Number of Exogenous Regressors: 1


* Total Number of Instruments: 100


* Number of Instruments Relevant to Each Cross Section: 3 



<a href='#index'>Index </a>,<a href='#sl18'> Next </a>,<a href='#sl16'>Back </a>

<h3> Trial Set 3.0: Merged DGP and Estimator Function Input Dictionary Comparison </h3> 

Here I have merged together the dictionaries used to generate both the underlying dataset and the results (you will see the file name for this data set below) and the dictionary used to produce the estimates based on that data below. 

Below you will see a slider which can be used to summarize this merged dictionary corresponding to the position its file name appears in 'inpt_filenames[5]' above. 

In accordance with the trial description, the only differences that should exist are the number of time periods (ntp) and the file name of the data set used to generate the results. 

In [30]:
# Trial 3.0 (trial index 5): show the merged estimator-input / DGP-input dictionaries.
psd.indict_dsp(merged_dcts[5],1)

<h3> Trial Set 3.0: True Secondary Equation Coefficients Comparison </h3> 

Here I interactively display the coefficient vectors $\alpha_{1jd}$ used to generate the data set (by row indicating cross section and equation) corresponding to the position its file name appears in 'inpt_filenames[5]' above. Here they should also be identical across data sets. 

**Note:** 

1.) Since 'sec_pan = 1' above, the secondary equations are panel type, so all nonzero coefficients in a column should be identical. 

2.) A zero coefficient in the following matrix means that the instrument it multiplies is not relevant to that cross section. 



In [31]:
# Trial 3.0 (trial index 5): show the true secondary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_acoeffs[5],2)

<h3> Trial Set 3.0: Secondary Function Coefficient Estimates </h3>

Here I interactively show the sampling distribution of the elements of $\hat{\alpha}_{dj}$.  

In [32]:
# Trial 3.0 (trial index 5): show the secondary-equation coefficient estimates, labelled with line_nms[5].
display(psd.cfs_dsp(acoeff[5],atables[5],2,7,line_nms[5]))

<h3> Trial Set 3.0: Comments on Secondary Function Coefficient Estimates </h3>
<ul>
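    <li> <b>TODO:</b> comments on the secondary function coefficient estimates for Trial Set 3.0 are still to be written. <br>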
        <br>
</ul>

<h3> Trial Set 3.0: True Primary Equations Coefficients Comparison </h3>

Here I interactively display the coefficient vector $\beta_1$ used to generate the data set corresponding to the position its file name appears in 'inpt_filenames[5]' above. Here they should be identical. 

In [33]:
# Trial 3.0 (trial index 5): show the true primary-equation coefficients read from the DGP summary files.
psd.indict_dsp(true_bcoeffs[5],1)

<a id = 'sl18'> </a>


<h3> Trial Set 3.0: Primary Function Coefficient Estimates </h3>

Here I show the sampling distribution of the elements of $\hat{\beta}_1$.  

In [34]:
# Trial 3.0 (trial index 5): show the primary-equation coefficient estimates, labelled with line_nms[5].
display(psd.cfs_dsp(bcoeff[5],btables[5],1,8,line_nms[5],1))

<a href='#index'>Index </a>, <a href='#sl17'> Back </a>

<h3> Trial Set 3.0: Comments on Primary Function Coefficient Estimates </h3>