In [4]:
## Preamble: Package Loading
import numpy as np
import ipywidgets as ipw
from IPython.display import display,display_html
import matplotlib.pyplot as plt
from matplotlib import gridspec
import pandas as pd
import json
import kernel as kr
import psc_dbl_sumdisp as psd 

In [5]:
%%html
<style>
# .cell.selected~.unselected { display: none; }
#.cell.code_cell.unselected .input { display: none; }
</style>

<h1> Panel Selection and Control: Monte Carlo Results

<h2> Summary </h2>

The following notebook contains results of a Monte Carlo Exercise conducted on the estimator detailed in 'psc.ipynb' and 'psc_proposal.pdf' with a data sets generated by 'psc_dgp.ipnyb' (see this notebook for details of the DGP). 

Important features of each of the following trials are presented here

* In all data sets the endogneous variables $Z_1$ have been generated by secondary equations which are panel fixed effects type, corresponding to section 3.3 and 3.4 of 'psc_dgp.ipynb'. 


* All estimates have been generated with the knowledge that the secondary equations are panel type (i.e. the estimation of the secondary equations is properly specified). 


* The number of datasets used from each component of each trial is 'nds = 500'

In [15]:
# Trial 1: 0
inpt_filenames=[['pscout_10_19_1848.json','pscout_10_19_1466.json' ,'pscout_10_19_1163.json' ]]
line_nms=[['ntp = 50','ntp = 100','ntp = 150',]]

# #Trial 2: 1
# inpt_filenames.append(['pscout_9_4_1232.json' ,'pscout_9_4_1837.json','pscout_9_4_1969.json' ,'pscout_9_4_1655.json' ])
# line_nms.append(['Oracle','Known','Unknown','Lasso'])

In [16]:
res_out = [[psd.psc_load(inpt_filenames[k][i]) for i in range(len(inpt_filenames[k]))] 
                                               for k in range(len(line_nms))]

estin_dcts = [[res_out[k][i][0] for i in range(len(inpt_filenames[k]))]
                                for k in range(len(line_nms))]

dgp_sum_filenames = [[estin_dcts[k][i]['input_filename'].replace('pscdata','pscsum')
                                for i in range(len(inpt_filenames[k]))]
                                for k in range(len(line_nms))]
    
dgp_dicts = [[psd.pscsum_load(dgp_sum_filenames[k][i]) 
                                for i in range(len(dgp_sum_filenames[k]))]
                                for k in range(len(line_nms))]

dgpin_dcts =  [[dgp_dicts[k][i][0] for i in range(len(inpt_filenames[k]))]
                                   for k in range(len(line_nms))]

merged_dcts = [[{**estin_dcts[k][i],**dgpin_dcts[k][i]} 
                                   for i in range(len(inpt_filenames[k]))]
                                   for k in range(len(line_nms))]

true_bcoeffs = [[dgp_dicts[k][i][1] for i in range(len(inpt_filenames[k]))]
                                    for k in range(len(line_nms))]

true_acoeffs = [[dgp_dicts[k][i][2] for i in range(len(inpt_filenames[k]))]
                                    for k in range(len(line_nms))]

bcoeff  = [[res_out[k][i][1] for i in range(len(inpt_filenames[k]))]
                             for k in range(len(line_nms))]

acoeff  = [[res_out[k][i][3] for i in range(len(inpt_filenames[k]))]
                             for k in range(len(line_nms))]

btables = [[res_out[k][i][2] for i in range(len(inpt_filenames[k]))]
                             for k in range(len(line_nms))]

atables = [[res_out[k][i][4] for i in range(len(inpt_filenames[k]))]
                             for k in range(len(line_nms))]

<a id='index'><a>

<h2> Index </h2>
<ul>
    <li> <a href='#trial_1'> Trial Set 1:  Estimator comparison varying the total number of instruments</a> <br>
        <br>
    <ul> 
<!--         <li> <a href='#trial_11'> Trial Set 1.1:  Estimator Comparison when $t_{inst} = 15$ </a> <br>
        <br>
        <li> <a href='#trial_12'> Trial Set 1.2:  Estimator Comparison when $t_{inst} = 30$ </a> <br>
        <br>
        <li> <a href='#trial_13'> Trial Set 1.3:  Estimator Comparison when $t_{inst} = 45$ </a> <br>
        <br>
        <li> <a href='#trial_14'> Trial Set 1.4:   Lasso Comparison where $t_{inst} \in \{15,30,45,100\}$ </a> <br>
        <br>
    </ul> 
     <li> <a href='#trial_2'> Trial Set 2.0: Properties of Lasso Estimator, Increasing Number of Cross Sections </a> <br>
       <br>
     <li> <a href='#trial_3'> Trial Set 3.0: Properties of Lasso Estimator, Increasing Number of Time Periods </a> <br>
       <br>
   <ul> 
    <li> <a href='#trial_21'> Trial Set 2.1: Varying the number of Cross Sections, Known Subset </a> <br>
    <br>
    <li> <a href='#trial_22'> Trial Set 2.2: Varying the number of Cross Sections, Lasso </a> <br>
    <br>
    </ul>
    <li> <a href='#trial_3'> Trial Set 3:  Known Subset vs. Unknown Subset vs. Lasso with $t_{inst} = 5$</a> <br>
              <br>
    <li> <a href='#trial_4'> Trial Set 4: Known Subset vs. Unknown Subset vs. Lasso with $t_{inst} = 10$</a> <br>
        <br>
    <li> <a href='#trial_5'> Trial Set 5: Known Subset vs. Unknown Subset vs. Lasso with $t_{inst} = 20$</a> <br>
        <br>
    <li> <a href='#trial_6'> Trial Set 6: Two Instruments per Cross Section: Unknown Subset vs. Lasso </a> <br><br>
    <ul>
        <li> <a href='#trial_61'>  Trial Set 6.1:  Unknown Subset vs. Lasso, $ncs = 15,\;\; t_{inst} = 30$<br>
            <br>
        <li> <a href='#trial_62'>  Trial Set 6.2: Unknown Subset vs. Lasso, $ncs = 25,\;\; t_{inst} = 50$<br> 
            <br>
        <li> <a href='#trial_63'>   Trial Set 6.3: Unknown Subset vs. Lasso, $ncs = 35,\;\; t_{inst} = 70$<br> <br>
        <li> <a href='#trial_64'>   Trial Set 6.4: Lasso Comparison <br> <br>
    </ul> 
    <li> <a href='#trial_7'> Trial Set 7: Five Instruments per Cross Section: Unknown Subset vs. Lasso </a> <br><br>
    <ul>
        <li> <a href='#trial_71'>  Trial Set 7.1:  Unknown Subset vs. Lasso, $ncs = 10,\;\; t_{inst} = 50$<br>
            <br>
        <li> <a href='#trial_72'>  Trial Set 7.2: Unknown Subset vs. Lasso, $ncs = 20,\;\; t_{inst} = 100$<br> 
            <br>
        <li> <a href='#trial_73'>   Trial Set 7.3: Unknown Subset vs. Lasso, $ncs = 30,\;\; t_{inst} = 150$<br><br>
        <li> <a href='#trial_74'>   Trial Set 7.4: Lasso Comparison <br> <br>
    </ul>  -->
</ul>

<h3> Variable Description Table </h3>

A number of variables are used below, here are their descriptions. Refer back to 'psc.ipynb' or 'psc_dgp.ipynb' for more details.

Variable Name  |  Description  
--|--
k_H| Kernel number used for H function Estimation  
c_H |  Plug in bandwidth constant for H function Estimation
k_mvd  | Kernel number used for multivariate d>2 density estimation
c_mvd|  Plug in bandwidth constant for multivariate d>2 density estimation
k_uvd  |  Kernel number used for bivariate density  estimation 
c_uvd |  Plug in bandwidth used for bivariate density estimation
dep_nm|  Variable name of the dependent variable
en_nm |  Variable names of each endogenous variabble
ex_nm |  Variable names of each exogenous variable
in_nm |  Variable names of instruments relevant to each cross section
err_vpro|  Vector of covariances used to construct the error cov matrix
ex_vpro|  Vector of covariances used to construct the exog variable cov matrix
inst_vpro | Vector of covariances used to construct the instrument cov matrix
frc |  Indicator for whether the functional form of control function is forced
input_filename|  Filename of dataset used to generate the results. 
kwnsub  | Indicator for ifthe subset of instrument relevant to each crs is known
n_end  |  Number of endogenous variables 
n_exo|  Number of exogenous variables
ncs  |  Number cross sections
nds  |  Number of dgp data sets
ntp |  Number of time periods
orcl |  Indicator for whether residuals $V$ are observed (=1) or not
r_seed|  Random number generator seed used to generate the data set
sec_pan|  Indicator for whether the secondary eqn data is panel or not
c_inst  |  Number of instrument relevant to each cross section   
t_inst|  Total number of instruments
inc | List of instrument relevant to at least one cross section
tin  |  Variable name of the time period index
cin  |  Variable name of the cross section index 
lasso | Indicator for lasso estimation
alph | lasso penalty value
epsil | Threshold for averaging "non zero" coefficients

<a href='#index'> Back to Index </a>
<a id='trial_1'><a>

<h1> Trial Set 1: </h2>

<a href='#index'> Back to Index </a>
<a id='trial_11'><a>

<h2> Trial Set 1: Estimator Comparison when </h2> 

Here we examine the sampling distribution of $\hat{\beta}_1$, and $ \hat{\alpha}_{1} $.

* Number of Cross Sections: 


* Number of Endogenous Regressors:


* Number of Exogenous Regressors:


* Total Number of Instruments: 


* Number of Instrument Relevant to Each Cross Section:


<h3> Trial Set 1: Merged DGP and Estimator Function Input Dictionary Comparison </h3> 


In [17]:
psd.indict_dsp(merged_dcts[0],1)

<h3> Trial Set 1: True Secondary Equation Coefficients Comparison </h3> 

In [18]:
psd.indict_dsp(true_acoeffs[0],2)

<h3> Trial Set 1: Secondary Function Coefficient Estimates </h3>

Here I interactively show the sampling distribution of the elements of $\hat{\alpha}_{dj}$.  

In [19]:
display(psd.cfs_dsp(acoeff[0],atables[0],2,5,line_nms[0]))

<h3> Trial Set 1: Comments on Secondary Function Coefficient Estimates </h3>
<ul> 
    <li> <br> 
</ul> 

<h3> Trial Set 1: True Primary Equations Coefficients Comparison </h3>

Here I interactively display the coefficent vector $\beta_1$ used to generate the data set.

In [20]:
psd.indict_dsp(true_bcoeffs[0],1)

<h3> Trial Set 1: Primary Function Coefficient Estimates </h3>

Here I show the sampling distribution of the elements of $\hat{\beta}_1$.  

In [21]:
display(psd.cfs_dsp(bcoeff[0],btables[0],1,5,line_nms[0],1))

<h3> Trial Set 1: Comments on Primary Function Coefficient Estimates </h3>

<ul> 
    <li> <br> 
</ul>

<a href='#index'> Back to Index </a>
<a id='trial_12'><a>
 *****
 *****

<h2>Trial Set X.X: Description </h2> 

Here we examine the sampling distribution of $\hat{\beta}_1, \hat{\alpha}_{1}$.

* Number of Cross Sections:


* Number of Endogenous Regressors: 


* Number of Exogenous Regressors: 


* Total Number of Instruments:


* Number of Instrument Relevant to Each Cross Section:



<h3> Trial Set X.X: Merged DGP and Estimator Function Input Dictionary Comparison </h3> 
 

In [35]:
psd.indict_dsp(merged_dcts[X],1)

NameError: name 'X' is not defined

<h3> Trial Set X.X: True Secondary Equation Coefficients Comparison </h3> 


In [None]:
psd.indict_dsp(true_acoeffs[X],2)

<h3> Trial Set X.X: Secondary Function Coefficient Estimates </h3>

Here I interactively show the sampling distribution of the elements of $\hat{\alpha}_{dj}$.  

In [None]:
display(psd.cfs_dsp(acoeff[X],atables[X],2,5,line_nms[X]))

<h3> Trial Set X.X: Comments on Secondary Function Coefficient Estimates </h3>
<ul>
    <li>  <br>
</ul>

<h3> Trial Set X.X: True Primary Equations Coefficients Comparison </h3>


In [None]:
psd.indict_dsp(true_bcoeffs[X],1)

<h3> Trial Set X.X: Primary Function Coefficient Estimates </h3>

Here I show the sampling distribution of the elements of $\hat{\beta}_1$.  

In [None]:
display(psd.cfs_dsp(bcoeff[X],btables[X],1,12,line_nms[X]))

<h3> Trial Set X.X: Comments on Primary Function Coefficient Estimates </h3>

<ul>
    <li> The behavior here is the same as the the known subset estimation in trials set 1.1
        <br><br> 
</ul>