In [1]:
import sys
import os
sys.path.append(os.path.join(os.getcwd(), "../src"))

In [8]:
import logging
import jax
import jax.numpy as np
import numpy as onp
import skbio.stats.composition as cmp
import plotly.express as px
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import statsmodels.api as sm
from datetime import datetime

from simulate_data_fct import sim_IV_lognormal_linear
from plot_fct import update_layout, img_path, update_layout_px, colours, plot_ilr_X_vs_Y
from run_methods_all import run_methods_confidence_interval
from plot_fct import plot_mse_results, plot_beta_results

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
# set up logging file
logging.getLogger().setLevel(logging.INFO)

In [5]:
key = jax.random.PRNGKey(191)

# Linear Setup

## Data Generation

In [15]:
n = 500
p = 30
num_star = 250
instrument_strength = "unknown"
savepath = os.path.join(img_path, "HighDimLogcontrast")

V = cmp._gram_schmidt_basis(p)
num_inst=10
c_X = 2  # dispersion parameter

# instrument strength
alpha0 =np.hstack([np.array([1, 1, 2, 1, 4, 4, 2, 1, 4, 4, 2, 1]), jax.random.choice(key, np.array([1, 2, 2]), (p-8,))])
alphaT = jax.random.choice(key, np.array([0, 0, 0, 10]), (num_inst, p))
# confounder
mu_c = np.hstack([np.array([0.2, 0.3, 0.2, 0.1]), jax.random.uniform(key, (p-4, ), minval=0.01, maxval=0.05)])
mu_c = mu_c / mu_c.sum()  # has to be a compositional vector
ps = np.hstack([np.zeros((int(p/2),)), 0.8*np.ones((p - int(p/2),))])   # prob of zero inflation

# relationship between X and Y
beta0 = 1
betaT = np.hstack([ np.array([-5, -5, -5, -5]), np.array([5, 5, 5, 5]),  
                   np.zeros((p-8))])  # beta is chosen to sum up to one
# confounder influence to Y
c_Y = np.hstack([np.array([20, 20, 0, 0]), np.array([-5, -5, -5, -5, -5, -5, -5, -5]), 
                 np.zeros((p-12))])  # confounder is a composition as well.

In [10]:
confounder, Z_sim, X_sim, Y_sim, X_star, Y_star = sim_IV_lognormal_linear(
            key,
            n=n,
            p=p,
            num_inst=num_inst,
            mu_c=mu_c,
            c_X=c_X,
            alpha0=alpha0,
            alphaT=alphaT,
            c_Y=c_Y,
            beta0=beta0,
            betaT=betaT,
            num_star=500,
ps=ps)
X_sim_ilr = cmp.ilr(X_sim)
X_star_ilr = cmp.ilr(X_star)

## Data Visualization

In [11]:
# Compute F-Statistics 
ZZ_sim = onp.array(sm.add_constant(Z_sim))
alphahat = np.linalg.inv(ZZ_sim.T@ZZ_sim)@ZZ_sim.T@X_sim_ilr
MSM = np.sum(((ZZ_sim@alphahat - np.mean(X_sim_ilr, axis=0))**2), axis=0)/(num_inst)
MSE = np.sum(((ZZ_sim@alphahat - X_sim_ilr)**2), axis=0)/(n-num_inst)
F = MSM / MSE
F

DeviceArray([21.7243    ,  6.557183  ,  5.0914493 ,  2.2304416 ,
              4.7095346 ,  0.967368  ,  2.3969598 ,  5.781007  ,
             14.037651  ,  7.829588  ,  5.8779984 , 14.1642    ,
              3.339064  , 17.386314  ,  1.3577465 ,  1.2088323 ,
              1.5491704 ,  0.45316425,  1.0237327 ,  1.5300292 ,
              1.7684637 ,  1.1605805 ,  1.5496781 ,  0.66334194,
              0.70978135,  2.1112206 ,  0.45974594,  0.6181574 ,
              2.2108054 ], dtype=float32)

In [16]:
fig = plot_ilr_X_vs_Y(X_sim_ilr, Y_sim, Y_star)

fig.update_layout(xaxis3=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(yaxis3=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(xaxis4=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(yaxis4=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(xaxis5=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(yaxis5=dict(showline=True, linewidth=2, linecolor="black"))

fig.write_image(os.path.join(savepath,
                             "Data_vs_trueeffect_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))


fig.show()

# Diversity Approach

In [17]:
from plot_fct import plot_diversity_methods
from helper_fct import diversity
from run_methods_all import run_diversity_estimation_methods
div_shannon = diversity(X_sim, "shannon")
div_simpson = diversity(X_sim, "simpson")

In [18]:
# Shannon Estimation
x, y, ytrue, xstar, xstar_bound, ystar_ols, ystar_2sls, ystar_kiv, results = run_diversity_estimation_methods(
    Z_sim, div_shannon, Y_sim, Ytrue=Y_star, methods=["OLS", "2SLS", "KIV"])

fig = plot_diversity_methods(x, y, xstar, xstar_bound, ystar_ols, ystar_2sls, ystar_kiv, results, ytrue)
fig.write_image(os.path.join(savepath,
                             "ShannonDiversity_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))

fig.show()

INFO:absl:Setup matrices...
INFO:absl:Optimize lambda...
INFO:absl:Optimal lambda [3.98270133]...
INFO:absl:Optimize xi...


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                  0.006
Method:                 Least Squares   F-statistic:                     1.287
Date:                 Di, 01 Jun 2021   Prob (F-statistic):              0.235
Time:                        16:56:48   Log-Likelihood:                -702.98
No. Observations:                 500   AIC:                             1428.
Df Residuals:                     489   BIC:                             1474.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0265      0.305      0.087      0.9

INFO:absl:Optimal xi [6.91830488]...
INFO:absl:Predict treatment effect...


In [19]:
# Simpson Estimation
x, y, ytrue, xstar, xstar_bound, ystar_ols, ystar_2sls, ystar_kiv, results = run_diversity_estimation_methods(
    Z_sim, div_simpson, Y_sim, Ytrue=Y_star, methods=["OLS", "2SLS", "KIV"])

plot_diversity_methods(x, y, xstar, xstar_bound, ystar_ols, ystar_2sls, ystar_kiv, results, ytrue)
fig.write_image(os.path.join(savepath,
                             "SimpsonDiversity_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))

fig.show()

INFO:absl:Setup matrices...
INFO:absl:Optimize lambda...
INFO:absl:Optimal lambda [3.91361359]...
INFO:absl:Optimize xi...


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                  0.006
Method:                 Least Squares   F-statistic:                     1.296
Date:                 Di, 01 Jun 2021   Prob (F-statistic):              0.229
Time:                        16:56:49   Log-Likelihood:                -702.93
No. Observations:                 500   AIC:                             1428.
Df Residuals:                     489   BIC:                             1474.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0384      0.305     -0.126      0.9

INFO:absl:Optimal xi [6.65859303]...
INFO:absl:Predict treatment effect...


# Confidence Intervals for higher dimensional methods

In [20]:
from run_methods_all import run_methods_all

In [None]:
today = datetime.now()
num_iteration = 20

path = os.getcwd()
mypath = os.path.join(path, "temp",
                      today.strftime("%Y_%m_%d_%H_%M") + "_Logcontrast_ConfidenceInterval_"+str(instrument_strength)
                      +"_"+ str(num_iteration)+"_Microbiota_" +str(p))

if not os.path.isdir(mypath):
    os.makedirs(mypath)


# create logging file with relevant information
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                    datefmt='%m-%d %H:%M',
                    filename=os.path.join(os.getcwd(), mypath, "configurations.log"),
                    filemode='w'
                    )

logging.info("n="+str(n)+", p="+str(p)+", instrument strength: "+instrument_strength)
logging.info("Number of runs: "+str(num_iteration))
logging.info("First Stage Specification: ")
logging.info("Number of Instruments: "+str(num_inst))
logging.info("Instrument Strength by F-Test: " +str(F))
logging.info("alpha0="+str(alpha0)+", alphaT="+str(alphaT)+", confounder multiplicator c_X="+str(c_X))
logging.info("Second Stage Specification: ")
logging.info("beta0="+str(beta0)+", betaT="+str(betaT)+", confounder multiplicator c_Y="+str(c_Y))
logging.info("Confounder mean mu_c="+str(mu_c))


df_beta, df_mse, mse_large_confidence= run_methods_confidence_interval(
    key, num_iteration, 
    n, p, num_inst, mu_c, c_X, alpha0, alphaT, c_Y, beta0, betaT, is_lognormal=True, num_star=num_star, 
    logcontrast_threshold=0.3)

**************************************************************************************************
*****************************************We are at 0 of 20***********************************************
**************************************************************************************************



divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.76  2.23  4.71 -2.63 -3.74  2.16 -1.84  3.7   5.66  2.19  0.18  3.52
  6.25 -0.95  2.13 -0.73  4.07  2.38 -1.02  2.41  0.39  5.13 -3.55  1.51
  2.72  3.11  5.23  2.46  7.15]
Estimated Beta: [ 4.98  3.9   1.71 -1.91  5.11  6.86  1.11  5.08 -0.56 -3.04  0.03  1.93
 -1.57 -4.67  2.34 -0.78  2.04 -2.86 -1.36  2.01 -1.46  0.5  -4.37  4.28
 -0.74 -2.03 -2.53 -4.8  -2.17 -7.03]
Error: 4202.0

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -37.499
Model:                         IV2SLS   Adj. R-squared:                -39.875
Method:                     Two Stage   F-statistic:                 1.374e-16
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        18:46:15                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -39.8759   3.38e+09  -1.18e-08      1.0


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.33 -0.18  2.83 -2.12 -3.35  3.19 -1.63  3.47  6.66  2.09  1.58  3.19
  3.44  0.87  3.16  1.67  4.69  2.12 -0.96  1.81  1.3   5.46 -4.19  1.88
  3.08  3.1   2.92  0.65  5.86]
Estimated Beta: [ 3.67  3.21  3.67  0.25  5.07  6.84  0.34  5.04 -0.16 -3.91  0.21  0.55
 -1.26 -1.76  0.65 -1.77 -0.43 -3.63 -1.26  1.79 -1.   -0.56 -4.88  4.74
 -1.28 -2.58 -2.72 -2.65 -0.44 -5.76]
Error: 3745.31

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -10.270
Model:                         IV2SLS   Adj. R-squared:                -10.965
Method:                     Two Stage   F-statistic:                 3.559e-15
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        18:48:05                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -13.9423        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [-0.32  1.48  2.82 -2.17 -2.16  3.28 -2.9   4.46  5.57  4.13  0.8   1.37
  4.25 -0.79  2.03  2.22  3.32  3.13 -0.9   2.26  0.19  5.48 -2.8   2.52
  3.61  2.19  2.85  1.65  5.02]
Estimated Beta: [ 3.85  4.31  2.27  0.22  5.1   5.52  0.    6.14 -1.3  -2.97 -2.02  1.09
  0.43 -2.67  2.25 -0.62 -0.94 -2.2  -2.19  1.78 -1.41  0.59 -4.82  3.4
 -1.91 -3.12 -1.82 -2.57 -1.45 -4.94]
Error: 3526.86

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                      -2.997
Model:                         IV2SLS   Adj. R-squared:                 -3.244
Method:                     Two Stage   F-statistic:                -8.760e-15
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        18:49:56                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -9.1912        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 1.97  1.86  2.31 -4.28 -3.71  4.2  -1.54  4.52  4.76  1.97  1.15  3.51
  5.55  1.49  5.03  2.28  4.8   1.62 -1.1  -0.26  0.69  3.98 -3.76  1.22
  1.83  1.59  2.89  2.33  6.52]
Estimated Beta: [ 5.14  2.35  1.47  0.32  7.1   7.34 -0.59  4.96 -1.28 -2.04  0.42  1.09
 -1.46 -3.85 -0.04 -3.8  -1.28 -4.01 -1.01  1.7   0.89 -0.07 -3.46  4.27
 -0.65 -1.33 -1.16 -2.54 -2.07 -6.41]
Error: 3574.14

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                      -4.883
Model:                         IV2SLS   Adj. R-squared:                 -5.246
Method:                     Two Stage   F-statistic:                 4.801e-16
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        18:51:40                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         23.0029   1.81e+08   1.27e-07      1.0


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.71  3.82  4.06 -2.36 -3.67  3.25 -2.17  4.65  6.5   3.16  1.91  0.53
  5.69 -1.4   1.68  1.1   1.93  1.68 -1.26  0.74  0.78  5.09 -4.04  2.05
  2.96  3.63  2.87  2.11  5.45]
Estimated Beta: [ 5.45  4.45  0.27 -1.3   4.86  6.77 -0.09  5.24 -1.72 -4.2  -1.34 -0.32
  0.96 -4.44  2.49 -0.59 -0.1  -1.02 -0.88  2.05  0.07 -0.01 -4.45  4.66
 -1.4  -2.4  -3.2  -2.56 -1.89 -5.36]
Error: 3354.73

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                    -627.401
Model:                         IV2SLS   Adj. R-squared:               -666.175
Method:                     Two Stage   F-statistic:                 1.067e-15
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        18:53:26                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.2746   8.15e+09   5.25e-10      1.0


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [-0.98  1.33  1.98 -3.36 -3.14  3.13 -0.92  4.28  4.89  2.11  0.46  3.34
  5.    0.4   1.94  2.09  2.84  2.   -0.48  1.56  0.81  5.62 -5.59 -0.2
  2.21  4.03  4.    1.65  8.41]
Estimated Beta: [ 2.73  4.11  1.79  0.59  6.06  6.5   0.25  4.13 -1.27 -2.39  0.03  1.57
 -1.47 -3.45  0.95 -0.66 -0.94 -1.83 -1.12  1.31 -0.76 -0.06 -5.02  6.19
  0.93 -1.53 -3.47 -3.58 -1.33 -8.27]
Error: 3304.5

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0. 


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                    -159.069
Model:                         IV2SLS   Adj. R-squared:               -168.946
Method:                     Two Stage   F-statistic:                 2.242e-15
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        18:55:13                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        299.0315        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.51  1.2   3.16 -1.99 -2.77  0.89 -1.86  3.93  7.14  4.59  1.43  1.03
  7.13 -0.15  1.81  2.61  4.44  1.2  -2.06  1.01  2.64  5.11 -2.32  2.17
  0.73  2.59  3.46  0.61  5.57]
Estimated Beta: [ 4.42  3.7   2.59 -0.08  4.88  6.14  2.65  5.46 -0.45 -4.27 -2.31  0.57
  0.87 -5.54  1.48 -0.53 -1.47 -3.51 -0.42  2.86 -0.18 -1.9  -4.55  2.82
 -1.66 -0.28 -2.2  -3.19 -0.41 -5.48]
Error: 3472.39

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -12.965
Model:                         IV2SLS   Adj. R-squared:                -13.826
Method:                     Two Stage   F-statistic:                -8.634e-15
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        18:56:58                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -36.0266        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.44  0.97  1.37 -3.02 -0.95  2.86 -0.81  1.21  5.52  1.64  2.44  1.56
  5.92 -0.56  2.35  0.72  4.2   2.02 -2.42  2.26  1.94  5.4  -2.66  1.32
  3.43  1.89  3.06  2.41  8.24]
Estimated Beta: [ 3.69  3.07  2.19  1.4   5.96  4.31  0.34  3.86  1.81 -2.85  0.66 -0.33
  0.38 -4.26  2.02 -0.95  0.58 -3.03 -1.02  3.42 -1.25 -1.03 -4.66  3.34
 -0.61 -2.81 -1.38 -2.64 -2.09 -8.1 ]
Error: 3074.12

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                      -1.781
Model:                         IV2SLS   Adj. R-squared:                 -1.953
Method:                     Two Stage   F-statistic:                 -0.002490
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        18:58:51                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         31.0259   7.28e+07   4.26e-07      1.0


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.08 -0.06  2.15 -3.28 -1.87  3.28 -1.64  3.88  4.25  2.25  1.2   1.37
  6.06 -1.79  3.46  1.16  4.95  2.4  -3.04  1.34  1.69  4.93 -1.36  0.93
  1.45  1.37  3.73  2.1   8.2 ]
Estimated Beta: [ 3.02  2.9   3.03  0.5   6.03  5.14 -0.11  4.68 -0.97 -1.79 -0.12  0.78
  0.5  -4.47  3.22 -2.09  0.07 -3.89 -1.55  3.9  -0.43 -0.85 -4.25  1.96
 -0.31 -0.88 -0.85 -3.32 -1.79 -8.06]
Error: 3350.85

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -25.564
Model:                         IV2SLS   Adj. R-squared:                -27.203
Method:                     Two Stage   F-statistic:                 6.192e-16
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        19:00:46                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         68.4759        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 1.17 -0.6   2.04 -2.31 -2.79  2.87 -1.97  1.88  5.29  2.64  2.72  1.7
  5.91 -1.    3.04  2.58  3.33  2.57 -1.65  2.47  2.37  4.57 -2.07  2.61
  5.61  3.01  1.49  1.71  5.39]
Estimated Beta: [ 3.78  2.12  3.69  0.84  5.19  6.18  0.53  5.3   1.45 -2.34 -0.09 -0.42
  0.42 -4.08  2.64 -1.46 -1.18 -2.1  -1.5   2.69 -1.45 -1.46 -3.82  2.76
 -1.93 -5.09 -2.66 -1.22 -1.5  -5.3 ]
Error: 3536.62

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -20.296
Model:                         IV2SLS   Adj. R-squared:                -21.609
Method:                     Two Stage   F-statistic:                 1.625e-14
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        19:02:42                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -38.3618        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.31 -0.23  3.85 -3.64 -3.77  2.67 -1.01  4.68  6.78  0.6   1.61  1.4
  5.46  0.53  2.46  1.32  6.03  1.5  -2.03  2.12  0.65  4.41 -2.73  1.11
  2.92  2.89  3.08  2.35  6.83]
Estimated Beta: [ 3.5   3.06  3.56 -1.08  6.33  7.2   0.88  4.43 -1.48 -4.21  1.59  0.48
  0.57 -3.75  0.95 -1.07 -0.05 -4.98 -0.65  2.89 -1.27  0.14 -3.73  3.37
 -0.44 -2.33 -2.41 -2.71 -2.08 -6.72]
Error: 3593.65

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                      -3.882
Model:                         IV2SLS   Adj. R-squared:                 -4.183
Method:                     Two Stage   F-statistic:                -1.547e-14
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        19:04:45                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -3.7646        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [-0.13  1.39  2.94 -2.88 -3.43  2.47 -3.49  3.75  5.6   3.36 -0.07  2.81
  5.98 -1.57  2.16  3.04  5.09  1.5  -2.04  1.92  0.76  5.97 -3.16  2.04
  1.72  1.11  3.16  2.84  8.98]
Estimated Beta: [ 3.44  3.62  1.83 -0.43  5.34  6.52  0.71  6.74 -0.5  -2.88 -1.08  2.19
 -0.81 -4.3   3.08 -0.67 -1.71 -4.   -0.59  2.96 -1.    0.1  -5.26  3.81
 -1.37 -1.12 -0.57 -2.7  -2.49 -8.82]
Error: 3339.12

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -35.631
Model:                         IV2SLS   Adj. R-squared:                -37.891
Method:                     Two Stage   F-statistic:                -1.588e-15
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        19:06:47                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -94.0727        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.92 -0.66  2.97 -2.39 -2.52  3.62 -0.87  3.36  5.66  1.91  1.44  3.2
  4.93 -0.76  2.71  0.87  2.68  1.7  -0.87  1.38  1.88  4.13 -2.79  1.09
  2.09  4.03  3.94  3.76  6.58]
Estimated Beta: [ 3.98  2.68  4.14  0.17  5.42  6.04 -0.18  4.1  -0.27 -3.07  0.3   0.62
 -1.33 -3.38  2.16 -1.37  0.35 -1.56 -0.7   1.84 -0.42 -1.   -3.38  3.51
 -0.34 -1.4  -3.46 -3.51 -3.47 -6.47]
Error: 3769.68

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -30.284
Model:                         IV2SLS   Adj. R-squared:                -32.214
Method:                     Two Stage   F-statistic:                 1.629e-14
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        19:08:36                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -40.6490   6.08e+08  -6.68e-08      1.0


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 0.56  0.24  4.24 -3.9  -2.68  2.45 -0.87  3.93  6.93  2.72  2.35  3.71
  6.13 -0.74  2.26  2.55  3.07  2.52 -1.03 -1.4   2.64  3.4  -3.28  0.4
  2.9   2.41  2.55  2.43  5.99]
Estimated Beta: [ 4.21  3.41  3.51 -1.19  6.85  6.3   1.2   4.4  -0.58 -4.18 -0.46 -0.32
 -1.93 -4.73  1.94 -1.11 -1.55 -2.24 -1.84  1.67  2.11 -1.97 -2.86  3.81
  0.19 -2.37 -1.98 -2.21 -2.18 -5.89]
Error: 3559.72

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                   -3417.886
Model:                         IV2SLS   Adj. R-squared:              -3628.839
Method:                     Two Stage   F-statistic:                -1.062e-14
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        19:10:19                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       -933.2759        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Estimated Beta: [ 1.34  1.82  2.6  -1.82 -1.49  3.12 -1.35  2.99  4.51  0.79  2.27  2.22
  5.55 -0.37  3.54 -0.02  3.33  1.95 -1.    1.78  1.48  4.42 -2.36  0.35
  1.68  3.1   3.5   1.6   7.52]
Estimated Beta: [ 5.09  3.19  1.9   0.4   4.68  4.69 -0.04  4.29 -0.15 -2.08  1.37 -0.25
 -0.39 -4.02  1.71 -2.29  1.15 -2.3  -1.07  1.86 -0.94 -0.72 -3.79  2.94
  0.28 -1.09 -2.6  -3.12 -1.32 -7.39]
Error: 3318.24

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - Log Contrast >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [-5. -5. -5. -5.  5.  5.  5.  5.  0.  0.  0.  0.  0.  0.  0.  0.  0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 19


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                      -0.971
Model:                         IV2SLS   Adj. R-squared:                 -1.093
Method:                     Two Stage   F-statistic:                 5.170e-15
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        19:11:52                                         
No. Observations:                 500                                         
Df Residuals:                     470                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -12.1278        nan        nan        n


divide by zero encountered in log


invalid value encountered in subtract



In [30]:
import pickle
results = {
    "df_mse": df_mse,
    "df_beta": df_beta,
    "mse_large": mse_large_confidence
}

with open(os.path.join(savepath, "results_"+str(p)+str(instrument_strength)+".pickle"), "wb") as f:
    pickle.dump(results, f)
    f.close()


# Mean Squarred Error Plot

In [36]:
filter_list = ["ALR+LC", "ILR+ILR", "ONLY Second ILR", "ONLY Second LC", "DIR+LC", "ILR+LC"]

fig = plot_mse_results(df_mse, filter_list)
fig.update_yaxes(range=(0, 50))
fig.write_image(os.path.join(savepath,
                             "MSEFull_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+"_3.pdf" ))
fig.show()

# Beta Plot

In [39]:
fig = plot_beta_results(df_beta, V.T@betaT, filter_list)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))

fig.update_yaxes(range=(-15, 20))
fig.write_image(os.path.join(savepath,
                             "BetaFull_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))

fig.show()


In [43]:
fig = plot_beta_results(df_beta, V.T@betaT, filter_list, beta_zero=True)


fig.update_yaxes(range=(-5, 5))
fig.write_image(os.path.join(savepath,
                             "BetaZeroFull_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))

fig.show()
