In [1]:
import sys
import os
sys.path.append(os.path.join(os.getcwd(), "../src"))

In [2]:
import logging
import jax
import jax.numpy as np
import numpy as onp
import skbio.stats.composition as cmp
import plotly.express as px
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import statsmodels.api as sm
from datetime import datetime

from simulate_data_fct import sim_IV_ilr_linear
from plot_fct import update_layout, img_path, update_layout_px, colours, plot_ilr_X_vs_Y
from run_methods_all import run_methods_confidence_interval
from plot_fct import plot_mse_results, plot_beta_results



In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
# set up logging file
logging.getLogger().setLevel(logging.INFO)

In [6]:
key = jax.random.PRNGKey(191)

# Linear Setup

## Data Generation

In [12]:
n = 1000
p = 30
num_inst = 20
instrument_strength = "unknown"
mypath = os.path.join(img_path, "HighDimLinear")


V = cmp._gram_schmidt_basis(p)

c_X = np.hstack([np.array([-1, 2, -2, 1]), np.zeros(p-1-4, )]) # confounder multiplication to X

alpha0 = np.hstack([np.array([1, 1, 3, 1]), np.zeros(p-1-4, )])
alphaT = jax.random.choice(key, np.array([0.5, 0.75, 0.25, 0, 0, 0]), (p-1, num_inst))  # -4, 4

betaT_log = np.hstack([np.array([5, -5, 3, -3]), np.zeros(p-1-4, )])
betaT_p = np.hstack([betaT_log, -betaT_log.sum()])
beta0 = 5
betaT = V@betaT_p

mu_c = 3
c_Y = 4

In [8]:
confounder, Z_sim, X_sim, Y_sim, X_star, Y_star = sim_IV_ilr_linear(
            key,
            n=n,
            p=p,
            num_inst=num_inst,
            mu_c=mu_c,
            c_X=c_X,
            alpha0=alpha0,
            alphaT=alphaT,
            c_Y=c_Y,
            beta0=beta0,
            betaT=betaT,
            num_star=500)
X_sim_ilr = cmp.ilr(X_sim)
X_star_ilr = cmp.ilr(X_star)

## Data Visualization

In [46]:
fig = px.bar(pd.DataFrame(X_sim[:100, :], columns=["Microbiota "+str(i) for i in range(X_sim.shape[1])]))
fig = update_layout_px(fig)
fig.update_xaxes(title="Sample number")
fig.update_yaxes(title="Percentage of microbiota in sample")
fig.write_image(os.path.join(mypath,
                             "Barplot_Sample_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))
fig.show()

In [10]:
# Compute F-Statistics 
ZZ_sim = onp.array(sm.add_constant(Z_sim))
alphahat = np.linalg.inv(ZZ_sim.T@ZZ_sim)@ZZ_sim.T@X_sim_ilr
MSM = np.sum(((ZZ_sim@alphahat - np.mean(X_sim_ilr, axis=0))**2), axis=0)/(num_inst)
MSE = np.sum(((ZZ_sim@alphahat - X_sim_ilr)**2), axis=0)/(n-num_inst)
F = MSM / MSE
F

DeviceArray([1.0789142e+01, 3.0695031e+00, 4.8881588e+00, 1.1596719e+01,
             1.8656028e+11, 1.7220629e+11, 2.3095638e+11, 1.6802064e+11,
             1.5984609e+11, 1.5448152e+11, 1.0957689e+11, 2.5182944e+11,
             3.7260768e+11, 2.3788755e+11, 2.6528165e+11, 1.5829606e+11,
             2.2666317e+11, 1.8636797e+11, 1.1781357e+11, 2.0500675e+11,
             3.2960496e+11, 1.5847331e+11, 1.2177576e+11, 1.2134960e+11,
             1.3080679e+11, 2.1113422e+11, 2.1239371e+11, 1.4785556e+11,
             2.6978796e+11], dtype=float32)

In [24]:
fig = plot_ilr_X_vs_Y(X_sim_ilr, Y_sim, Y_star)

fig.update_layout(xaxis3=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(yaxis3=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(xaxis4=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(yaxis4=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(xaxis5=dict(showline=True, linewidth=2, linecolor="black"))
fig.update_layout(yaxis5=dict(showline=True, linewidth=2, linecolor="black"))

fig.write_image(os.path.join(mypath,
                             "Data_vs_trueeffect_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))


fig.show()

# Diversity Approach

In [25]:
from plot_fct import plot_diversity_methods
from helper_fct import diversity
from run_methods_all import run_diversity_estimation_methods
div_shannon = diversity(X_sim, "shannon")
div_simpson = diversity(X_sim, "simpson")

In [26]:
# Shannon Estimation
x, y, ytrue, xstar, xstar_bound, ystar_ols, ystar_2sls, ystar_kiv, results = run_diversity_estimation_methods(
    Z_sim, div_shannon, Y_sim, Ytrue=Y_star, methods=["OLS", "2SLS", "KIV"])

fig = plot_diversity_methods(x, y, xstar, xstar_bound, ystar_ols, ystar_2sls, ystar_kiv, results, ytrue)
fig.write_image(os.path.join(mypath,
                             "ShannonDiversity_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))

fig.show()

INFO:absl:Setup matrices...


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.088
Model:                            OLS   Adj. R-squared:                  0.070
Method:                 Least Squares   F-statistic:                     4.733
Date:                 Di, 01 Jun 2021   Prob (F-statistic):           4.61e-11
Time:                        15:43:53   Log-Likelihood:                -1372.8
No. Observations:                1000   AIC:                             2788.
Df Residuals:                     979   BIC:                             2891.
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.2798      0.250      5.110      0.0

INFO:absl:Optimize lambda...
INFO:absl:Optimal lambda [4.52595122]...
INFO:absl:Optimize xi...
INFO:absl:Optimal xi [-5.21784051]...
INFO:absl:Predict treatment effect...


In [27]:
# Simpson Estimation
x, y, ytrue, xstar, xstar_bound, ystar_ols, ystar_2sls, ystar_kiv, results = run_diversity_estimation_methods(
    Z_sim, div_simpson, Y_sim, Ytrue=Y_star, methods=["OLS", "2SLS", "KIV"])

plot_diversity_methods(x, y, xstar, xstar_bound, ystar_ols, ystar_2sls, ystar_kiv, results, ytrue)
fig.write_image(os.path.join(mypath,
                             "SimpsonDiversity_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))

fig.show()

INFO:absl:Setup matrices...


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.042
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     2.155
Date:                 Di, 01 Jun 2021   Prob (F-statistic):            0.00234
Time:                        15:43:56   Log-Likelihood:                -1397.4
No. Observations:                1000   AIC:                             2837.
Df Residuals:                     979   BIC:                             2940.
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.3727      0.257      1.452      0.1

INFO:absl:Optimize lambda...
INFO:absl:Optimal lambda [4.96727534]...
INFO:absl:Optimize xi...
INFO:absl:Optimal xi [-4.17793788]...
INFO:absl:Predict treatment effect...


# Confidence Intervals for higher dimensional methods

In [28]:
from run_methods_all import run_methods_all

In [29]:
today = datetime.now()
num_iteration = 20

path = os.getcwd()
mypath = os.path.join(path, "temp",
                      today.strftime("%Y_%m_%d_%H_%M") + "_LINEAR_ConfidenceInterval_"+str(instrument_strength)
                      +"_"+ str(num_iteration)+"_Microbiota_" +str(p))

if not os.path.isdir(mypath):
    os.makedirs(mypath)


# create logging file with relevant information
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                    datefmt='%m-%d %H:%M',
                    filename=os.path.join(os.getcwd(), mypath, "configurations.log"),
                    filemode='w'
                    )

logging.info("n="+str(n)+", p="+str(p)+", instrument strength: "+instrument_strength)
logging.info("Number of runs: "+str(num_iteration))
logging.info("First Stage Specification: ")
logging.info("Number of Instruments: "+str(num_inst))
logging.info("Instrument Strength by F-Test: " +str(F))
logging.info("alpha0="+str(alpha0)+", alphaT="+str(alphaT)+", confounder multiplicator c_X="+str(c_X))
logging.info("Second Stage Specification: ")
logging.info("beta0="+str(beta0)+", betaT="+str(betaT)+", confounder multiplicator c_Y="+str(c_Y))
logging.info("Confounder mean mu_c="+str(mu_c))


df_beta, df_mse, mse_large_confidence= run_methods_confidence_interval(
    key, num_iteration, 
    n, p, num_inst, mu_c, c_X, alpha0, alphaT, c_Y, beta0, betaT, num_star=200)

**************************************************************************************************
*****************************************We are at 0 of 20***********************************************
**************************************************************************************************
---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY Second Stage - ILR Regression >>>>>>>>>>>>>>>>>>>>>>>>>>>
True Beta: [ 7.071068  -2.4494898  3.4641016  0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.       ]
Estimated Beta: [ 6.34 -1.48  2.89  0.18 -0.46  0.67  0.65 -0.97  1.16  0.31 -0.61  0.43
 -0.68 -0.56  0.7   0.   -0.26  0.01  0.63  1.48  1.72 -0.82 -0.03 -1.04
  0.06  0.14 -0.2  -0.51 -1.37]
Estim


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 9


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                      -0.047
Model:                         IV2SLS   Adj. R-squared:                 -0.078
Method:                     Two Stage   F-statistic:                -0.0009441
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:44:26                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         56.6557   9.42e+07   6.02e-07      1.0

Error: 331.38

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 16.36

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 229.82

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 9



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.721
Model:                         IV2SLS   Adj. R-squared:                  0.713
Method:                     Two Stage   F-statistic:                 -0.003366
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:44:46                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -18.0629   1.27e+08  -1.42e-07      1.0

Error: 345.78

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 15.44

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 133.62

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 10


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -19.974
Model:                         IV2SLS   Adj. R-squared:                -20.601
Method:                     Two Stage   F-statistic:                  0.002509
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:45:06                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.8884   1.28e+09   3.03e-09      1.0

Error: 353.65

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 14.2

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 242.82

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 10


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.999
Model:                         IV2SLS   Adj. R-squared:                  0.999
Method:                     Two Stage   F-statistic:                     5.178
                        Least Squares   Prob (F-statistic):           9.22e-17
Date:                 Di, 01 Jun 2021                                         
Time:                        15:45:26                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.3283        nan        nan        n

Error: 287.2

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 15.78

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 33.08

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 11


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.969
Model:                         IV2SLS   Adj. R-squared:                  0.968
Method:                     Two Stage   F-statistic:                     10.34
                        Least Squares   Prob (F-statistic):           6.47e-40
Date:                 Di, 01 Jun 2021                                         
Time:                        15:45:46                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         12.1021        nan        nan        n


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 11


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.823
Model:                         IV2SLS   Adj. R-squared:                  0.818
Method:                     Two Stage   F-statistic:                    0.1018
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:46:09                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -10.0598   9.15e+06   -1.1e-06      1.0

Error: 365.06

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 15.31

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 70.68

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 16


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                     -39.826
Model:                         IV2SLS   Adj. R-squared:                -41.046
Method:                     Two Stage   F-statistic:                    0.2757
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:46:31                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         17.2972   5.29e+07   3.27e-07      1.0

Error: 279.18

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 13.62

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 74.72

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 9


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.982
Model:                         IV2SLS   Adj. R-squared:                  0.981
Method:                     Two Stage   F-statistic:                  -0.06983
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:46:52                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.8456        nan        nan        n

Error: 321.16

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 15.15

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 190.4

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 10


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.392
Model:                         IV2SLS   Adj. R-squared:                  0.374
Method:                     Two Stage   F-statistic:                   -0.1306
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:47:13                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -54.8168   1.44e+07   -3.8e-06      1.0

Error: 299.39

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 14.57

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 95.42

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 12


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                      -1.030
Model:                         IV2SLS   Adj. R-squared:                 -1.091
Method:                     Two Stage   F-statistic:                  -0.05294
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:47:36                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         67.0865   4.13e+07   1.62e-06      1.0

Error: 453.98

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 17.43

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 46.2

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 16


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.915
Model:                         IV2SLS   Adj. R-squared:                  0.913
Method:                     Two Stage   F-statistic:                     5.228
                        Least Squares   Prob (F-statistic):           5.52e-17
Date:                 Di, 01 Jun 2021                                         
Time:                        15:47:58                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.8180   5.19e+06   9.28e-07      1.0

Error: 419.25

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 14.4

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 171.24

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 9


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                    -283.680
Model:                         IV2SLS   Adj. R-squared:               -292.191
Method:                     Two Stage   F-statistic:                 -0.001521
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:48:20                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        558.1401        nan        nan        n

Error: 304.89

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 14.64

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 60.9

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 13


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.115
Model:                         IV2SLS   Adj. R-squared:                  0.089
Method:                     Two Stage   F-statistic:                    0.9298
                        Least Squares   Prob (F-statistic):              0.574
Date:                 Di, 01 Jun 2021                                         
Time:                        15:48:42                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -46.7906   8.72e+06  -5.36e-06      1.0

Error: 380.07

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 17.98

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 150.07

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 10


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.973
Model:                         IV2SLS   Adj. R-squared:                  0.972
Method:                     Two Stage   F-statistic:                     9.516
                        Least Squares   Prob (F-statistic):           2.82e-36
Date:                 Di, 01 Jun 2021                                         
Time:                        15:49:05                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9332   2.43e+06   3.84e-07      1.0


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 15


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.986
Model:                         IV2SLS   Adj. R-squared:                  0.986
Method:                     Two Stage   F-statistic:                     27.16
                        Least Squares   Prob (F-statistic):          1.19e-104
Date:                 Di, 01 Jun 2021                                         
Time:                        15:49:27                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          9.7139        nan        nan        n

Error: 342.3

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 16.84

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 54.0

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 9


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                      -5.781
Model:                         IV2SLS   Adj. R-squared:                 -5.984
Method:                     Two Stage   F-statistic:                  0.004189
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:49:51                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -90.9171   2.16e+08  -4.21e-07      1.0

Error: 342.88

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 16.3

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 255.5

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 12


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.983
Model:                         IV2SLS   Adj. R-squared:                  0.982
Method:                     Two Stage   F-statistic:                    -2.590
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:50:13                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.9323   3.29e+06  -5.87e-07      1.0

Error: 368.11

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 16.36

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 233.83

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 10


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.982
Model:                         IV2SLS   Adj. R-squared:                  0.981
Method:                     Two Stage   F-statistic:                     2.528
                        Least Squares   Prob (F-statistic):           1.80e-05
Date:                 Di, 01 Jun 2021                                         
Time:                        15:50:34                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          2.8140        nan        nan        n


covariance of constraints does not have full rank. The number of constraints is 29, but rank is 13


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.825
Model:                         IV2SLS   Adj. R-squared:                  0.819
Method:                     Two Stage   F-statistic:                   -0.3683
                        Least Squares   Prob (F-statistic):               1.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:50:55                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -6.3776        nan        nan        n

Error: 494.65

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< ONLY SECOND STAGE - Kernel Regression KIV >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 18.76

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - Kernel Regression KIV (manual) >>>>>>>>>>>>>>>>>>>>>>>>>
Error: 332.41

---------------------------------------------------------------------------------------------
<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2SLS - ILR ILR Regression Implementation >>>>>>>>>>>>>>>>>>>>>>>



covariance of constraints does not have full rank. The number of constraints is 29, but rank is 20


invalid value encountered in sqrt



                          IV2SLS Regression Results                           
Dep. Variable:                      y   R-squared:                       0.998
Model:                         IV2SLS   Adj. R-squared:                  0.998
Method:                     Two Stage   F-statistic:                     338.7
                        Least Squares   Prob (F-statistic):               0.00
Date:                 Di, 01 Jun 2021                                         
Time:                        15:51:15                                         
No. Observations:                1000                                         
Df Residuals:                     970                                         
Df Model:                          29                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.9880   7.49e+05   5.32e-06      1.0

In [30]:
import pickle
results = {
    "df_mse": df_mse,
    "df_beta": df_beta,
    "mse_large": mse_large_confidence
}

with open(os.path.join(mypath, "results_"+str(p)+str(instrument_strength)+".pickle"), "wb") as f:
    pickle.dump(results, f)
    f.close()


# Mean Squarred Error Plot

In [36]:
filter_list = ["ALR+LC", "ILR+ILR", "ONLY Second ILR", "ONLY Second LC", "DIR+LC", "ILR+LC"]

fig = plot_mse_results(df_mse, filter_list)
fig.update_yaxes(range=(0, 50))
fig.write_image(os.path.join(mypath,
                             "MSEFull_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+"_3.pdf" ))
fig.show()

# Beta Plot

In [39]:
fig = plot_beta_results(df_beta, V.T@betaT, filter_list)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))

fig.update_yaxes(range=(-15, 20))
fig.write_image(os.path.join(mypath,
                             "BetaFull_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))

fig.show()


In [43]:
fig = plot_beta_results(df_beta, V.T@betaT, filter_list, beta_zero=True)


fig.update_yaxes(range=(-5, 5))
fig.write_image(os.path.join(mypath,
                             "BetaZeroFull_" +str(p)+"InstrumentStrength_"+str(instrument_strength)+".pdf" ))

fig.show()
