# Examine IHT reconstruction results 

This notebook examines IHT's reconstruction results with and without debiasing. Overall, debiasing affects neither model selection nor parameter estimation. 

In [1]:
using DelimitedFiles
using Random
using DataFrames
using StatsBase
using Statistics
using Plots
using Plotly

# Below are 100 simulations of y, where X is a 5k by 100k matrix

In [14]:
# Load the saved IHT estimates, one matrix per response distribution.
# Files under "repeats/" come from runs with debiasing, files under
# "repeats_nodebias/" from runs without it.
# NOTE(review): later cells index these matrices as [i, :] for i in 1:length(true_b)
# and treat nonzero entries as "found", so row i presumably holds the estimates of
# the predictor whose true effect is true_b[i] -- confirm against the simulation script.

# Normal
normal_debias   = readdlm("repeats/Normal_100")
normal_nodebias = readdlm("repeats_nodebias/Normal_100")

# Bernoulli (logistic)
logistic_debias   = readdlm("repeats/Bernoulli_100")
logistic_nodebias = readdlm("repeats_nodebias/Bernoulli_100")

# Poisson
poisson_debias   = readdlm("repeats/Poisson_100")
poisson_nodebias = readdlm("repeats_nodebias/Poisson_100")

# Negative binomial
negativebinomial_debias   = readdlm("repeats/NegativeBinomial_100")
negativebinomial_nodebias = readdlm("repeats_nodebias/NegativeBinomial_100")

# True effect sizes of the model (kept as the last expression so the cell displays it)
true_b = [0.01, 0.5, 0.03, 0.1, 0.05, 0.25]

6-element Array{Float64,1}:
 0.01
 0.5 
 0.03
 0.1 
 0.05
 0.25

# Construct Table

### First, compute how often each predictor is found (count over the 100 simulations)

In [15]:
# Number of true predictors; rows 1:k of every result matrix are inspected.
k = length(true_b)

# For each predictor i, count the nonzero entries in row i of the result matrix,
# i.e. how many times that predictor ended up with a nonzero estimate.
# The counts are stored as Float64 vectors of length k.

# With debiasing
normal_found   = Float64[count(x -> x != 0, normal_debias[i, :]) for i in 1:k]
logistic_found = Float64[count(x -> x != 0, logistic_debias[i, :]) for i in 1:k]
poisson_found  = Float64[count(x -> x != 0, poisson_debias[i, :]) for i in 1:k]
negativebinomial_found =
    Float64[count(x -> x != 0, negativebinomial_debias[i, :]) for i in 1:k]

# Without debiasing
normal_found_nodebias   = Float64[count(x -> x != 0, normal_nodebias[i, :]) for i in 1:k]
logistic_found_nodebias = Float64[count(x -> x != 0, logistic_nodebias[i, :]) for i in 1:k]
poisson_found_nodebias  = Float64[count(x -> x != 0, poisson_nodebias[i, :]) for i in 1:k]
negativebinomial_found_nodebias =
    Float64[count(x -> x != 0, negativebinomial_nodebias[i, :]) for i in 1:k]

# The original cell ended with a `for` loop, which displays no output.
nothing

# Found proportion, as a count out of 100 simulations (debiasing)

In [16]:
# Detection counts for the runs with debiasing, one column per response distribution.
# Despite the `_prob_find` names, the columns hold the raw number of nonzero
# estimates per predictor (the *_found vectors), not a fraction.
find_probability = DataFrame(
    true_b = true_b,
    normal_prob_find = normal_found,
    logistic_prob_find = logistic_found,
    poisson_prob_find = poisson_found,
    negativebinomial_prob_find = negativebinomial_found)

# Independent copy sorted in descending order (true_b is the leading column);
# `find_probability` itself keeps the original row order.
find_probability_debias = sort(find_probability, rev=true)

Unnamed: 0_level_0,true_b,normal_prob_find,logistic_prob_find,poisson_prob_find,negativebinomial_prob_find
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,0.5,100.0,100.0,100.0,100.0
2,0.25,100.0,100.0,100.0,100.0
3,0.1,100.0,78.0,96.0,100.0
4,0.05,77.0,5.0,72.0,61.0
5,0.03,15.0,0.0,13.0,12.0
6,0.01,0.0,0.0,0.0,0.0


# Found proportion, as a count out of 100 simulations (no debiasing)

In [17]:
# Detection counts for the runs without debiasing, one column per response distribution.
# Despite the `_prob_find` names, the columns hold the raw number of nonzero
# estimates per predictor (the *_found_nodebias vectors), not a fraction.
find_probability_nodebias = DataFrame(
    true_b = true_b,
    normal_prob_find_nodebias = normal_found_nodebias,
    logistic_prob_find_nodebias = logistic_found_nodebias,
    poisson_prob_find_nodebias = poisson_found_nodebias,
    negativebinomial_prob_find_nodebias = negativebinomial_found_nodebias)

# Independent copy sorted in descending order (true_b is the leading column);
# `find_probability_nodebias` itself keeps the original row order.
find_probability_nodebias_cp = sort(find_probability_nodebias, rev=true)

Unnamed: 0_level_0,true_b,normal_prob_find_nodebias,logistic_prob_find_nodebias,poisson_prob_find_nodebias,negativebinomial_prob_find_nodebias
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,0.5,100.0,100.0,100.0,100.0
2,0.25,100.0,100.0,100.0,100.0
3,0.1,100.0,73.0,99.0,99.0
4,0.05,83.0,6.0,77.0,66.0
5,0.03,8.0,0.0,10.0,4.0
6,0.01,0.0,0.0,0.0,0.0


# Mean and standard deviation (debiasing)

In [18]:
k = length(true_b)

# One slot per entry of true_b. A slot stays at 0.0 when the corresponding
# predictor was never selected (its detection count is 0).
normal_mean, normal_std     = zeros(k), zeros(k)
logistic_mean, logistic_std = zeros(k), zeros(k)
poisson_mean, poisson_std   = zeros(k), zeros(k)
negativebinomial_mean, negativebinomial_std = zeros(k), zeros(k)

# Each tuple is (estimates, detection counts, mean output, std output) for one model.
# The mean and standard deviation are taken over the nonzero estimates only.
for (estimates, found, means, stds) in (
        (normal_debias, normal_found, normal_mean, normal_std),
        (logistic_debias, logistic_found, logistic_mean, logistic_std),
        (poisson_debias, poisson_found, poisson_mean, poisson_std),
        (negativebinomial_debias, negativebinomial_found,
         negativebinomial_mean, negativebinomial_std),
    )
    for i in 1:k
        found[i] == 0 && continue   # never selected: keep the 0.0 placeholders
        row = estimates[i, :]
        selected = row[row .!= 0]   # estimates from the runs that found predictor i
        means[i] = mean(selected)
        stds[i]  = std(selected)
    end
end

In [19]:
# Collect the per-predictor mean and standard deviation of the nonzero estimates
# (runs with debiasing) into one table, next to the true effect sizes.
# A 0.0 in a mean/std column means the predictor was never selected: the vectors
# are initialised with zeros and only filled when the detection count is nonzero.
found_mean_and_std = DataFrame(
    true_b = true_b,
    normal_mean = normal_mean, 
    normal_std = normal_std,
    logistic_mean = logistic_mean,
    logistic_std = logistic_std,
    poisson_mean = poisson_mean,
    poisson_std = poisson_std,
    negativebinomial_mean = negativebinomial_mean,
    negativebinomial_std = negativebinomial_std)
# Rows are left in the order of true_b here; a sorted and rounded copy is built in a later cell.

Unnamed: 0_level_0,true_b,normal_mean,normal_std,logistic_mean,logistic_std,poisson_mean,poisson_std,negativebinomial_mean,negativebinomial_std
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.5,0.499205,0.0107783,0.500955,0.0201752,0.473463,0.076793,0.494118,0.0214736
3,0.03,0.0482141,0.00458179,0.0,0.0,0.041355,0.00384615,0.0484982,0.00555893
4,0.1,0.0994391,0.00942976,0.107739,0.0141301,0.0969835,0.0118964,0.0975961,0.0101816
5,0.05,0.0521786,0.00720834,0.0974611,0.0126273,0.0532384,0.00778964,0.0546311,0.0077682
6,0.25,0.250971,0.0108108,0.253644,0.0211192,0.236194,0.0392121,0.247268,0.0145383


# Mean and standard deviation (non-debiasing) 

In [20]:
k = length(true_b)

# One slot per entry of true_b. A slot stays at 0.0 when the corresponding
# predictor was never selected (its detection count is 0).
normal_mean_nodebias, normal_std_nodebias     = zeros(k), zeros(k)
logistic_mean_nodebias, logistic_std_nodebias = zeros(k), zeros(k)
poisson_mean_nodebias, poisson_std_nodebias   = zeros(k), zeros(k)
negativebinomial_mean_nodebias, negativebinomial_std_nodebias = zeros(k), zeros(k)

# Each tuple is (estimates, detection counts, mean output, std output) for one model.
# The mean and standard deviation are taken over the nonzero estimates only.
for (estimates, found, means, stds) in (
        (normal_nodebias, normal_found_nodebias,
         normal_mean_nodebias, normal_std_nodebias),
        (logistic_nodebias, logistic_found_nodebias,
         logistic_mean_nodebias, logistic_std_nodebias),
        (poisson_nodebias, poisson_found_nodebias,
         poisson_mean_nodebias, poisson_std_nodebias),
        (negativebinomial_nodebias, negativebinomial_found_nodebias,
         negativebinomial_mean_nodebias, negativebinomial_std_nodebias),
    )
    for i in 1:k
        found[i] == 0 && continue   # never selected: keep the 0.0 placeholders
        row = estimates[i, :]
        selected = row[row .!= 0]   # estimates from the runs that found predictor i
        means[i] = mean(selected)
        stds[i]  = std(selected)
    end
end

In [21]:
# Collect the per-predictor mean and standard deviation of the nonzero estimates
# (runs without debiasing) into one table, next to the true effect sizes.
# A 0.0 in a mean/std column means the predictor was never selected: the vectors
# are initialised with zeros and only filled when the detection count is nonzero.
found_mean_and_std_nodebias = DataFrame(
    true_b = true_b,
    normal_mean_nodebias = normal_mean_nodebias, 
    normal_std_nodebias = normal_std_nodebias,
    logistic_mean_nodebias = logistic_mean_nodebias,
    logistic_std_nodebias = logistic_std_nodebias,
    poisson_mean_nodebias = poisson_mean_nodebias,
    poisson_std_nodebias = poisson_std_nodebias,
    negativebinomial_mean_nodebias = negativebinomial_mean_nodebias,
    negativebinomial_std_nodebias = negativebinomial_std_nodebias)
# Rows are left in the order of true_b here; a sorted and rounded copy is built in a later cell.

Unnamed: 0_level_0,true_b,normal_mean_nodebias,normal_std_nodebias,logistic_mean_nodebias,logistic_std_nodebias,poisson_mean_nodebias,poisson_std_nodebias,negativebinomial_mean_nodebias,negativebinomial_std_nodebias
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.5,0.500169,0.00988011,0.503653,0.0216698,0.472854,0.0455963,0.476275,0.0451686
3,0.03,0.0458877,0.00383527,0.0,0.0,0.0448035,0.00498413,0.0487748,0.00682147
4,0.1,0.0970366,0.00926511,0.108255,0.0152654,0.0959403,0.0113014,0.0957512,0.0124975
5,0.05,0.0534712,0.00799052,0.0902063,0.00400189,0.0509248,0.00790711,0.0539218,0.00644066
6,0.25,0.249822,0.00935955,0.252061,0.0209583,0.235781,0.0260038,0.238144,0.0239454


# Sort and round results (debiasing)

In [22]:
# Presentation copy of the debiasing table: rows sorted in descending order
# (true_b is the leading column) and every column rounded to 3 decimal places.
# `found_mean_and_std` itself is left unchanged.
found_mean_and_std_debias = sort(found_mean_and_std, rev=true)
for j in 1:size(found_mean_and_std_debias, 2)
    rounded = round.(found_mean_and_std_debias[:, j], digits=3)
    found_mean_and_std_debias[:, j] = rounded
end
found_mean_and_std_debias

Unnamed: 0_level_0,true_b,normal_mean,normal_std,logistic_mean,logistic_std,poisson_mean,poisson_std,negativebinomial_mean,negativebinomial_std
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.5,0.499,0.011,0.501,0.02,0.473,0.077,0.494,0.021
2,0.25,0.251,0.011,0.254,0.021,0.236,0.039,0.247,0.015
3,0.1,0.099,0.009,0.108,0.014,0.097,0.012,0.098,0.01
4,0.05,0.052,0.007,0.097,0.013,0.053,0.008,0.055,0.008
5,0.03,0.048,0.005,0.0,0.0,0.041,0.004,0.048,0.006
6,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Sort and round results (non-debiasing)

In [23]:
# Presentation copy of the non-debiasing table: rows sorted in descending order
# (true_b is the leading column) and every column rounded to 3 decimal places.
# `found_mean_and_std_nodebias` itself is left unchanged.
found_mean_and_std_nodebias_copy = sort(found_mean_and_std_nodebias, rev=true)
for j in 1:size(found_mean_and_std_nodebias_copy, 2)
    rounded = round.(found_mean_and_std_nodebias_copy[:, j], digits=3)
    found_mean_and_std_nodebias_copy[:, j] = rounded
end
found_mean_and_std_nodebias_copy

Unnamed: 0_level_0,true_b,normal_mean_nodebias,normal_std_nodebias,logistic_mean_nodebias,logistic_std_nodebias,poisson_mean_nodebias,poisson_std_nodebias,negativebinomial_mean_nodebias,negativebinomial_std_nodebias
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.5,0.5,0.01,0.504,0.022,0.473,0.046,0.476,0.045
2,0.25,0.25,0.009,0.252,0.021,0.236,0.026,0.238,0.024
3,0.1,0.097,0.009,0.108,0.015,0.096,0.011,0.096,0.012
4,0.05,0.053,0.008,0.09,0.004,0.051,0.008,0.054,0.006
5,0.03,0.046,0.004,0.0,0.0,0.045,0.005,0.049,0.007
6,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
