# Examine IHT reconstruction results 

This notebook examines IHT's reconstruction result with and without debiasing. Overall, debiasing does not affect model selection nor parameter estimation. 

In [1]:
using DelimitedFiles
using Random
using DataFrames
using StatsBase
using Statistics

# Below are 100 simulations of y where X is 5k by 100k matrix

In [2]:
#debiasing simulation results
normal_5k_by_100k_100 = readdlm("repeats/Normal_100")
logistic_5k_by_100k_100 = readdlm("repeats/Bernoulli_100")
poisson_5k_by_100k_100 = readdlm("repeats/Poisson_100")
negativebinomial_5k_by_100k_100 = readdlm("repeats/NegativeBinomial_100")

#non-debiasing simulation results
normal_5k_by_100k_100_nodebias = readdlm("repeats_nodebias/Normal_100")
logistic_5k_by_100k_100_nodebias = readdlm("repeats_nodebias/Bernoulli_100")
poisson_5k_by_100k_100_nodebias = [readdlm("repeats_nodebias/Poisson_50_1") readdlm("repeats_nodebias/Poisson_50_2")]
negativebinomial_5k_by_100k_100_nodebias = readdlm("repeats_nodebias/NegativeBinomial_100")

#true model 
true_b = [0.25; 0.5; 0.1; 0.8]

4-element Array{Float64,1}:
 0.25
 0.5 
 0.1 
 0.8 

### First compute the probability that reach predictor is found

In [3]:
k = size(true_b, 1)
normal_found   = zeros(k)
logistic_found = zeros(k)
poisson_found  = zeros(k)
negativebinomial_found = zeros(k)

normal_found_nodebias   = zeros(k)
logistic_found_nodebias = zeros(k)
poisson_found_nodebias  = zeros(k)
negativebinomial_found_nodebias = zeros(k)

for i in 1:k
    normal_found[i]   = sum(normal_5k_by_100k_100[i, :] .!= 0)
    logistic_found[i] = sum(logistic_5k_by_100k_100[i, :] .!= 0)
    poisson_found[i]  = sum(poisson_5k_by_100k_100[i, :] .!= 0)
    negativebinomial_found[i] = sum(negativebinomial_5k_by_100k_100[i, :] .!= 0)
    
    normal_found_nodebias[i]   = sum(normal_5k_by_100k_100_nodebias[i, :] .!= 0)
    logistic_found_nodebias[i] = sum(logistic_5k_by_100k_100_nodebias[i, :] .!= 0)
    poisson_found_nodebias[i]  = sum(poisson_5k_by_100k_100_nodebias[i, :] .!= 0)
    negativebinomial_found_nodebias[i] = sum(negativebinomial_5k_by_100k_100_nodebias[i, :] .!= 0)    
end

# Found probability (debiasing)

In [14]:
find_probability = DataFrame(
    true_b = true_b[:],
    normal_prob_find = normal_found, 
    logistic_prob_find = logistic_found,
    poisson_prob_find  = poisson_found,
    negativebinomial_prob_find = negativebinomial_found)
sort!(find_probability, rev=true) #sort later

Unnamed: 0_level_0,true_b,normal_prob_find,logistic_prob_find,poisson_prob_find,negativebinomial_prob_find
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,0.8,100.0,100.0,100.0,100.0
2,0.5,100.0,100.0,100.0,100.0
3,0.25,100.0,100.0,100.0,56.0
4,0.1,100.0,16.0,94.0,0.0


# Found probability (no debiasing)

In [15]:
find_probability_nodebias = DataFrame(
    true_b = true_b[:],
    normal_prob_find_nodebias = normal_found_nodebias, 
    logistic_prob_find_nodebias = logistic_found_nodebias,
    poisson_prob_find_nodebias  = poisson_found_nodebias,
    negativebinomial_prob_find_nodebias = negativebinomial_found_nodebias)
sort!(find_probability, rev=true) #sort later

Unnamed: 0_level_0,true_b,normal_prob_find,logistic_prob_find,poisson_prob_find,negativebinomial_prob_find
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,0.8,100.0,100.0,100.0,100.0
2,0.5,100.0,100.0,100.0,100.0
3,0.25,100.0,100.0,100.0,56.0
4,0.1,100.0,16.0,94.0,0.0


# Mean and standard deviation (debiasing)

In [6]:
k = size(true_b, 1)
normal_mean   = zeros(k)
normal_std    = zeros(k)
logistic_mean = zeros(k)
logistic_std  = zeros(k)
poisson_mean  = zeros(k)
poisson_std   = zeros(k)
negativebinomial_mean = zeros(k)
negativebinomial_std  = zeros(k)

for i in 1:k
    #compute mean and std if at least 1 found
    if normal_found[i] != 0
        normal_cur_row = normal_5k_by_100k_100[i, :] .!= 0
        normal_mean[i] = mean(normal_5k_by_100k_100[i, :][normal_cur_row])
        normal_std[i]  = std(normal_5k_by_100k_100[i, :][normal_cur_row])
    end
    
    if logistic_found[i] != 0
        logistic_cur_row = logistic_5k_by_100k_100[i, :] .!= 0
        logistic_mean[i] = mean(logistic_5k_by_100k_100[i, :][logistic_cur_row])
        logistic_std[i]  = std(logistic_5k_by_100k_100[i, :][logistic_cur_row])
    end
    
    if poisson_found[i] != 0
        poisson_cur_row = poisson_5k_by_100k_100[i, :] .!= 0
        poisson_mean[i] = mean(poisson_5k_by_100k_100[i, :][poisson_cur_row])
        poisson_std[i]  = std(poisson_5k_by_100k_100[i, :][poisson_cur_row])
    end
    
    if negativebinomial_found[i] != 0
        negativebinomial_cur_row = negativebinomial_5k_by_100k_100[i, :] .!= 0
        negativebinomial_mean[i] = mean(negativebinomial_5k_by_100k_100[i, :][negativebinomial_cur_row])
        negativebinomial_std[i]  = std(negativebinomial_5k_by_100k_100[i, :][negativebinomial_cur_row])
    end
end

In [9]:
found_mean_and_std = DataFrame(
    true_b = true_b[:],
    normal_mean = normal_mean, 
    normal_std = normal_std,
    logistic_mean = logistic_mean,
    logistic_std = logistic_std,
    poisson_mean = poisson_mean,
    poisson_std = poisson_std,
    negativebinomial_mean = negativebinomial_mean,
    negativebinomial_std = negativebinomial_std)
# sort!(found_mean_and_std, rev=true) #sort later

Unnamed: 0_level_0,true_b,normal_mean,normal_std,logistic_mean,logistic_std,poisson_mean,poisson_std,negativebinomial_mean,negativebinomial_std
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.25,0.249992,0.015539,0.252406,0.0324372,0.250319,0.00874955,0.249273,0.0122305
2,0.5,0.500658,0.012804,0.49942,0.0295852,0.498949,0.00556203,0.50532,0.0119818
3,0.1,0.099854,0.014814,0.157379,0.0151122,0.0996543,0.00965202,0.0,0.0
4,0.8,0.80075,0.0152157,0.804983,0.0366237,0.801283,0.00692655,0.809977,0.0122908


# Mean and standard deviation (non-debiasing) 

In [10]:
k = size(true_b, 1)
normal_mean_nodebias   = zeros(k)
normal_std_nodebias    = zeros(k)
logistic_mean_nodebias = zeros(k)
logistic_std_nodebias  = zeros(k)
poisson_mean_nodebias  = zeros(k)
poisson_std_nodebias   = zeros(k)
negativebinomial_mean_nodebias = zeros(k)
negativebinomial_std_nodebias  = zeros(k)

for i in 1:k
    #compute mean and std if at least 1 found
    if normal_found_nodebias[i] != 0
        normal_cur_row = normal_5k_by_100k_100_nodebias[i, :] .!= 0
        normal_mean_nodebias[i] = mean(normal_5k_by_100k_100_nodebias[i, :][normal_cur_row])
        normal_std_nodebias[i]  = std(normal_5k_by_100k_100_nodebias[i, :][normal_cur_row])
    end
    
    if logistic_found_nodebias[i] != 0
        logistic_cur_row = logistic_5k_by_100k_100_nodebias[i, :] .!= 0
        logistic_mean_nodebias[i] = mean(logistic_5k_by_100k_100_nodebias[i, :][logistic_cur_row])
        logistic_std_nodebias[i]  = std(logistic_5k_by_100k_100_nodebias[i, :][logistic_cur_row])
    end
    
    if poisson_found_nodebias[i] != 0
        poisson_cur_row = poisson_5k_by_100k_100_nodebias[i, :] .!= 0
        poisson_mean_nodebias[i] = mean(poisson_5k_by_100k_100_nodebias[i, :][poisson_cur_row])
        poisson_std_nodebias[i]  = std(poisson_5k_by_100k_100_nodebias[i, :][poisson_cur_row])
    end
    
    if negativebinomial_found_nodebias[i] != 0
        negativebinomial_cur_row = negativebinomial_5k_by_100k_100_nodebias[i, :] .!= 0
        negativebinomial_mean_nodebias[i] = mean(negativebinomial_5k_by_100k_100_nodebias[i, :][negativebinomial_cur_row])
        negativebinomial_std_nodebias[i]  = std(negativebinomial_5k_by_100k_100_nodebias[i, :][negativebinomial_cur_row])
    end
end

In [11]:
found_mean_and_std_nodebias = DataFrame(
    true_b = true_b[:],
    normal_mean_nodebias = normal_mean_nodebias, 
    normal_std_nodebias = normal_std_nodebias,
    logistic_mean_nodebias = logistic_mean_nodebias,
    logistic_std_nodebias = logistic_std_nodebias,
    poisson_mean_nodebias = poisson_mean_nodebias,
    poisson_std_nodebias = poisson_std_nodebias,
    negativebinomial_mean_nodebias = negativebinomial_mean_nodebias,
    negativebinomial_std_nodebias = negativebinomial_std_nodebias)
# sort!(found_mean_and_std_nodebias, rev=true) #sort later

Unnamed: 0_level_0,true_b,normal_mean_nodebias,normal_std_nodebias,logistic_mean_nodebias,logistic_std_nodebias,poisson_mean_nodebias,poisson_std_nodebias,negativebinomial_mean_nodebias,negativebinomial_std_nodebias
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.25,0.248782,0.0126425,0.254429,0.0304876,0.24819,0.00684828,0.347684,0.0192545
2,0.5,0.502616,0.0119811,0.499046,0.0354224,0.499912,0.00594064,0.884658,0.0387502
3,0.1,0.0991987,0.0140408,0.150433,0.00857745,0.0998994,0.00864056,0.0,0.0
4,0.8,0.798988,0.0131306,0.802978,0.0351538,0.799929,0.00595888,1.09068,0.0305499


# Sort and round results (debiasing)

In [19]:
sort!(found_mean_and_std, rev=true)
for i in 1:size(found_mean_and_std, 2)
    found_mean_and_std[:, i] = round.(found_mean_and_std[:, i], digits=3)
end
found_mean_and_std

Unnamed: 0_level_0,true_b,normal_mean,normal_std,logistic_mean,logistic_std,poisson_mean,poisson_std,negativebinomial_mean,negativebinomial_std
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.8,0.801,0.015,0.805,0.037,0.801,0.007,0.81,0.012
2,0.5,0.501,0.013,0.499,0.03,0.499,0.006,0.505,0.012
3,0.25,0.25,0.016,0.252,0.032,0.25,0.009,0.249,0.012
4,0.1,0.1,0.015,0.157,0.015,0.1,0.01,0.0,0.0


# Sort and round results (non-debiasing)

In [20]:
sort!(found_mean_and_std_nodebias, rev=true)
for i in 1:size(found_mean_and_std_nodebias, 2)
    found_mean_and_std_nodebias[:, i] = round.(found_mean_and_std_nodebias[:, i], digits=3)
end
found_mean_and_std

Unnamed: 0_level_0,true_b,normal_mean,normal_std,logistic_mean,logistic_std,poisson_mean,poisson_std,negativebinomial_mean,negativebinomial_std
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.8,0.801,0.015,0.805,0.037,0.801,0.007,0.81,0.012
2,0.5,0.501,0.013,0.499,0.03,0.499,0.006,0.505,0.012
3,0.25,0.25,0.016,0.252,0.032,0.25,0.009,0.249,0.012
4,0.1,0.1,0.015,0.157,0.015,0.1,0.01,0.0,0.0
