In [1]:
using DelimitedFiles
using Random
using DataFrames
using StatsBase
using Statistics

# Below are 100 simulations of y, where X is a 5k by 100k matrix

In [2]:
# Load the simulation results for the three response models. Each file is
# parsed by `readdlm` into a matrix (presumably one row per predictor and one
# column per simulation run -- confirm against the script that wrote the files).
normal_5k_by_100k_100, logistic_5k_by_100k_100, poisson_5k_by_100k_100 = map(
    readdlm,
    ("normal_5k_by_100k_100", "logistic_5k_by_100k_100", "poisson_5k_by_100k_100"),
)

# Load the true model. Kept as the last statement so the notebook displays it.
true_b = readdlm("true_b")

10×1 Array{Float64,2}:
 -0.15383809548156066 
 -0.8994459470272312  
 -0.007629197779590085
 -0.3803702214456629  
 -1.0528703282949283  
 -0.164312670254221   
  0.9595798578217997  
 -0.22792996142696456 
  1.173293523705761   
  0.4534550514505588  

### First compute the probability that each predictor is found (reported as a count out of the 100 simulations)

In [35]:
# Number of true predictors (rows of true_b).
k = size(true_b, 1)

# For every predictor row i, count how many entries of that row are nonzero,
# i.e. in how many simulation runs the predictor received a nonzero estimate.
# The typed comprehension stores the integer counts as Float64, matching a
# vector that was created with `zeros(k)` and then filled.
normal_found = Float64[
    count(x -> x != 0, normal_5k_by_100k_100[i, :]) for i in 1:k
]
logistic_found = Float64[
    count(x -> x != 0, logistic_5k_by_100k_100[i, :]) for i in 1:k
]
poisson_found = Float64[
    count(x -> x != 0, poisson_5k_by_100k_100[i, :]) for i in 1:k
]

In [36]:
# One row per predictor: the true coefficient next to the number of runs in
# which each model assigned that predictor a nonzero estimate.
# `copy(vec(true_b))` flattens true_b into a fresh vector, so the table does
# not share storage with the matrix.
find_probability = DataFrame(
    true_b             = copy(vec(true_b)),
    normal_prob_find   = normal_found,
    logistic_prob_find = logistic_found,
    poisson_prob_find  = poisson_found,
)

Unnamed: 0_level_0,true_b,normal_prob_find,logistic_prob_find,poisson_prob_find
Unnamed: 0_level_1,Float64,Float64,Float64,Float64
1,-0.153838,100.0,49.0,100.0
2,-0.899446,100.0,100.0,100.0
3,-0.0076292,85.0,0.0,0.0
4,-0.38037,100.0,100.0,100.0
5,-1.05287,100.0,100.0,100.0
6,-0.164313,100.0,57.0,100.0
7,0.95958,100.0,100.0,100.0
8,-0.22793,100.0,98.0,100.0
9,1.17329,100.0,100.0,100.0
10,0.453455,100.0,100.0,100.0


### When a predictor is found, what are the mean and standard deviation of its estimates?

In [37]:
# Number of true predictors (rows of true_b).
k = size(true_b, 1)

# Per-predictor summaries of the nonzero estimates. Entries start at 0.0 and
# stay 0.0 for a predictor that was never found by the corresponding model.
normal_mean,   normal_std   = zeros(k), zeros(k)
logistic_mean, logistic_std = zeros(k), zeros(k)
poisson_mean,  poisson_std  = zeros(k), zeros(k)

# The same computation applies to each model, so walk over
# (estimate matrix, found counts, mean output, std output) tuples.
for (estimates, found, means, stds) in (
    (normal_5k_by_100k_100, normal_found, normal_mean, normal_std),
    (logistic_5k_by_100k_100, logistic_found, logistic_mean, logistic_std),
    (poisson_5k_by_100k_100, poisson_found, poisson_mean, poisson_std),
)
    for i in 1:k
        # Only summarise predictors that were found at least once.
        found[i] != 0 || continue

        # Keep just the runs in which predictor i has a nonzero estimate.
        nonzero = filter(x -> x != 0, estimates[i, :])
        means[i] = mean(nonzero)
        stds[i]  = std(nonzero)
    end
end

In [38]:
# One row per predictor: the true coefficient followed by the mean and standard
# deviation of the nonzero estimates for the normal, logistic and Poisson runs.
# `copy(vec(true_b))` flattens true_b into a fresh vector, so the table does
# not share storage with the matrix.
found_mean_and_std = DataFrame(
    true_b        = copy(vec(true_b)),
    normal_mean   = normal_mean,
    normal_std    = normal_std,
    logistic_mean = logistic_mean,
    logistic_std  = logistic_std,
    poisson_mean  = poisson_mean,
    poisson_std   = poisson_std,
)

Unnamed: 0_level_0,true_b,normal_mean,normal_std,logistic_mean,logistic_std,poisson_mean,poisson_std
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,-0.153838,-0.153996,0.0013654,-0.189892,0.0212166,-0.153484,0.00513342
2,-0.899446,-0.899363,0.00132262,-0.908014,0.0363525,-0.899544,0.00657099
3,-0.0076292,-0.0082179,0.00107313,0.0,0.0,0.0,0.0
4,-0.38037,-0.380463,0.00161458,-0.380738,0.0401432,-0.379907,0.00445302
5,-1.05287,-1.0531,0.00152849,-1.06732,0.0440068,-1.05061,0.00761162
6,-0.164313,-0.164382,0.00150407,-0.189739,0.0189839,-0.163746,0.00457592
7,0.95958,0.95947,0.00135204,0.975699,0.0425978,0.959261,0.00436532
8,-0.22793,-0.227895,0.00146507,-0.233117,0.0381534,-0.227001,0.00427831
9,1.17329,1.17337,0.00152257,1.18229,0.0488598,1.17338,0.00316699
10,0.453455,0.453442,0.00128862,0.453688,0.043737,0.45331,0.00238008


# Sort the two DataFrames and round the results

In [42]:
# Reorder the rows in place, comparing entries by absolute value, largest first.
sort!(found_mean_and_std; by = abs, rev = true)

# Round every column to four decimal places, writing the result back in place.
for col in 1:size(found_mean_and_std, 2)
    rounded = round.(found_mean_and_std[:, col], digits=4)
    found_mean_and_std[:, col] = rounded
end

# Last expression of the cell, so the notebook displays the table.
found_mean_and_std

Unnamed: 0_level_0,true_b,normal_mean,normal_std,logistic_mean,logistic_std,poisson_mean,poisson_std
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,1.1733,1.1734,0.0015,1.1823,0.0489,1.1734,0.0032
2,-1.0529,-1.0531,0.0015,-1.0673,0.044,-1.0506,0.0076
3,0.9596,0.9595,0.0014,0.9757,0.0426,0.9593,0.0044
4,-0.8994,-0.8994,0.0013,-0.908,0.0364,-0.8995,0.0066
5,0.4535,0.4534,0.0013,0.4537,0.0437,0.4533,0.0024
6,-0.3804,-0.3805,0.0016,-0.3807,0.0401,-0.3799,0.0045
7,-0.2279,-0.2279,0.0015,-0.2331,0.0382,-0.227,0.0043
8,-0.1643,-0.1644,0.0015,-0.1897,0.019,-0.1637,0.0046
9,-0.1538,-0.154,0.0014,-0.1899,0.0212,-0.1535,0.0051
10,-0.0076,-0.0082,0.0011,0.0,0.0,0.0,0.0


In [8]:
# Reorder the rows in place, comparing entries by absolute value, largest first.
sort!(find_probability; by = abs, rev = true)

Unnamed: 0_level_0,true_b,normal_prob_find,logistic_prob_find,poisson_prob_find
Unnamed: 0_level_1,Float64,Float64,Float64,Float64
1,1.17329,100.0,100.0,100.0
2,-1.05287,100.0,100.0,100.0
3,0.95958,100.0,100.0,100.0
4,-0.899446,100.0,100.0,100.0
5,0.453455,100.0,100.0,100.0
6,-0.38037,100.0,100.0,100.0
7,-0.22793,100.0,98.0,100.0
8,-0.164313,100.0,57.0,100.0
9,-0.153838,100.0,49.0,100.0
10,-0.0076292,85.0,0.0,0.0


In [40]:
# Scratch check: round a literal to four decimal places.
round(1.0011234; digits = 4)

1.0011

In [31]:
# Column positions in found_mean_and_std to print at three decimal places:
# true_b (1) and the normal / logistic / Poisson mean columns (2, 4, 6).
three_digits = [1 2 4 6]
# Positions of the three *_std columns (3, 5, 7). Not used in this cell; kept
# in case a later cell relies on it.
three_signif = [3 5 7]

# Print each selected column rounded to three decimal places. The `@show`
# expression is left exactly as written because its text is echoed as the label.
for i in three_digits
    @show round.(found_mean_and_std[:, i], digits=3)
end

round.(found_mean_and_std[:, i], digits=3) = [1.173, -1.053, 0.96, -0.899, 0.453, -0.38, -0.228, -0.164, -0.154, -0.008]
round.(found_mean_and_std[:, i], digits=3) = [1.173, -1.053, 0.959, -0.899, 0.453, -0.38, -0.228, -0.164, -0.154, -0.008]
round.(found_mean_and_std[:, i], digits=3) = [1.182, -1.067, 0.976, -0.908, 0.454, -0.381, -0.233, -0.19, -0.19, 0.0]
round.(found_mean_and_std[:, i], digits=3) = [1.173, -1.051, 0.959, -0.9, 0.453, -0.38, -0.227, -0.164, -0.153, 0.0]
