# Let's see if logistic IHT is working...

## First, simulate a binary response

In [1]:
#load packages
using IHT
using SnpArrays
using DataFrames
using Distributions

# Seed the global RNG so the simulated data below is reproducible.
srand(1111)

# Problem dimensions and noise level (all scalars).
n = 5000    # number of cases (rows of x)
p = 30000   # number of predictors (columns of x)
k = 10      # number of true (non-zero) predictors per group
s = 0.1     # scale of the Gaussian noise: second parameter of Normal(0, s)

# Genotype matrix and non-genetic covariates.
x = SnpArray(rand(0:2, n, p))   # SnpArray built from a random n-by-p matrix with entries in 0:2
z = fill(1.0, n, 1)             # n-by-1 covariate matrix holding only the intercept

# True model vector: k standard-normal effects followed by zeros, then shuffled
# so the non-zero effects land at random positions.
true_b = vcat(randn(k), zeros(p - k))
shuffle!(true_b)
correct_position = find(b -> b != 0, true_b)   # indices of the non-zero entries of true_b

# Noise for the linear predictor: n independent draws from Normal(0, s).
noise = rand(Normal(0, s), n)

# Compute the per-predictor mean and scaling vectors used to standardize x
# to mean 0 and variance 1.
# NOTE(review): presumably the first value returned by `summarize` is an allele
# frequency per SNP and the second flags which allele is the minor one — confirm
# against the SnpArrays version in use.
mean_vec, minor_allele, = summarize(x)
for i in 1:p
    # Overwrite the summarized value in place; the formula depends on the flag.
    if minor_allele[i]
        mean_vec[i] = 2.0 - 2.0 * mean_vec[i]
    else
        mean_vec[i] = 2.0 * mean_vec[i]
    end
end
# Scaling vector derived from x and the means just computed (by its name, the
# reciprocal of each column's standard deviation — TODO confirm).
std_vec = std_reciprocal(x, mean_vec)

# Simulate the linear predictor: y_temp = X * true_b + noise, where the product is
# computed in place by SnpArrays using mean_vec and std_vec for standardization.
y_temp = zeros(Float64, n)
SnpArrays.A_mul_B!(y_temp, x, true_b, mean_vec, std_vec)
y_temp .= y_temp .+ noise

# Apply the inverse logit link to turn the linear predictor into a probability,
# then round that probability so y only takes the values 0.0 and 1.0.
# NOTE(review): rounding thresholds each probability at 0.5 instead of drawing
# y from Bernoulli(probability) — confirm this is the intended simulation.
y = round.(1 ./ (1 .+ exp.(-y_temp)))

5000-element Array{Float64,1}:
 0.0
 1.0
 1.0
 0.0
 0.0
 0.0
 0.0
 1.0
 1.0
 0.0
 0.0
 1.0
 0.0
 ⋮  
 0.0
 1.0
 1.0
 1.0
 0.0
 1.0
 1.0
 0.0
 1.0
 0.0
 0.0
 0.0

## Check if transformed data looks logistic (it does)

In [None]:
using Plots

# Sort the inverse-logit probabilities of the simulated linear predictor and plot
# them against their rank to eyeball whether the curve looks logistic.
temp = sort(1 ./ (1 .+ exp.(-y_temp)))
# Derive the x-axis from the data itself rather than hard-coding 5000, so the
# cell keeps working if n is changed above; a range needs no `collect`.
myplot = plot(1:length(temp), temp)

## Now see if we can reconstruct the signal

In [21]:
# Fit logistic IHT on the simulated data and keep the estimated coefficients at
# the positions of the true non-zero effects.
# NOTE(review): the positional arguments `1, k` presumably mean one group with
# k predictors per group — confirm against the IHT package's L0_reg signature.
estimated_models = zeros(k)
v = IHTVariables(x, z, y, 1, k)
result = L0_reg(v, x, z, y, 1, k, glm = "logistic")
estimated_models .= result.beta[correct_position]

# Compare the true effects with the estimates, side by side, one row per true predictor.
true_model = true_b[correct_position]
compare_model = DataFrame(
    correct_position = correct_position,
    true_β           = true_model,       # the comma here was missing, which made the call unparsable
    noise_level_1    = estimated_models)

LoadError: [91mAssertionError: all(y .== 0)[39m