# Negative Binomial: Compute false positive/negatives

Currently `glmnet` does not support negative binomial regression. Therefore, we simply compute the number of false positives/negatives of IHT for the negative binomial model, using the same cross validation setting as for the Normal/Bernoulli/Poisson models.

In [1]:
using Distributed
# Start 4 additional worker processes; the cross validation call further down
# is invoked with `parallel=true`.
addprocs(4)
# Total number of processes now available (the original process plus the workers).
nprocs()

5

In [2]:
using MendelIHT
using SnpArrays
using DataFrames
using Distributions
using Random
using LinearAlgebra
using DelimitedFiles
using GLM

In [3]:
"""
    iht_negativebinomial(n, p, k, d, l)

Run one simulation of IHT model selection and return `(iht_fp, iht_fn)`, the number
of false positives and false negatives of the selected model.

A SNP array is simulated with `simulate_random_snparray(n, p, undef)` and a response
with `simulate_random_response` using `k` true predictors, distribution `d` and link
`l`. The sparsity level is chosen by 3-fold cross validation over the candidate
sizes `1:50`, and the final model is fitted with `L0_reg` at the chosen size.

The estimated and true effect sizes at the truly non-zero positions are printed,
followed by the false positive/negative counts.

# Arguments
- `n`, `p`: dimensions forwarded to `simulate_random_snparray`.
- `k`: number of true (non-zero) predictors used to simulate the response.
- `d`: distribution family passed unparameterized (e.g. `NegativeBinomial`); it is
  instantiated as `d()` for the IHT routines.
- `l`: link function (e.g. `LogLink()`).
"""
function iht_negativebinomial(n::Int64, p::Int64, k::Int64, d::UnionAll, l::Link)
    # construct the SNP array, its SnpBitMatrix counterpart, and the intercept column
    x, = simulate_random_snparray(n, p, undef)
    xbm = SnpBitMatrix{Float64}(x, model=ADDITIVE_MODEL, center=true, scale=true)
    z = ones(n, 1) # the intercept

    # simulate response, true model b, and the correct non-0 positions of b
    y, true_b, correct_position = simulate_random_response(x, xbm, k, d, l)

    # assign every sample to one of the cross validation folds
    num_folds = 3
    folds = rand(1:num_folds, size(x, 1))

    # candidate sparsity levels examined by cross validation
    path = collect(1:50)

    # run IHT's cross validation routine
    mses = cv_iht_distributed(d(), l, x, z, y, 1, path, folds, num_folds,
        use_maf=false, debias=false, showinfo=false, parallel=true)

    # argmin returns an index into `path`; translate it to the sparsity level itself
    # so the estimate stays correct even if `path` is not 1, 2, 3, ...
    iht_k_est = path[argmin(mses)]
    iht_result = L0_reg(x, xbm, z, y, 1, iht_k_est, d(), l,
        debias=false, init=false, use_maf=false)
    iht_beta = iht_result.beta

    # show IHT's reconstruction result next to the truth
    compare_model = DataFrame(
        true_β  = true_b[correct_position],
        IHT_β   = iht_beta[correct_position])
    @show compare_model

    # compute true/false positives/negatives for IHT
    # NOTE(review): false positives are derived from the chosen model size, which
    # assumes all `iht_k_est` selected entries lie in `iht_beta` — confirm.
    iht_tp = count(!iszero, iht_beta[correct_position])
    iht_fp = iht_k_est - iht_tp
    iht_fn = k - iht_tp

    println("IHT false positives = $iht_fp")
    println("IHT false negatives = $iht_fn" * "\n")

    return iht_fp, iht_fn
end

iht_negativebinomial (generic function with 1 method)

In [4]:
# Simulation settings: data are simulated with k true predictors, from
# distribution d and with link l; n and p are the dimensions of the simulated data.
n, p, k = 1000, 10000, 10
d, l = NegativeBinomial, LogLink()

# fix the random seed so the whole sequence of runs is reproducible
Random.seed!(2019)

# repeat the simulation, storing each run's false positives and false negatives
# in their own vector
total_runs = 50
iht_false_positives = zeros(total_runs)
iht_false_negatives = zeros(total_runs)
for i in 1:total_runs
    println("current run = $i")
    iht_false_positives[i], iht_false_negatives[i] = iht_negativebinomial(n, p, k, d, l)
end

current run = 1
compare_model = 10×2 DataFrame
│ Row │ true_β     │ IHT_β     │
│     │ Float64    │ Float64   │
├─────┼────────────┼───────────┤
│ 1   │ -0.389892  │ -0.386837 │
│ 2   │ -0.0653099 │ 0.0       │
│ 3   │ 0.235865   │ 0.204037  │
│ 4   │ 0.17977    │ 0.162094  │
│ 5   │ 0.0851134  │ 0.117708  │
│ 6   │ -0.33761   │ -0.239656 │
│ 7   │ 0.208012   │ 0.235604  │
│ 8   │ -0.203127  │ -0.261774 │
│ 9   │ 0.0441809  │ 0.0       │
│ 10  │ 0.310431   │ 0.321899  │
IHT false positives = 0
IHT false negatives = 2

current run = 2
compare_model = 10×2 DataFrame
│ Row │ true_β    │ IHT_β    │
│     │ Float64   │ Float64  │
├─────┼───────────┼──────────┤
│ 1   │ -0.308163 │ 0.0      │
│ 2   │ 0.748746  │ 0.724344 │
│ 3   │ -0.270412 │ 0.0      │
│ 4   │ -0.261287 │ 0.0      │
│ 5   │ -0.355036 │ 0.0      │
│ 6   │ -0.284426 │ 0.0      │
│ 7   │ -0.307554 │ 0.0      │
│ 8   │ 0.754297  │ 0.724457 │
│ 9   │ 0.162804  │ 0.0      │
│ 10  │ 0.130718  │ 0.0      │
IHT false positives = 1
I

In [1]:
# Per-run IHT false positive counts for the negative binomial simulation
# (presumably transcribed from the printed output of the runs — verify).
iht_fp_counts = [0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 3, 0,
                 1, 0, 0, 0, 0, 4, 0, 0, 2, 6, 1, 4, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0]
# Per-run IHT false negative counts for the same runs.
iht_fn_counts = [2, 8, 3, 7, 3, 4, 5, 5, 4, 6, 3, 4, 2, 3, 4, 5, 4, 4, 6, 2, 6, 5, 9, 5, 6,
                 2, 6, 7, 7, 6, 7, 4, 5, 5, 5, 8, 4, 8, 2, 3, 9, 9, 5, 5, 5, 8, 2, 4, 4, 9]

# Average over the recorded runs; the denominator follows the data instead of a
# hard-coded run count.
negativebinomial_iht_false_positives = sum(iht_fp_counts) / length(iht_fp_counts)
negativebinomial_iht_false_negatives = sum(iht_fn_counts) / length(iht_fn_counts)

# The false negative average was originally stored under a Bernoulli name even
# though these are negative binomial results; the old name is kept as an alias.
bernoulli_iht_false_negatives = negativebinomial_iht_false_negatives

IHT_did_not_converge = 2
result = [negativebinomial_iht_false_positives; negativebinomial_iht_false_negatives]

2-element Array{Float64,1}:
 0.82
 5.08