In [1]:
# Start 8 additional worker processes; the returned array lists the new worker ids.
addprocs(8)

8-element Array{Int64,1}:
 2
 3
 4
 5
 6
 7
 8
 9

In [2]:
using ComputeFramework
using Distributions

# Only the `Computation` type is needed by name, for dispatch in `_gather`.
import ComputeFramework.Computation

# Context shared by every `gather` call issued through `_gather`.
global ctx=Context()

# `_gather` forwards a 2-tuple to `gather(ctx, ...)` only when both entries are
# `Computation` subtypes (i.e. ComputeFramework objects).
function _gather{C<:Computation, D<:Computation}(pair::Tuple{C,D})
    return gather(ctx, pair)
end

# Any other tuple (e.g. one holding ordinary arrays) is returned unchanged.
function _gather(other::Tuple)
    return other
end

_gather (generic function with 2 methods)

In [3]:
# Logistic (sigmoid) function 1 / (1 + exp(-x)).
# Defined with `@everywhere` so that it exists on every process, not only the
# one running the notebook.
@everywhere function logistic(x)
    denom = 1 + exp(-x)
    return 1 / denom
end

# Starting coefficients for `regression`: solves (X'X + λ*I) \ X'y through a
# Cholesky factorisation. Both cross-products are passed through `_gather`
# before the solve.
function initialize(X,y,λ)
    gram, moment = _gather((X'X, X'y))
    factor = cholfact!(gram + λ*I)
    return factor \ moment
end

# Fit an L2-penalised logistic regression by Newton iterations.
#
# Each step solves (X'WX + λ*I) Δβ = X'(y - η) - λ*β with η = logistic(X*β) and
# W = diag(η .* (1 - η)), then updates β by Δβ. The two cross-products are sent
# through `_gather` before the solve.
#
# Arguments
#   X, y     design matrix and 0/1 response. This notebook calls the function
#            both with ComputeFramework objects and with ordinary arrays.
# Keywords
#   tol      stop as soon as norm(Δβ) < tol
#   maxIter  maximum number of Newton steps
#   λ        ridge penalty
#   init     starting coefficients; defaults to `initialize(X, y, λ)`
#
# Returns the tuple (β, k): the coefficient estimate and the number of Newton
# steps performed. Calls `error("no convergence")` when the tolerance is not
# reached within `maxIter` steps.
@everywhere function regression(X, y; tol = 1e-12, maxIter = 30, λ = 0.0, init = initialize(X,y,λ))
    β = init
    μ = X*vec(β)
    k = 0
    # Explicit flag: comparing k with maxIter would misreport a fit that
    # converges exactly on the last permitted step.
    converged = false
    for iter = 1:maxIter
        # Copy the loop index out so the step count is available after the
        # loop regardless of how the language scopes the loop variable.
        k = iter
        η = map(logistic, μ)
        w = η.*(1-η)
        r = y - η

        Xw = scale(w, X)

        XtX, Xtr = _gather((Xw'X, X'r))
        Δβ = cholfact!(XtX + λ*I) \ (Xtr .- λ*β)

        β += Δβ

        # `@show` prints the step size on every iteration and yields its value.
        if (@show norm(Δβ)) < tol
            converged = true
            break
        end
        μ = X*β
    end
    if !converged
        error("no convergence")
    end
    return β, k
end

In [4]:
# Number of rows
N = 10^7
# Rows per block: N divided into 6 blocks, rounded up (= 1666667).
N_part = cld(N, 6)

1666667

In [5]:

# Describe an N-by-10 random matrix laid out with BlockPartition(N_part, 10)
# (presumably blocks of N_part rows by 10 columns; confirm against the
# ComputeFramework documentation).
x = rand(BlockPartition(N_part,10), N, 10)
# `compute` returns a `Computed` object (see the cell output); X is reused by
# the later cells.
@time X = compute(x)

# Synthetic 0/1 response: the true coefficients are 9, 8, ..., 0 and an entry is
# true where the linear predictor exceeds a Logistic()-distributed draw, which
# gives P(y = 1) = logistic(X*β).
y = (X * [9:-1:0.;]) .> Distribute(BlockPartition(N_part), rand(Logistic(), N))
@time Y = compute(y)

  3.513864 seconds (2.86 M allocations: 129.427 MB, 0.90% gc time)
  2.759677 seconds (2.09 M allocations: 172.691 MB, 2.65% gc time)


ComputeFramework.Computed(10000000 BitArray{1} in 6 parts)

In [28]:
# Number of leading rows used for the small warm-up fit.
N_small = 2*10^3

# Take the first N_small rows of X and Y and gather them
# (presumably into ordinary local arrays; confirm against `gather`).
X1 = gather(X[1:N_small, :])
Y1 = gather(Y[1:N_small])

# Run the sequential version on a subset to find an initial guess.
init, k = @time regression(X1,Y1,tol=1e-12, λ=1/N_small)

norm(Δβ) = 0.8634392139496269
norm(Δβ) = 0.7304707586177425
norm(Δβ) = 0.7189065313479128
norm(Δβ) = 0.7336529085838923
norm(Δβ) = 0.7589039220139997
norm(Δβ) = 0.7898295368295862
norm(Δβ) = 0.8241002660802208
norm(Δβ) = 0.8589573676539586
norm(Δβ) = 0.8879357061572806
norm(Δβ) = 0.8947902021568425
norm(Δβ) = 0.849715147713261
norm(Δβ) = 0.7182723270852963
norm(Δβ) = 0.4372006082705449
norm(Δβ) = 0.11264266790268349
norm(Δβ) = 0.005567291277316119
norm(Δβ) = 1.2684039469830244e-5
norm(Δβ) = 6.848122067357623e-11
norm(Δβ) = 2.464326323531238e-12
norm(Δβ) = 9.16125330601169e-13
  0.010783 seconds (79.44 k allocations: 5.733 MB, 40.43% gc time)


([3.233866893389513,3.310758396834973,3.5603050548681554,3.4228170715408903,3.3475120256747575,3.433055426019274,3.492285096042701,3.618546208372272,3.348732338015894,2.944417951482114],19)

In [31]:
# Fit on the full data held in X and Y (the objects returned by `compute`).
# NOTE(review): the `init` obtained from the subset run is not passed here, so
# this call starts from the default `initialize(X, y, λ)`. Confirm that is intended.
@time regression(X,Y,tol=1e-12, λ=1/N)

norm(Δβ) = 0.8555009854429065
norm(Δβ) = 0.7234525658620435
norm(Δβ) = 0.7109977977108463
norm(Δβ) = 0.7244025208076879
norm(Δβ) = 0.7482238363049689
norm(Δβ) = 0.7780454572866816
norm(Δβ) = 0.8123921414785201
norm(Δβ) = 0.8506244591274544
norm(Δβ) = 0.8922157016532679
norm(Δβ) = 0.9381939785896993
norm(Δβ) = 1.0070055416983437
norm(Δβ) = 1.2150317609659707
norm(Δβ) = 1.8509823663255252
norm(Δβ) = 2.763348991500668
norm(Δβ) = 2.9163254913186822
norm(Δβ) = 2.088794184329865
norm(Δβ) = 0.794583838606624
norm(Δβ) = 0.08829764010809388
norm(Δβ) = 0.0010138276589816589
norm(Δβ) = 1.3929833795526373e-7
norm(Δβ) = 5.228848055849768e-13
 29.467062 seconds (2.10 M allocations: 133.861 MB, 0.35% gc time)


([9.10830145833094,10.236706198033996,8.815618895321798,4.093497607192385,3.839092840603249,4.641276635042943,3.501394497014302,0.8002262859609115,0.9588986871748054,1.4295594225438053],21)

In [32]:
# Gather X and Y (presumably into ordinary local arrays) and repeat the same fit
# on the gathered data, for comparison with the timing of the previous cell.
a,b= gather((X,Y))

@time regression(a,b,tol=1e-12, λ=1/N)

norm(Δβ) = 0.8555009854429105
norm(Δβ) = 0.7234525658620439
norm(Δβ) = 0.7109977977108463
norm(Δβ) = 0.7244025208076847
norm(Δβ) = 0.7482238363049737
norm(Δβ) = 0.7780454572866833
norm(Δβ) = 0.8123921414785209
norm(Δβ) = 0.8506244591274488
norm(Δβ) = 0.8922157016532651
norm(Δβ) = 0.9381939785896897
norm(Δβ) = 1.007005541698348
norm(Δβ) = 1.2150317609660173
norm(Δβ) = 1.8509823663254972
norm(Δβ) = 2.763348991500565
norm(Δβ) = 2.916325491318795
norm(Δβ) = 2.0887941843297857
norm(Δβ) = 0.794583838606666
norm(Δβ) = 0.08829764010806286
norm(Δβ) = 0.001013827658798716
norm(Δβ) = 1.3929856521853447e-7
norm(Δβ) = 6.273004866991971e-13
 37.897343 seconds (420.01 M allocations: 29.803 GB, 9.39% gc time)


([9.10830145833111,10.236706198033634,8.815618895321794,4.09349760719224,3.8390928406031364,4.64127663504292,3.501394497014381,0.8002262859610104,0.9588986871747942,1.4295594225437944],21)