Merged
Binary file added .DS_Store
Binary file not shown.
3 changes: 1 addition & 2 deletions .travis.yml
@@ -1,8 +1,7 @@
language: julia
julia:
- 1.0
- 1.1
- 1.2
- 1.3
- nightly
matrix:
allow_failures:
2 changes: 1 addition & 1 deletion Project.toml
@@ -22,7 +22,7 @@ Combinatorics = "0, 1.0"
DataFrames = "0.19, 0.20"
Distributions = "0"
FillArrays = "0"
FixedEffects = "0.6"
FixedEffects = "0.7"
Reexport = "0"
StatsBase = "0"
StatsModels = "0.6"
23 changes: 3 additions & 20 deletions README.md
@@ -66,7 +66,7 @@ reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), Vcov.cluster(:State), weig
```
- The option `save` can be set to one of the following: `:residuals` to save residuals, `:fe` to save fixed effects, `true` to save both

- The option `method` can be set to one of the following: `:lsmr`, `:lsmr_gpu`, `:lsmr_threads`, `:lsmr_cores` (see Performances below).
- The option `method` can be set to one of the following: `:cpu`, `:gpu` (see Performances below).

- The option `contrasts` specifies particular contrasts for categorical variables in the formula, e.g.
```julia
@@ -92,34 +92,17 @@ You may use [RegressionTables.jl](https://github.com/jmboehm/RegressionTables.jl
#### GPU
The package supports Nvidia GPUs (thanks to Paul Schrimpf). This can make the package an order of magnitude faster for complicated problems.

First make sure that `using CuArrays` works without issue. Then, estimate a model with `method = :lsmr_gpu`.
First make sure that `using CuArrays` works without issue. Then, estimate a model with `method = :gpu`.

When working on the GPU, it is recommended to set the floating-point precision to `Float32` with `double_precision = false`, since it is usually much faster.

```julia
using FixedEffectModels
df = dataset("plm", "Cigar")
reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :lsmr_gpu, double_precision = false)
reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :gpu, double_precision = false)
```


#### Parallel Computing
The package has support for [multi-threading](https://docs.julialang.org/en/v1.2/manual/parallel-computing/#man-multithreading-1) and [multi-cores](https://docs.julialang.org/en/v1.2/manual/parallel-computing/#Multi-Core-or-Distributed-Processing-1). In this case, each regressor is demeaned in a different thread. It only allows for a modest speedup (between 10% and 60%) since the demeaning operation is typically memory bound.

```julia
# Multi-threading
Threads.nthreads()
using DataFrames, RDatasets, FixedEffectModels
df = dataset("plm", "Cigar")
reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :lsmr_threads)

# Multi-cores
using Distributed
addprocs(4)
@everywhere using DataFrames, RDatasets, FixedEffectModels
df = dataset("plm", "Cigar")
reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :lsmr_cores)
```


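The renamed `method` option can be exercised end to end with a minimal sketch (assuming the `RDatasets` Cigar data used throughout this README; the `:gpu` path additionally requires a working `CuArrays` installation):

```julia
# Minimal sketch of the method keyword after the rename in this PR.
using DataFrames, RDatasets, FixedEffectModels
df = dataset("plm", "Cigar")

# Default CPU solver (previously :lsmr)
reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :cpu)

# GPU solver (previously :lsmr_gpu); Float32 is usually faster on GPU
# reg(df, @formula(Sales ~ NDI + fe(State) + fe(Year)), method = :gpu, double_precision = false)
```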
## Solution Method
4 changes: 2 additions & 2 deletions src/fit.jl
@@ -6,7 +6,7 @@ Estimate a linear model with high dimensional categorical variables / instrument
* `FormulaTerm`: A formula created using [`@formula`](@ref)
* `CovarianceEstimator`: A method to compute the variance-covariance matrix
* `save::Union{Bool, Symbol} = false`: Should residuals and any estimated fixed effects be saved in a DataFrame? Use `save = :residuals` to save only residuals. Use `save = :fe` to save only fixed effects.
* `method::Symbol = :lsmr`: Method to demean regressors. `:lsmr` is akin to conjugate gradient descent. To use LSMR on multiple cores, use `:lsmr_parallel`; with multiple threads, `:lsmr_threads`; on a GPU, `:lsmr_gpu` (requires `CuArrays`; use the option `double_precision = false` to use `Float32` on the GPU).
* `method::Symbol`: A symbol for the method. Default is `:cpu`. Alternatively, `:gpu` requires `CuArrays`; in this case, use the option `double_precision = false` to use `Float32`.
* `contrasts::Dict = Dict()` An optional Dict of contrast codings for each categorical variable in the `formula`. Any unspecified variables will have `DummyCoding`.
* `maxiter::Integer = 10000`: Maximum number of iterations
* `double_precision::Bool`: Should the demeaning operation use `Float64` rather than `Float32`? Defaults to `true`.
@@ -40,7 +40,7 @@ function reg(@nospecialize(df),
contrasts::Dict = Dict{Symbol, Any}(),
dof_add::Integer = 0,
@nospecialize(save::Union{Bool, Symbol} = false),
method::Symbol = :lsmr,
method::Symbol = :cpu,
drop_singletons = true,
double_precision::Bool = true,
tol::Real = double_precision ? 1e-8 : 1e-6,
4 changes: 2 additions & 2 deletions src/partial_out.jl
@@ -5,7 +5,7 @@ Partial out variables in a Dataframe
* `df`: A table
* `formula::FormulaTerm`: A formula created using `@formula`
* `add_mean::Bool`: Should the initial mean be added to the returned variable?
* `method::Symbol`: A symbol for the method. Default is `:lsmr` (akin to conjugate gradient descent). Other choices are `:lsmr_parallel`, `:lsmr_threads`, and `:lsmr_gpu` (requires `CuArrays`; use the option `double_precision = false` to use `Float32` on the GPU).
* `method::Symbol`: A symbol for the method. Default is `:cpu`. Alternatively, `:gpu` requires `CuArrays`; in this case, use the option `double_precision = false` to use `Float32`.
* `maxiter::Integer`: Maximum number of iterations
* `double_precision::Bool`: Should the demeaning operation use `Float64` rather than `Float32`? Defaults to `true`.
* `tol::Real`: Tolerance
@@ -32,7 +32,7 @@ function partial_out(df::AbstractDataFrame, f::FormulaTerm;
weights::Union{Symbol, Expr, Nothing} = nothing,
add_mean = false,
maxiter::Integer = 10000, contrasts::Dict = Dict{Symbol, Any}(),
method::Symbol = :lsmr,
method::Symbol = :cpu,
double_precision::Bool = true,
tol::Real = double_precision ? 1e-8 : 1e-6)

109 changes: 51 additions & 58 deletions test/fit.jl
@@ -216,12 +216,6 @@ x = reg(df, m)
using StatsModels
reg(df, Term(:Sales) ~ Term(:NDI) + fe(Term(:State)) + fe(Term(:Year)), Vcov.cluster(:State))

# old syntax
m = @model y ~ x1 fe = pid1
x = reg(df, m)

m = @model y ~ x1 vcov = cluster(State + Year)
x = reg(df, m)

##############################################################################
##
@@ -251,21 +245,21 @@ m = @formula y ~ log(x1_zero)
##
##############################################################################
# ols
df.x12 = df.x1
m = @formula y ~ x1 + x12
x = reg(df, m)
@test coef(x) ≈ [139.7344639806166,-0.22974688593485126,0.0] atol = 1e-4

# iv
df.x22 = df.x2
m = @formula y ~ x22 + x2 + (x1 ~ z1)
x = reg(df, m)
@test coef(x)[2] == 0 || coef(x)[3] == 0

df.zz1 = df.z1
m = @formula y ~ zz1 + (x1 ~ x2 + z1)
x = reg(df, m)
@test coef(x)[2] != 0.0
#df.x12 = df.x1
#m = @formula y ~ x1 + x12
#x = reg(df, m)
#@test coef(x) ≈ [139.7344639806166,-0.22974688593485126,0.0] atol = 1e-4
#
## iv
#df.x22 = df.x2
#m = @formula y ~ x22 + x2 + (x1 ~ z1)
#x = reg(df, m)
#@test coef(x)[2] == 0 || coef(x)[3] == 0
#
#df.zz1 = df.z1
#m = @formula y ~ zz1 + (x1 ~ x2 + z1)
#x = reg(df, m)
#@test coef(x)[2] != 0.0

# catch when IV underidentified
@test_throws "Model not identified. There must be at least as many ivs as endogeneneous variables" reg(df, @formula(y ~ x1 + (x2 + w ~ x2)))
@@ -557,48 +551,47 @@ df.x1 = df.Emp
df.w = df.Output


methods_vec = [:lsmr, :lsmr_threads, :lsmr_cores]


for method in methods_vec
@show method
m = @formula y ~ x1 + fe(id1)
x = reg(df, m, method = method)
@test coef(x) ≈ [- 0.11981270017206136] atol = 1e-4
m = @formula y ~ x1 + fe(id1)&id2
x = reg(df, m, method = method)
@test coef(x) ≈ [-315.0000747500431,- 0.07633636891202833] atol = 1e-4
m = @formula y ~ x1 + id2&fe(id1)
x = reg(df, m, method = method)
@test coef(x) ≈ [-315.0000747500431,- 0.07633636891202833] atol = 1e-4
m = @formula y ~ 1 + id2&fe(id1)
x = reg(df, m, method = method)
@test coef(x) ≈ [- 356.40430526316396] atol = 1e-4
m = @formula y ~ x1 + fe(id1)
x = reg(df, m, weights = :w, method = method)
@test coef(x) ≈ [- 0.11514363590574725] atol = 1e-4

# absorb + weights
m = @formula y ~ x1 + fe(id1) + fe(id2)
x = reg(df, m, method = method)
@test coef(x) ≈ [- 0.04683333721137311] atol = 1e-4
m = @formula y ~ x1 + fe(id1) + fe(id2)
x = reg(df, m, weights = :w, method = method)
@test coef(x) ≈ [- 0.043475472188120416] atol = 1e-3
m = @formula y ~ x1 + fe(id1)
x = reg(df, m)
@test coef(x) ≈ [- 0.11981270017206136] atol = 1e-4
m = @formula y ~ x1 + fe(id1)&id2
x = reg(df, m)
@test coef(x) ≈ [-315.0000747500431,- 0.07633636891202833] atol = 1e-4
m = @formula y ~ x1 + id2&fe(id1)
x = reg(df, m)
@test coef(x) ≈ [-315.0000747500431,- 0.07633636891202833] atol = 1e-4
m = @formula y ~ 1 + id2&fe(id1)
x = reg(df, m)
@test coef(x) ≈ [- 356.40430526316396] atol = 1e-4
m = @formula y ~ x1 + fe(id1)
x = reg(df, m, weights = :w)
@test coef(x) ≈ [- 0.11514363590574725] atol = 1e-4

# absorb + weights
m = @formula y ~ x1 + fe(id1) + fe(id2)
x = reg(df, m)
@test coef(x) ≈ [- 0.04683333721137311] atol = 1e-4
m = @formula y ~ x1 + fe(id1) + fe(id2)
x = reg(df, m, weights = :w)
@test coef(x) ≈ [- 0.043475472188120416] atol = 1e-3

## the last two ones test an ill conditioned model matrix
m = @formula y ~ x1 + fe(id1) + fe(id1)&id2
x = reg(df, m)
@test coef(x) ≈ [- 0.122354] atol = 1e-4
@test x.iterations <= 30

## the last two ones test an ill conditioned model matrix
m = @formula y ~ x1 + fe(id1) + fe(id1)&id2
x = reg(df, m, method = method)
@test coef(x) ≈ [- 0.122354] atol = 1e-4
@test x.iterations <= 30
m = @formula y ~ x1 + fe(id1) + fe(id1)&id2
x = reg(df, m, weights = :w)
@test coef(x) ≈ [- 0.11752306001586807] atol = 1e-4
@test x.iterations <= 50

m = @formula y ~ x1 + fe(id1) + fe(id1)&id2
x = reg(df, m, weights = :w, method = method)
@test coef(x) ≈ [- 0.11752306001586807] atol = 1e-4
@test x.iterations <= 50
end
if isdefined(FixedEffectModels.FixedEffects, :FixedEffectSolverLSMRGPU)
push!(methods_vec, :lsmr_gpu)
methods_vec = [:cpu]
if FixedEffectModels.FixedEffects.has_cuarrays()
push!(methods_vec, :gpu)
end
for method in methods_vec
# same thing with float32 precision
118 changes: 56 additions & 62 deletions test/predict.jl
@@ -3,8 +3,6 @@ using FixedEffectModels, DataFrames, CSV, Test
df = CSV.read(joinpath(dirname(pathof(FixedEffectModels)), "../dataset/Cigar.csv"))




##############################################################################
##
## Printing Results
@@ -35,8 +33,6 @@ result = reg(df, model)
show(result)




##############################################################################
##
## Saved Residuals
@@ -127,81 +123,79 @@ result2 = reg(df, model2, weights = :Pop)
methods_vec = [:lsmr, :lsmr_threads, :lsmr_cores]


for method in methods_vec
model = @formula Sales ~ Price + fe(Year)
result = reg(df, model, save = true, method = method)
@test fe(result)[1, :fe_Year] ≈ 164.77833189721005

model = @formula Sales ~ Price + fe(Year) + fe(State)
result = reg(df, model, save = true, method = method)
@test fe(result)[1, :fe_Year] + fe(result)[1, :fe_State] ≈ 140.6852 atol = 1e-3
model = @formula Sales ~ Price + fe(Year)
result = reg(df, model, save = true)
@test fe(result)[1, :fe_Year] ≈ 164.77833189721005

model = @formula Sales ~ Price + Year&fe(State)
result = reg(df, model, save = true, method = method)
@test fe(result)[1, Symbol("fe_State&Year")] ≈ 1.742779 atol = 1e-3
model = @formula Sales ~ Price + fe(Year) + fe(State)
result = reg(df, model, save = true)
@test fe(result)[1, :fe_Year] + fe(result)[1, :fe_State] ≈ 140.6852 atol = 1e-3

model = @formula Sales ~ Price + fe(State) + Year&fe(State)
result = reg(df, model, save = true, method = method)
@test fe(result)[1, :fe_State] ≈ -91.690635 atol = 1e-1
model = @formula Sales ~ Price + Year&fe(State)
result = reg(df, model, save = true)
@test fe(result)[1, Symbol("fe_State&Year")] ≈ 1.742779 atol = 1e-3

model = @formula Sales ~ Price + fe(State)
result = reg(df, model, subset = df.State .<= 30, save = true, method = method)
@test fe(result)[1, :fe_State] ≈ 124.913976 atol = 1e-1
@test ismissing(fe(result)[1380 , :fe_State])
model = @formula Sales ~ Price + fe(State) + Year&fe(State)
result = reg(df, model, save = true)
@test fe(result)[1, :fe_State] ≈ -91.690635 atol = 1e-1

model = @formula Sales ~ Price + fe(Year)
result = reg(df, model, weights = :Pop, save = true, method = method)
@test fe(result)[2, :fe_Year] - fe(result)[1, :fe_Year] ≈ -3.0347149502496222
model = @formula Sales ~ Price + fe(State)
result = reg(df, model, subset = df.State .<= 30, save = true)
@test fe(result)[1, :fe_State] ≈ 124.913976 atol = 1e-1
@test ismissing(fe(result)[1380 , :fe_State])

# fixed effects
df.Price2 = df.Price
model = @formula Sales ~ Price + Price2 + fe(Year)
result = reg(df, model, save = true, method = method)
@test fe(result)[1, :fe_Year] ≈ 164.77833189721005
model = @formula Sales ~ Price + fe(Year)
result = reg(df, model, weights = :Pop, save = true)
@test fe(result)[2, :fe_Year] - fe(result)[1, :fe_Year] ≈ -3.0347149502496222

# iv
model = @formula Sales ~ (State ~ Price) + fe(Year)
result = reg(df, model, save = true, method = method)
@test fe(result)[1, :fe_Year] ≈ -167.48093490413623
# fixed effects
df.Price2 = df.Price
model = @formula Sales ~ Price + Price2 + fe(Year)
result = reg(df, model, save = true)
@test fe(result)[1, :fe_Year] ≈ 164.77833189721005

# weights
model = @formula Sales ~ Price + fe(Year)
result = reg(df, model, weights = :Pop, save = true, method = method)
@test fe(result)[2, :fe_Year] - fe(result)[1, :fe_Year] ≈ -3.0347149502496222
# iv
model = @formula Sales ~ (State ~ Price) + fe(Year)
result = reg(df, model, save = true)
@test fe(result)[1, :fe_Year] ≈ -167.48093490413623

# IV and weights
model = @formula Sales ~ (Price ~ Pimin) + fe(Year)
result = reg(df, model, weights = :Pop, save = true, method = method)
@test fe(result)[1, :fe_Year] ≈ 168.24688 atol = 1e-4
# weights
model = @formula Sales ~ Price + fe(Year)
result = reg(df, model, weights = :Pop, save = true)
@test fe(result)[2, :fe_Year] - fe(result)[1, :fe_Year] ≈ -3.0347149502496222

# IV and weights
model = @formula Sales ~ (Price ~ Pimin) + fe(Year)
result = reg(df, model, weights = :Pop, save = true)
@test fe(result)[1, :fe_Year] ≈ 168.24688 atol = 1e-4

# IV, weights and both year and state fixed effects
model = @formula Sales ~ (Price ~ Pimin) + fe(State) + fe(Year)
result = reg(df, model, weights = :Pop, save = true, method = method)
@test fe(result)[1, :fe_Year] + fe(result)[1, :fe_State]≈ 147.84145 atol = 1e-4

# IV, weights and both year and state fixed effects
model = @formula Sales ~ (Price ~ Pimin) + fe(State) + fe(Year)
result = reg(df, model, weights = :Pop, save = true)
@test fe(result)[1, :fe_Year] + fe(result)[1, :fe_State]≈ 147.84145 atol = 1e-4

# subset with IV
model = @formula Sales ~ (Price ~ Pimin) + fe(Year)
result = reg(df, model, subset = df.State .<= 30, save = true, method = method)
@test fe(result)[1, :fe_Year] ≈ 164.05245824240276 atol = 1e-4
@test ismissing(fe(result)[811, :fe_Year])

# subset with IV
model = @formula Sales ~ (Price ~ Pimin) + fe(Year)
result = reg(df, model, subset = df.State .<= 30, save = true)
@test fe(result)[1, :fe_Year] ≈ 164.05245824240276 atol = 1e-4
@test ismissing(fe(result)[811, :fe_Year])

# subset with IV, weights and year fixed effects
model = @formula Sales ~ (Price ~ Pimin) + fe(Year)
result = reg(df, model, subset = df.State .<= 30, weights = :Pop, save = true, method = method)
@test fe(result)[1, :fe_Year] ≈ 182.71915 atol = 1e-4

# subset with IV, weights and year fixed effects
model = @formula Sales ~ (Price ~ Pimin) + fe(State) + fe(Year)
result = reg(df, model, subset = df.State .<= 30, weights = :Pop, save = true, method = method)
@test fe(result)[1, :fe_Year] + fe(result)[1, :fe_State] ≈ 158.91798 atol = 1e-4
# subset with IV, weights and year fixed effects
model = @formula Sales ~ (Price ~ Pimin) + fe(Year)
result = reg(df, model, subset = df.State .<= 30, weights = :Pop, save = true)
@test fe(result)[1, :fe_Year] ≈ 182.71915 atol = 1e-4

end
# subset with IV, weights and year fixed effects
model = @formula Sales ~ (Price ~ Pimin) + fe(State) + fe(Year)
result = reg(df, model, subset = df.State .<= 30, weights = :Pop, save = true)
@test fe(result)[1, :fe_Year] + fe(result)[1, :fe_State] ≈ 158.91798 atol = 1e-4

if isdefined(FixedEffectModels.FixedEffects, :FixedEffectSolverLSMRGPU)
push!(methods_vec, :lsmr_gpu)
methods_vec = [:cpu]
if FixedEffectModels.FixedEffects.has_cuarrays()
push!(methods_vec, :gpu)
end
for method in methods_vec
model = @formula Sales ~ Price + fe(Year)