From dfd30ac1d4ecca7fbeac498fc9ccf253bf8fd374 Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Tue, 19 Mar 2019 15:11:00 -0400
Subject: [PATCH 01/15] Optimize W matrix formation

---
 src/derivative_utils.jl | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl
index 8a5771b710..1876eb1740 100644
--- a/src/derivative_utils.jl
+++ b/src/derivative_utils.jl
@@ -261,22 +261,28 @@ end
 @noinline _throwWJerror(W, J) = throw(DimensionMismatch("W: $(axes(W)), J: $(axes(J))"))
 @noinline _throwWMerror(W, mass_matrix) = throw(DimensionMismatch("W: $(axes(W)), mass matrix: $(axes(mass_matrix))"))
 
-@inline function jacobian2W!(W, mass_matrix, dtgamma, J, W_transform)::Nothing
+@inline function jacobian2W!(W::AbstractMatrix, mass_matrix::MT, dtgamma::Number, J::AbstractMatrix, W_transform::Bool)::Nothing where MT
   # check size and dimension
   iijj = axes(W)
   @boundscheck (iijj === axes(J) && length(iijj) === 2) || _throwWJerror(W, J)
   mass_matrix isa UniformScaling || @boundscheck axes(mass_matrix) === axes(W) || _throwWMerror(W, mass_matrix)
   @inbounds if W_transform
     invdtgamma′ = inv(dtgamma)
-    for i in iijj[1]
-      @inbounds for j in iijj[2]
-        W[i, j] = muladd(mass_matrix[i, j], invdtgamma′, -J[i, j])
+    if MT <: UniformScaling
+      @simd for i in diagind(W)
+          W[i] = muladd(mass_matrix.λ, invdtgamma′, -J[i])
+      end
+    else
+      for j in iijj[2]
+        @simd for i in iijj[1]
+          W[i, j] = muladd(mass_matrix[i, j], invdtgamma′, -J[i, j])
+        end
       end
     end
   else
     dtgamma′ = -dtgamma
-    for i in iijj[1]
-      @simd for j in iijj[2]
+    for j in iijj[2]
+      @simd for i in iijj[1]
         W[i, j] = muladd(dtgamma′, J[i, j], mass_matrix[i, j])
       end
     end

From 4d06f26207b095962d8223f1705ed2fbbb551fcd Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Tue, 19 Mar 2019 16:21:37 -0400
Subject: [PATCH 02/15] `W = M - gamma*J` ---> `W = -M + gamma*J`

---
 src/derivative_utils.jl | 54 ++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl
index 1876eb1740..b56afde72a 100644
--- a/src/derivative_utils.jl
+++ b/src/derivative_utils.jl
@@ -152,50 +152,50 @@ function Base.convert(::Type{AbstractMatrix}, W::WOperator)
   if W._concrete_form === nothing || !W.inplace
     # Allocating
     if W.transform
-      W._concrete_form = W.mass_matrix / W.gamma - convert(AbstractMatrix,W.J)
+      W._concrete_form = -W.mass_matrix / W.gamma + convert(AbstractMatrix,W.J)
     else
-      W._concrete_form = W.mass_matrix - W.gamma * convert(AbstractMatrix,W.J)
+      W._concrete_form = -W.mass_matrix + W.gamma * convert(AbstractMatrix,W.J)
     end
   else
     # Non-allocating
     if W.transform
-      rmul!(copyto!(W._concrete_form, W.mass_matrix), 1/W.gamma)
-      axpy!(-1, convert(AbstractMatrix,W.J), W._concrete_form)
+      rmul!(copyto!(W._concrete_form, W.mass_matrix), -1/W.gamma)
+      axpy!(one(W.gamma), convert(AbstractMatrix,W.J), W._concrete_form)
     else
-      copyto!(W._concrete_form, W.mass_matrix)
-      axpy!(-W.gamma, convert(AbstractMatrix,W.J), W._concrete_form)
+      @. W._concrete_form = -W.mass_matrix
+      axpy!(W.gamma, convert(AbstractMatrix,W.J), W._concrete_form)
     end
   end
   W._concrete_form
 end
 function Base.convert(::Type{Number}, W::WOperator)
   if W.transform
-    W._concrete_form = W.mass_matrix / W.gamma - convert(Number,W.J)
+    W._concrete_form = -W.mass_matrix / W.gamma + convert(Number,W.J)
   else
-    W._concrete_form = W.mass_matrix - W.gamma * convert(Number,W.J)
+    W._concrete_form = -W.mass_matrix + W.gamma * convert(Number,W.J)
   end
   W._concrete_form
 end
 Base.size(W::WOperator, args...) = size(W.J, args...)
 function Base.getindex(W::WOperator, i::Int)
   if W.transform
-    W.mass_matrix[i] / W.gamma - W.J[i]
+    -W.mass_matrix[i] / W.gamma + W.J[i]
   else
-    W.mass_matrix[i] - W.gamma * W.J[i]
+    -W.mass_matrix[i] + W.gamma * W.J[i]
   end
 end
 function Base.getindex(W::WOperator, I::Vararg{Int,N}) where {N}
   if W.transform
-    W.mass_matrix[I...] / W.gamma - W.J[I...]
+    -W.mass_matrix[I...] / W.gamma + W.J[I...]
   else
-    W.mass_matrix[I...] - W.gamma * W.J[I...]
+    -W.mass_matrix[I...] + W.gamma * W.J[I...]
   end
 end
 function Base.:*(W::WOperator, x::Union{AbstractVecOrMat,Number})
   if W.transform
-    (W.mass_matrix*x) / W.gamma - W.J*x
+    (W.mass_matrix*x) / -W.gamma + W.J*x
   else
-    W.mass_matrix*x - W.gamma * (W.J*x)
+    -W.mass_matrix*x + W.gamma * (W.J*x)
   end
 end
 function Base.:\(W::WOperator, x::Union{AbstractVecOrMat,Number})
@@ -214,15 +214,15 @@ function LinearAlgebra.mul!(Y::AbstractVecOrMat, W::WOperator, B::AbstractVecOrM
   if W.transform
     # Compute mass_matrix * B
     if isa(W.mass_matrix, UniformScaling)
-      a = W.mass_matrix.λ / W.gamma
+      a = -W.mass_matrix.λ / W.gamma
       @. Y = a * B
     else
       mul!(Y, W.mass_matrix, B)
-      lmul!(1/W.gamma, Y)
+      lmul!(-1/W.gamma, Y)
     end
-    # Compute J * B and subtract
+    # Compute J * B and add
     mul!(W._func_cache, W.J, B)
-    Y .-= W._func_cache
+    Y .+= W._func_cache
   else
     # Compute mass_matrix * B
     if isa(W.mass_matrix, UniformScaling)
@@ -232,8 +232,8 @@ function LinearAlgebra.mul!(Y::AbstractVecOrMat, W::WOperator, B::AbstractVecOrM
     end
     # Compute J * B
     mul!(W._func_cache, W.J, B)
-    # Subtract result
-    axpy!(-W.gamma, W._func_cache, Y)
+    # Add result
+    axpby!(W.gamma, W._func_cache, -one(W.gamma), Y)
   end
 end
 
@@ -267,23 +267,23 @@ end
   @boundscheck (iijj === axes(J) && length(iijj) === 2) || _throwWJerror(W, J)
   mass_matrix isa UniformScaling || @boundscheck axes(mass_matrix) === axes(W) || _throwWMerror(W, mass_matrix)
   @inbounds if W_transform
-    invdtgamma′ = inv(dtgamma)
+    invdtgamma = inv(dtgamma)
     if MT <: UniformScaling
+      copyto!(W, J)
       @simd for i in diagind(W)
-          W[i] = muladd(mass_matrix.λ, invdtgamma′, -J[i])
+          W[i] = muladd(-mass_matrix.λ, invdtgamma, J[i])
       end
     else
       for j in iijj[2]
         @simd for i in iijj[1]
-          W[i, j] = muladd(mass_matrix[i, j], invdtgamma′, -J[i, j])
+          W[i, j] = muladd(-mass_matrix[i, j], invdtgamma, J[i, j])
         end
       end
     end
   else
-    dtgamma′ = -dtgamma
     for j in iijj[2]
       @simd for i in iijj[1]
-        W[i, j] = muladd(dtgamma′, J[i, j], mass_matrix[i, j])
+        W[i, j] = muladd(dtgamma, J[i, j], -mass_matrix[i, j])
       end
     end
   end
@@ -351,8 +351,8 @@ function calc_W!(integrator, cache::OrdinaryDiffEqConstantCache, dtgamma, repeat
   else
     integrator.destats.nw += 1
     J = calc_J(integrator, cache, is_compos)
-    W_full = W_transform ? mass_matrix*inv(dtgamma) - J :
-                           mass_matrix - dtgamma*J
+    W_full = W_transform ? -mass_matrix*inv(dtgamma) + J :
+                           -mass_matrix + dtgamma*J
     W = W_full isa Number ? W_full : lu(W_full)
   end
   is_compos && (integrator.eigen_est = isarray ? opnorm(J, Inf) : J)

From 8ff0dbbe16002485b6e99a883b33a1280ea08018 Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Tue, 19 Mar 2019 16:22:35 -0400
Subject: [PATCH 03/15] Update Newton solver

---
 src/nlsolve/newton.jl                 |  4 ++--
 src/perform_step/firk_perform_step.jl | 24 ++++++++++++------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/nlsolve/newton.jl b/src/nlsolve/newton.jl
index 4757c24483..fbe8add743 100644
--- a/src/nlsolve/newton.jl
+++ b/src/nlsolve/newton.jl
@@ -82,7 +82,7 @@ Equations II, Springer Series in Computational Mathematics. ISBN
     end
 
     # update solution
-    z = z .+ dz
+    z = z .- dz
 
     # check stopping criterion
     iter > 1 && (η = θ / (1 - θ))
@@ -151,7 +151,7 @@ end
     end
 
     # update solution
-    z .+= dz
+    @. z = z - dz
 
     # check stopping criterion
     iter > 1 && (η = θ / (1 - θ))
diff --git a/src/perform_step/firk_perform_step.jl b/src/perform_step/firk_perform_step.jl
index c5a59627fe..bdad4d9af6 100644
--- a/src/perform_step/firk_perform_step.jl
+++ b/src/perform_step/firk_perform_step.jl
@@ -52,11 +52,11 @@ end
   γdt, αdt, βdt = γ/dt, α/dt, β/dt
   J = calc_J(integrator, cache, is_compos)
   if u isa Number
-    LU1 = γdt*mass_matrix - J
-    LU2 = (αdt + βdt*im)*mass_matrix - J
+    LU1 = -γdt*mass_matrix + J
+    LU2 = -(αdt + βdt*im)*mass_matrix + J
   else
-    LU1 = lu(γdt*mass_matrix - J)
-    LU2 = lu((αdt + βdt*im)*mass_matrix - J)
+    LU1 = lu(-γdt*mass_matrix + J)
+    LU2 = lu(-(αdt + βdt*im)*mass_matrix + J)
   end
   integrator.destats.nw += 1
 
@@ -126,9 +126,9 @@ end
       end
     end
 
-    w1 = @. w1 + dw1
-    w2 = @. w2 + dw2
-    w3 = @. w3 + dw3
+    w1 = @. w1 - dw1
+    w2 = @. w2 - dw2
+    w3 = @. w3 - dw3
 
     # transform `w` to `z`
     z1 = @. T11 * w1 + T12 * w2 + T13 * w3
@@ -228,8 +228,8 @@ end
                       (integrator.iter < 1 || new_jac ||
                        abs(dt - (t-integrator.tprev)) > 100eps(typeof(integrator.t))))
     @inbounds for II in CartesianIndices(J)
-      W1[II] = γdt * mass_matrix[Tuple(II)...] - J[II]
-      W2[II] = (αdt + βdt*im) * mass_matrix[Tuple(II)...] - J[II]
+      W1[II] = -γdt * mass_matrix[Tuple(II)...] + J[II]
+      W2[II] = -(αdt + βdt*im) * mass_matrix[Tuple(II)...] + J[II]
     end
   else
     new_W = false
@@ -320,9 +320,9 @@ end
       end
     end
 
-    @. w1 = w1 + dw1
-    @. w2 = w2 + dw2
-    @. w3 = w3 + dw3
+    @. w1 = w1 - dw1
+    @. w2 = w2 - dw2
+    @. w3 = w3 - dw3
 
     # transform `w` to `z`
     @. z1 = T11 * w1 + T12 * w2 + T13 * w3

From 92f302beec5b045657c0a6c065a2b2b6d4972604 Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Tue, 19 Mar 2019 16:23:13 -0400
Subject: [PATCH 04/15] Update Rosenbrock solvers

---
 src/perform_step/rosenbrock_perform_step.jl | 107 +++++++++++++-------
 1 file changed, 69 insertions(+), 38 deletions(-)

diff --git a/src/perform_step/rosenbrock_perform_step.jl b/src/perform_step/rosenbrock_perform_step.jl
index a9de33c861..55d623e85b 100644
--- a/src/perform_step/rosenbrock_perform_step.jl
+++ b/src/perform_step/rosenbrock_perform_step.jl
@@ -43,6 +43,7 @@ end
     mul!(vec(k₁), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k₁), W, vec(linsolve_tmp), !repeat_step)
+    @. k₁ = -k₁
   end
   integrator.destats.nsolve += 1
 
@@ -61,6 +62,7 @@ end
     mul!(vec(k₂), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k₂), W, vec(linsolve_tmp))
+    @. k₂ = -k₂
   end
   integrator.destats.nsolve += 1
 
@@ -84,6 +86,7 @@ end
       mul!(vec(k₃), W, vec(linsolve_tmp))
     else
       cache.linsolve(vec(k₃), W, vec(linsolve_tmp))
+      @. k₃ = -k₃
     end
     integrator.destats.nsolve += 1
 
@@ -114,6 +117,7 @@ end
     mul!(vec(k₁), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k₁), W, vec(linsolve_tmp), !repeat_step)
+    @. k₁ = -k₁
   end
   integrator.destats.nsolve += 1
 
@@ -133,6 +137,7 @@ end
     mul!(vec(k₂), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k₂), W, vec(linsolve_tmp))
+    @. k₂ = -k₂
   end
   integrator.destats.nsolve += 1
 
@@ -153,6 +158,7 @@ end
     mul!(vec(k₃), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k₃), W, vec(linsolve_tmp))
+    @. k₃ = -k₃
   end
   integrator.destats.nsolve += 1
 
@@ -179,12 +185,12 @@ end
   dT = calc_tderivative(integrator, cache)
 
   W = calc_W!(integrator, cache, γ, repeat_step)
-  k₁ = _reshape(W\_vec((integrator.fsalfirst + γ*dT)), axes(uprev))
+  k₁ = _reshape(W\-_vec((integrator.fsalfirst + γ*dT)), axes(uprev))
   integrator.destats.nsolve += 1
   f₁ = f(uprev  + dto2*k₁, p, t+dto2)
   integrator.destats.nf += 1
 
-  k₂ = _reshape(W\_vec(f₁-k₁), axes(uprev)) + k₁
+  k₂ = _reshape(W\-_vec(f₁-k₁), axes(uprev)) + k₁
   integrator.destats.nsolve += 1
   u = uprev  + dt*k₂
 
@@ -192,7 +198,7 @@ end
     integrator.fsallast = f(u, p, t+dt)
     integrator.destats.nf += 1
 
-    k₃ = _reshape(W\_vec((integrator.fsallast - c₃₂*(k₂-f₁) - 2*(k₁-integrator.fsalfirst) + dt*dT)), axes(uprev))
+    k₃ = _reshape(W\-_vec((integrator.fsallast - c₃₂*(k₂-f₁) - 2*(k₁-integrator.fsalfirst) + dt*dT)), axes(uprev))
     integrator.destats.nsolve += 1
 
     utilde =  dto6*(k₁ - 2*k₂ + k₃)
@@ -221,18 +227,18 @@ end
 
   #f₀ = f(uprev, p, t)
 
-  k₁ = _reshape(W\_vec((integrator.fsalfirst + γ*dT)), axes(uprev))
+  k₁ = _reshape(W\-_vec((integrator.fsalfirst + γ*dT)), axes(uprev))
   integrator.destats.nsolve += 1
   f₁ = f(uprev  + dto2*k₁, p, t+dto2)
   integrator.destats.nf += 1
 
-  k₂ = _reshape(W\_vec(f₁-k₁), axes(uprev)) + k₁
+  k₂ = _reshape(W\-_vec(f₁-k₁), axes(uprev)) + k₁
   integrator.destats.nsolve += 1
   tmp = uprev  + dt*k₂
   integrator.fsallast = f(tmp, p, t+dt)
   integrator.destats.nf += 1
 
-  k₃ = _reshape(W\_vec((integrator.fsallast - c₃₂*(k₂-f₁) - 2(k₁-integrator.fsalfirst) + dt*dT)), axes(uprev))
+  k₃ = _reshape(W\-_vec((integrator.fsallast - c₃₂*(k₂-f₁) - 2(k₁-integrator.fsalfirst) + dt*dT)), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + dto6*(k₁ + 4k₂ + k₃)
 
@@ -300,7 +306,7 @@ end
 
   linsolve_tmp =  integrator.fsalfirst + dtd1*dT
 
-  k1 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k1 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a21*k1
   du = f(u, p, t+c2*dt)
@@ -308,7 +314,7 @@ end
 
   linsolve_tmp =  du + dtd2*dT + dtC21*k1
 
-  k2 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k2 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a31*k1 + a32*k2
   du = f(u, p, t+c3*dt)
@@ -316,7 +322,7 @@ end
 
   linsolve_tmp =  du + dtd3*dT + dtC31*k1 + dtC32*k2
 
-  k3 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k3 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + b1*k1 + b2*k2 + b3*k3
   integrator.fsallast = f(u, p, t + dt)
@@ -361,6 +367,7 @@ end
     mul!(vec(k1), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k1), W, vec(linsolve_tmp), !repeat_step)
+    @. k1 = -k1
   end
   integrator.destats.nsolve += 1
 
@@ -380,6 +387,7 @@ end
     mul!(vec(k2), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k2), W, vec(linsolve_tmp))
+    @. k2 = -k2
   end
   integrator.destats.nsolve += 1
 
@@ -399,6 +407,7 @@ end
     mul!(vec(k3), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k3), W, vec(linsolve_tmp))
+    @. k3 = -k3
   end
   integrator.destats.nsolve += 1
 
@@ -443,14 +452,14 @@ end
 
   linsolve_tmp =  integrator.fsalfirst + dtd1*dT
 
-  k1 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k1 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev # +a21*k1 a21 == 0
   # du = f(u, p, t+c2*dt) c2 == 0 and a21 == 0 => du = f(uprev, p, t) == fsalfirst
 
   linsolve_tmp =  integrator.fsalfirst + dtd2*dT + dtC21*k1
 
-  k2 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k2 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a31*k1 + a32*k2
   du = f(u, p, t+c3*dt)
@@ -458,11 +467,11 @@ end
 
   linsolve_tmp =  du + dtd3*dT + dtC31*k1 + dtC32*k2
 
-  k3 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k3 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   linsolve_tmp =  du + dtd4*dT + dtC41*k1 + dtC42*k2 + dtC43*k3
 
-  k4 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k4 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + b1*k1 + b2*k2 + b3*k3 + b4*k4
   integrator.fsallast = f(u, p, t + dt)
@@ -512,6 +521,7 @@ end
     mul!(vec(k1), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k1), W, vec(linsolve_tmp), !repeat_step)
+    @. k1 = -k1
   end
   integrator.destats.nsolve += 1
 
@@ -535,6 +545,7 @@ end
     mul!(vec(k2), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k2), W, vec(linsolve_tmp))
+    @. k2 = -k2
   end
   integrator.destats.nsolve += 1
 
@@ -554,6 +565,7 @@ end
     mul!(vec(k3), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k3), W, vec(linsolve_tmp))
+    @. k3 = -k3
   end
   integrator.destats.nsolve += 1
 
@@ -569,6 +581,7 @@ end
     mul!(vec(k4), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k4), W, vec(linsolve_tmp))
+    @. k4 = -k4
   end
   integrator.destats.nsolve += 1
 
@@ -616,7 +629,7 @@ end
 
   linsolve_tmp =  integrator.fsalfirst + dtd1*dT
 
-  k1 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k1 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev +a21*k1
   du = f(u, p, t+c2*dt)
@@ -624,7 +637,7 @@ end
 
   linsolve_tmp =  du + dtd2*dT + dtC21*k1
 
-  k2 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k2 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a31*k1 + a32*k2
   du = f(u, p, t+c3*dt)
@@ -632,12 +645,12 @@ end
 
   linsolve_tmp =  du + dtd3*dT + dtC31*k1 + dtC32*k2
 
-  k3 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k3 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
 
   linsolve_tmp =  du + dtd4*dT + dtC41*k1 + dtC42*k2 + dtC43*k3
 
-  k4 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k4 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + b1*k1 + b2*k2 + b3*k3 + b4*k4
   integrator.fsallast = f(u, p, t + dt)
@@ -686,6 +699,7 @@ end
     mul!(vec(k1), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k1), W, vec(linsolve_tmp), !repeat_step)
+    @. k1 = -k1
   end
   integrator.destats.nsolve += 1
 
@@ -705,6 +719,7 @@ end
     mul!(vec(k2), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k2), W, vec(linsolve_tmp))
+    @. k2 = -k2
   end
   integrator.destats.nsolve += 1
 
@@ -724,6 +739,7 @@ end
     mul!(vec(k3), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k3), W, vec(linsolve_tmp))
+    @. k3 = -k3
   end
   integrator.destats.nsolve += 1
 
@@ -739,6 +755,7 @@ end
     mul!(vec(k4), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k4), W, vec(linsolve_tmp))
+    @. k4 = -k4
   end
   integrator.destats.nsolve += 1
 
@@ -805,7 +822,7 @@ end
 
   linsolve_tmp =  du + dtd1*dT
 
-  k1 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k1 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a21*k1
   du = f(u, p, t+c2*dt)
@@ -813,7 +830,7 @@ end
 
   linsolve_tmp =  du + dtd2*dT + dtC21*k1
 
-  k2 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k2 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a31*k1 + a32*k2
   du = f(u, p, t+c3*dt)
@@ -821,7 +838,7 @@ end
 
   linsolve_tmp =  du + dtd3*dT + (dtC31*k1 + dtC32*k2)
 
-  k3 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k3 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a41*k1 + a42*k2 + a43*k3
   du = f(u, p, t+c4*dt)
@@ -829,7 +846,7 @@ end
 
   linsolve_tmp =  du + dtd4*dT + (dtC41*k1 + dtC42*k2 + dtC43*k3)
 
-  k4 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k4 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a51*k1 + a52*k2 + a53*k3 + a54*k4
   du = f(u, p, t+dt)
@@ -837,7 +854,7 @@ end
 
   linsolve_tmp =  du + (dtC52*k2 + dtC54*k4 + dtC51*k1 + dtC53*k3)
 
-  k5 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k5 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = u + k5
   du = f(u, p, t+dt)
@@ -845,7 +862,7 @@ end
 
   linsolve_tmp =  du + (dtC61*k1 + dtC62*k2 + dtC65*k5 + dtC64*k4 + dtC63*k3)
 
-  k6 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k6 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = u + k6
 
@@ -911,6 +928,7 @@ end
     mul!(vec(k1), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k1), W, vec(linsolve_tmp), !repeat_step)
+    @. k1 = -k1
   end
   integrator.destats.nsolve += 1
 
@@ -930,6 +948,7 @@ end
     mul!(vec(k2), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k2), W, vec(linsolve_tmp))
+    @. k2 = -k2
   end
   integrator.destats.nsolve += 1
 
@@ -949,6 +968,7 @@ end
     mul!(vec(k3), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k3), W, vec(linsolve_tmp))
+    @. k3 = -k3
   end
   integrator.destats.nsolve += 1
 
@@ -968,6 +988,7 @@ end
     mul!(vec(k4), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k4), W, vec(linsolve_tmp))
+    @. k4 = -k4
   end
   integrator.destats.nsolve += 1
 
@@ -987,6 +1008,7 @@ end
     mul!(vec(k5), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k5), W, vec(linsolve_tmp))
+    @. k5 = -k5
   end
   integrator.destats.nsolve += 1
 
@@ -1012,6 +1034,7 @@ end
     mul!(vec(k6), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k6), W, vec(linsolve_tmp))
+    @. k6 = -k6
   end
   integrator.destats.nsolve += 1
 
@@ -1086,7 +1109,7 @@ end
 
   linsolve_tmp =  du1 + dtd1*dT
 
-  k1 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k1 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a21*k1
   du = f(u, p, t+c2*dt)
@@ -1094,7 +1117,7 @@ end
 
   linsolve_tmp =  du + dtd2*dT + dtC21*k1
 
-  k2 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k2 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a31*k1 + a32*k2
   du = f(u, p, t+c3*dt)
@@ -1102,7 +1125,7 @@ end
 
   linsolve_tmp =  du + dtd3*dT + (dtC31*k1 + dtC32*k2)
 
-  k3 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k3 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a41*k1 + a42*k2 + a43*k3
   du = f(u, p, t+c4*dt)
@@ -1110,7 +1133,7 @@ end
 
   linsolve_tmp =  du + dtd4*dT + (dtC41*k1 + dtC42*k2 + dtC43*k3)
 
-  k4 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k4 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a51*k1 + a52*k2 + a53*k3 + a54*k4
   du = f(u, p, t+c5*dt)
@@ -1118,7 +1141,7 @@ end
 
   linsolve_tmp = du + dtd5*dT + (dtC52*k2 + dtC54*k4 + dtC51*k1 + dtC53*k3)
 
-  k5 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k5 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a61*k1 + a62*k2 + a63*k3 + a64*k4 + a65*k5
   du = f(u, p, t+dt)
@@ -1126,7 +1149,7 @@ end
 
   linsolve_tmp =  du + (dtC61*k1 + dtC62*k2 + dtC63*k3 + dtC64*k4 + dtC65*k5)
 
-  k6 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k6 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = u + k6
   du = f(u, p, t+dt)
@@ -1134,7 +1157,7 @@ end
 
   linsolve_tmp = du + (dtC71*k1 + dtC72*k2 + dtC73*k3 + dtC74*k4 + dtC75*k5 + dtC76*k6)
 
-  k7 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k7 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = u + k7
   du = f(u, p, t+dt)
@@ -1142,7 +1165,7 @@ end
 
   linsolve_tmp = du + (dtC81*k1 + dtC82*k2 + dtC83*k3 + dtC84*k4 + dtC85*k5 + dtC86*k6 + dtC87*k7)
 
-  k8 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k8 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = u + k8
   du = f(u, p, t+dt)
@@ -1222,6 +1245,7 @@ end
     mul!(vec(k1), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k1), W, vec(linsolve_tmp), !repeat_step)
+    @. k1 = -k1
   end
   integrator.destats.nsolve += 1
 
@@ -1241,6 +1265,7 @@ end
     mul!(vec(k2), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k2), W, vec(linsolve_tmp))
+    @. k2 = -k2
   end
   integrator.destats.nsolve += 1
 
@@ -1260,6 +1285,7 @@ end
     mul!(vec(k3), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k3), W, vec(linsolve_tmp))
+    @. k3 = -k3
   end
   integrator.destats.nsolve += 1
 
@@ -1279,6 +1305,7 @@ end
     mul!(vec(k4), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k4), W, vec(linsolve_tmp))
+    @. k4 = -k4
   end
   integrator.destats.nsolve += 1
 
@@ -1301,6 +1328,7 @@ end
     mul!(vec(k5), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k5), W, vec(linsolve_tmp))
+    @. k5 = -k5
   end
   integrator.destats.nsolve += 1
 
@@ -1329,6 +1357,7 @@ end
     mul!(vec(k6), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k6), W, vec(linsolve_tmp))
+    @. k6 = -k6
   end
   integrator.destats.nsolve += 1
 
@@ -1354,6 +1383,7 @@ end
     mul!(vec(k7), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k7), W, vec(linsolve_tmp))
+    @. k7 = -k7
   end
   integrator.destats.nsolve += 1
 
@@ -1379,6 +1409,7 @@ end
     mul!(vec(k8), W, vec(linsolve_tmp))
   else
     cache.linsolve(vec(k8), W, vec(linsolve_tmp))
+    @. k8 = -k8
   end
   integrator.destats.nsolve += 1
 
@@ -1451,7 +1482,7 @@ end
 
   linsolve_tmp = integrator.fsalfirst + dtd1*dT
 
-  k1 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k1 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a21*k1
   du = f(u, p, t+c2*dt)
@@ -1459,7 +1490,7 @@ end
 
   linsolve_tmp =  du + dtd2*dT + dtC21*k1
 
-  k2 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k2 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a31*k1 + a32*k2
   du = f(u, p, t+c3*dt)
@@ -1467,7 +1498,7 @@ end
 
   linsolve_tmp =  du + dtd3*dT + (dtC31*k1 + dtC32*k2)
 
-  k3 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k3 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a41*k1 + a42*k2 + a43*k3
   du = f(u, p, t+c4*dt)
@@ -1475,7 +1506,7 @@ end
 
   linsolve_tmp =  du + dtd4*dT + (dtC41*k1 + dtC42*k2 + dtC43*k3)
 
-  k4 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k4 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a51*k1 + a52*k2 + a53*k3 + a54*k4
   du = f(u, p, t+c5*dt)
@@ -1483,7 +1514,7 @@ end
 
   linsolve_tmp =  du + dtd5*dT + (dtC52*k2 + dtC54*k4 + dtC51*k1 + dtC53*k3)
 
-  k5 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k5 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev  + a61*k1 + a62*k2 + a63*k3 + a64*k4 + a65*k5
   du = f(u, p, t+c6*dt)
@@ -1491,7 +1522,7 @@ end
 
   linsolve_tmp =  du + dtd6*dT + (dtC61*k1 + dtC62*k2 + dtC63*k3 + dtC64*k4 + dtC65*k5)
 
-  k6 = _reshape(W\_vec(linsolve_tmp), axes(uprev))
+  k6 = _reshape(W\-_vec(linsolve_tmp), axes(uprev))
   integrator.destats.nsolve += 1
   u = uprev+b1*k1+b2*k2+b3*k3+b4*k4+b5*k5+b6*k6
 
@@ -1501,4 +1532,4 @@ end
   integrator.k[1] = integrator.fsalfirst
   integrator.k[2] = integrator.fsallast
   integrator.u = u
-end
\ No newline at end of file
+end

From 3ed2997c6705561b8be1d82c2fa8ed352246c2a6 Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Tue, 19 Mar 2019 16:40:44 -0400
Subject: [PATCH 05/15] invW handling

---
 src/nlsolve/newton.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/nlsolve/newton.jl b/src/nlsolve/newton.jl
index fbe8add743..ee2c7b9814 100644
--- a/src/nlsolve/newton.jl
+++ b/src/nlsolve/newton.jl
@@ -62,7 +62,7 @@ Equations II, Springer Series in Computational Mathematics. ISBN
     end
     integrator.destats.nf += 1
     if DiffEqBase.has_invW(f)
-      dz = _reshape(W * _vec(ztmp), axes(ztmp)) # Here W is actually invW
+      dz = _reshape(W * -_vec(ztmp), axes(ztmp)) # Here W is actually invW
     else
       dz = _reshape(W \ _vec(ztmp), axes(ztmp))
     end
@@ -132,6 +132,7 @@ end
     end
     if DiffEqBase.has_invW(f)
       mul!(vecdz,W,vecztmp) # Here W is actually invW
+      @. vecdz = -vecdz
     else
       cache.linsolve(vecdz,W,vecztmp,iter == 1 && new_W)
     end

From c7cf968602b1bb38d8f07f8023d38ac4e9281499 Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Tue, 19 Mar 2019 16:41:09 -0400
Subject: [PATCH 06/15] Update RadauIIA5

---
 src/derivative_utils.jl               | 17 ++++++++---------
 src/perform_step/firk_perform_step.jl | 19 +++----------------
 test/ode/ode_firk_tests.jl            |  7 +++++--
 3 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl
index b56afde72a..248dd96091 100644
--- a/src/derivative_utils.jl
+++ b/src/derivative_utils.jl
@@ -237,18 +237,18 @@ function LinearAlgebra.mul!(Y::AbstractVecOrMat, W::WOperator, B::AbstractVecOrM
   end
 end
 
-function do_nowJ(integrator, alg, repeat_step)::Bool
+function do_newJ(integrator, alg::T, repeat_step)::Bool where T
   repeat_step && return false
   !alg_can_repeat_jac(alg) && return true
-  isnewton = alg isa NewtonAlgorithm
-  isnewton && ( @unpack ηold,nl_iters = integrator.cache.nlsolver )
+  isnewton = T <: NewtonAlgorithm
+  isnewton && (T <: RadauIIA5 ? ( @unpack ηold,nl_iters = integrator.cache ) : ( @unpack ηold,nl_iters = integrator.cache.nlsolver ))
   integrator.force_stepfail && return true
   # reuse J when there is fast convergence
   fastconvergence = nl_iters == 1 && ηold >= alg.new_jac_conv_bound
   return !fastconvergence
 end
 
-function do_nowW(integrator, new_jac)::Bool
+function do_newW(integrator, new_jac)::Bool
   integrator.iter <= 1 && return true
   new_jac && return true
   # reuse W when the change in stepsize is small enough
@@ -312,18 +312,17 @@ function calc_W!(integrator, cache::OrdinaryDiffEqMutableCache, dtgamma, repeat_
     # skip calculation of inv(W) if step is repeated
     (!repeat_step && W_transform) ? f.invW_t(W, uprev, p, dtgamma, t) : f.invW(W, uprev, p, dtgamma, t) # W == inverse W
     is_compos && calc_J!(integrator, cache, true)
-
   elseif DiffEqBase.has_jac(f) && f.jac_prototype !== nothing
     # skip calculation of J if step is repeated
-    ( new_jac = do_nowJ(integrator, alg, repeat_step) ) && DiffEqBase.update_coefficients!(W,uprev,p,t)
+    ( new_jac = do_newJ(integrator, alg, repeat_step) ) && DiffEqBase.update_coefficients!(W,uprev,p,t)
     # skip calculation of W if step is repeated
     @label J2W
-    ( new_W = do_nowW(integrator, new_jac) ) && (W.transform = W_transform; set_gamma!(W, dtgamma))
+    ( new_W = do_newW(integrator, new_jac) ) && (W.transform = W_transform; set_gamma!(W, dtgamma))
   else # concrete W using jacobian from `calc_J!`
     # skip calculation of J if step is repeated
-    ( new_jac = do_nowJ(integrator, alg, repeat_step) ) && calc_J!(integrator, cache, is_compos)
+    ( new_jac = do_newJ(integrator, alg, repeat_step) ) && calc_J!(integrator, cache, is_compos)
     # skip calculation of W if step is repeated
-    ( new_W = do_nowW(integrator, new_jac) ) && jacobian2W!(W, mass_matrix, dtgamma, J, W_transform)
+    ( new_W = do_newW(integrator, new_jac) ) && jacobian2W!(W, mass_matrix, dtgamma, J, W_transform)
   end
   isnewton && set_new_W!(cache.nlsolver, new_W)
   new_W && (integrator.destats.nw += 1)
diff --git a/src/perform_step/firk_perform_step.jl b/src/perform_step/firk_perform_step.jl
index bdad4d9af6..9b9a1e45e8 100644
--- a/src/perform_step/firk_perform_step.jl
+++ b/src/perform_step/firk_perform_step.jl
@@ -214,27 +214,14 @@ end
   c1mc2= c1-c2
   κtol = κ*tol # used in Newton iteration
   γdt, αdt, βdt = γ/dt, α/dt, β/dt
-  new_W = true
-  if repeat_step || (alg_can_repeat_jac(alg) &&
-                     (!integrator.last_stepfail && cache.nl_iters == 1 &&
-                      cache.ηold < alg.new_jac_conv_bound))
-    new_jac = false
-  else
-    new_jac = true
-    calc_J!(integrator, cache, is_compos)
-  end
-  # skip calculation of W if step is repeated
-  if !repeat_step && (!alg_can_repeat_jac(alg) ||
-                      (integrator.iter < 1 || new_jac ||
-                       abs(dt - (t-integrator.tprev)) > 100eps(typeof(integrator.t))))
+  (new_jac = do_newJ(integrator, alg, repeat_step)) && calc_J!(integrator, cache, is_compos)
+  if (new_W = do_newW(integrator, new_jac))
     @inbounds for II in CartesianIndices(J)
       W1[II] = -γdt * mass_matrix[Tuple(II)...] + J[II]
       W2[II] = -(αdt + βdt*im) * mass_matrix[Tuple(II)...] + J[II]
     end
-  else
-    new_W = false
+    integrator.destats.nw += 1
   end
-  new_W && (integrator.destats.nw += 1)
 
   # TODO better initial guess
   if integrator.iter == 1 || integrator.u_modified || alg.extrapolant == :constant
diff --git a/test/ode/ode_firk_tests.jl b/test/ode/ode_firk_tests.jl
index 87a655d496..a6e896e53d 100644
--- a/test/ode/ode_firk_tests.jl
+++ b/test/ode/ode_firk_tests.jl
@@ -12,10 +12,13 @@ end
 # test adaptivity
 for iip in (true, false)
   vanstiff = ODEProblem{iip}(van, [0;sqrt(3)], (0.0,1.0), 1e6)
-  @test length(solve(vanstiff, RadauIIA5())) < 110
+  sol = solve(vanstiff, RadauIIA5())
+  @test sol.naccept + sol.nreject > sol.njacs # J reuse
+  @test sol.destats.njacs < sol.destats.nw # W reuse
+  @test length(sol) < 120
   @test length(solve(remake(vanstiff, p=1e7), RadauIIA5())) < 150
   @test length(solve(remake(vanstiff, p=1e7), reltol=[1e-4, 1e-6], RadauIIA5())) < 160
-  @test length(solve(remake(vanstiff, p=1e7), RadauIIA5(), reltol=1e-9, abstol=1e-9)) < 860
+  @test length(solve(remake(vanstiff, p=1e7), RadauIIA5(), reltol=1e-9, abstol=1e-9)) < 870
   @test length(solve(remake(vanstiff, p=1e9), RadauIIA5())) < 160
   @test length(solve(remake(vanstiff, p=1e10), RadauIIA5())) < 190
 end

From 68d7b0dec3eb3f9bac36d4e7268c2cea40fd7067 Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Tue, 19 Mar 2019 19:03:55 -0400
Subject: [PATCH 07/15] Fix `fastconvergence` judgment

---
 src/derivative_utils.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl
index 248dd96091..40b72179a0 100644
--- a/src/derivative_utils.jl
+++ b/src/derivative_utils.jl
@@ -244,7 +244,7 @@ function do_newJ(integrator, alg::T, repeat_step)::Bool where T
   isnewton && (T <: RadauIIA5 ? ( @unpack ηold,nl_iters = integrator.cache ) : ( @unpack ηold,nl_iters = integrator.cache.nlsolver ))
   integrator.force_stepfail && return true
   # reuse J when there is fast convergence
-  fastconvergence = nl_iters == 1 && ηold >= alg.new_jac_conv_bound
+  fastconvergence = nl_iters == 1 && ηold <= alg.new_jac_conv_bound
   return !fastconvergence
 end
 

From 546fdc9ed002803741153db00b7204e021adfd01 Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Wed, 20 Mar 2019 16:07:21 -0400
Subject: [PATCH 08/15] Fix WOperator test errors

---
 test/utility_tests.jl | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/test/utility_tests.jl b/test/utility_tests.jl
index 20ec547bf8..449c7db7ce 100644
--- a/test/utility_tests.jl
+++ b/test/utility_tests.jl
@@ -4,20 +4,21 @@ using OrdinaryDiffEq, LinearAlgebra, SparseArrays, Random, Test, DiffEqOperators
 @testset "WOperator" begin
   Random.seed!(0); y = zeros(2); b = rand(2)
   mm = I; _J = rand(2,2)
-  W = WOperator(mm, 1.0, DiffEqArrayOperator(_J), false)
-  set_gamma!(W, 2.0)
-  _W = mm - 2.0 * _J
-  @test convert(AbstractMatrix,W) ≈ _W
-  @test W * b ≈ _W * b
-  mul!(y, W, b); @test y ≈ _W * b
+  _Ws = [-mm + 2.0 * _J, -mm/2.0 + _J]
+  for (_W, W_transform) in zip(_Ws, [false, true])
+    W = WOperator(mm, 1.0, DiffEqArrayOperator(_J), W_transform)
+    set_gamma!(W, 2.0)
+    @test convert(AbstractMatrix,W) == _W
+    @test W * b == _W * b
+    mul!(y, W, b); @test y == _W * b
+  end
 end
 
 @testset "calc_W!" begin
   A = [-1.0 0.0; 0.0 -0.5]; mm = [2.0 0.0; 0.0 1.0]
   u0 = [1.0, 1.0]; tmp = zeros(2)
   tspan = (0.0,1.0); dt = 0.01; dtgamma = 0.5dt
-  concrete_W = mm - dtgamma * A
-  concrete_Wt = mm/dtgamma - A
+  concrete_W = -mm + dtgamma * A
 
   # Out-of-place
   fun = ODEFunction((u,p,t) -> A*u;
@@ -25,7 +26,7 @@ end
                     jac=(u,p,t) -> A)
   integrator = init(ODEProblem(fun,u0,tspan), ImplicitEuler(); adaptive=false, dt=dt)
   W = calc_W!(integrator, integrator.cache, dtgamma, false)
-  @test convert(AbstractMatrix, W) ≈ concrete_W
+  @test convert(AbstractMatrix, W) == concrete_W
   @test W \ u0 ≈ concrete_W \ u0
 
   # In-place
@@ -34,8 +35,8 @@ end
                     jac_prototype=DiffEqArrayOperator(A))
   integrator = init(ODEProblem(fun,u0,tspan), ImplicitEuler(); adaptive=false, dt=dt)
   calc_W!(integrator, integrator.cache, dtgamma, false)
-  @test convert(AbstractMatrix, integrator.cache.W) ≈ concrete_W
-  ldiv!(tmp, lu!(integrator.cache.W), u0); @test tmp ≈ concrete_W \ u0
+  @test convert(AbstractMatrix, integrator.cache.W) == concrete_W
+  ldiv!(tmp, lu!(integrator.cache.W), u0); @test tmp == concrete_W \ u0
 end
 
 @testset "Implicit solver with lazy W" begin
@@ -50,7 +51,7 @@ end
   fun2_ip = ODEFunction(_f_ip; mass_matrix=mm,
   jac_prototype=DiffEqArrayOperator(similar(A); update_func=(J,u,p,t) -> (J .= t .* A; J)))
 
-  for Alg in [ImplicitEuler, Rosenbrock23]
+  for Alg in [ImplicitEuler, Rosenbrock23, Rodas3]
     println(Alg)
     sol1 = solve(ODEProblem(fun1,u0,tspan), Alg(); adaptive=false, dt=0.01)
     sol2 = solve(ODEProblem(fun2,u0,tspan), Alg(); adaptive=false, dt=0.01)

From b570be4186e3792c00c08a55f9693e4086420d8f Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Wed, 20 Mar 2019 16:59:29 -0400
Subject: [PATCH 09/15] Fix `WOperator` argument order

---
 src/caches/rosenbrock_caches.jl |  2 +-
 src/derivative_utils.jl         |  2 +-
 test/utility_tests.jl           | 15 ++++++++-------
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/caches/rosenbrock_caches.jl b/src/caches/rosenbrock_caches.jl
index c3bad488d2..216ba612ca 100644
--- a/src/caches/rosenbrock_caches.jl
+++ b/src/caches/rosenbrock_caches.jl
@@ -821,4 +821,4 @@ function alg_cache(alg::RosenbrockW6S4OS,u,rate_prototype,uEltypeNoUnits,uBottom
   tf = DiffEqDiffTools.TimeDerivativeWrapper(f,u,p)
   uf = DiffEqDiffTools.UDerivativeWrapper(f,t,p)
   RosenbrockWConstantCache(tf,uf,RosenbrockW6S4OSConstantCache(real(uBottomEltypeNoUnits),real(tTypeNoUnits)))
-end
\ No newline at end of file
+end
diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl
index 40b72179a0..9553b608a4 100644
--- a/src/derivative_utils.jl
+++ b/src/derivative_utils.jl
@@ -126,7 +126,7 @@ mutable struct WOperator{T,
   _func_cache           # cache used in `mul!`
   _concrete_form         # non-lazy form (matrix/number) of the operator
   WOperator(mass_matrix, gamma, J, inplace; transform=false) = new{eltype(J),typeof(mass_matrix),
-    typeof(gamma),typeof(J)}(mass_matrix,gamma,J,inplace,transform,nothing,nothing)
+    typeof(gamma),typeof(J)}(mass_matrix,gamma,J,transform,inplace,nothing,nothing)
 end
 function WOperator(f::DiffEqBase.AbstractODEFunction, gamma, inplace; transform=false)
   @assert DiffEqBase.has_jac(f) "f needs to have an associated jacobian"
diff --git a/test/utility_tests.jl b/test/utility_tests.jl
index 449c7db7ce..2b3e796d5b 100644
--- a/test/utility_tests.jl
+++ b/test/utility_tests.jl
@@ -2,15 +2,16 @@ using OrdinaryDiffEq: WOperator, set_gamma!, calc_W!
 using OrdinaryDiffEq, LinearAlgebra, SparseArrays, Random, Test, DiffEqOperators
 
 @testset "WOperator" begin
-  Random.seed!(0); y = zeros(2); b = rand(2)
+  Random.seed!(123)
+  y = zeros(2); b = rand(2)
   mm = I; _J = rand(2,2)
   _Ws = [-mm + 2.0 * _J, -mm/2.0 + _J]
-  for (_W, W_transform) in zip(_Ws, [false, true])
-    W = WOperator(mm, 1.0, DiffEqArrayOperator(_J), W_transform)
+  for inplace in (true, false), (_W, W_transform) in zip(_Ws, [false, true])
+    W = WOperator(mm, 1.0, DiffEqArrayOperator(_J), inplace, transform=W_transform)
     set_gamma!(W, 2.0)
-    @test convert(AbstractMatrix,W) == _W
-    @test W * b == _W * b
-    mul!(y, W, b); @test y == _W * b
+    @test convert(AbstractMatrix,W) ≈ _W
+    @test W * b ≈ _W * b
+    mul!(y, W, b); @test y ≈ _W * b
   end
 end
 
@@ -51,7 +52,7 @@ end
   fun2_ip = ODEFunction(_f_ip; mass_matrix=mm,
   jac_prototype=DiffEqArrayOperator(similar(A); update_func=(J,u,p,t) -> (J .= t .* A; J)))
 
-  for Alg in [ImplicitEuler, Rosenbrock23, Rodas3]
+  for Alg in [ImplicitEuler, Rosenbrock23, Rodas5]
     println(Alg)
     sol1 = solve(ODEProblem(fun1,u0,tspan), Alg(); adaptive=false, dt=0.01)
     sol2 = solve(ODEProblem(fun2,u0,tspan), Alg(); adaptive=false, dt=0.01)

From c833ab3ad3116841f05c1ac39fb7479a5776080e Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Wed, 20 Mar 2019 17:06:55 -0400
Subject: [PATCH 10/15] Fix FIRK tests

---
 test/ode/ode_firk_tests.jl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/test/ode/ode_firk_tests.jl b/test/ode/ode_firk_tests.jl
index a6e896e53d..5878c05b3c 100644
--- a/test/ode/ode_firk_tests.jl
+++ b/test/ode/ode_firk_tests.jl
@@ -13,12 +13,14 @@ end
 for iip in (true, false)
   vanstiff = ODEProblem{iip}(van, [0;sqrt(3)], (0.0,1.0), 1e6)
   sol = solve(vanstiff, RadauIIA5())
-  @test sol.naccept + sol.nreject > sol.njacs # J reuse
-  @test sol.destats.njacs < sol.destats.nw # W reuse
-  @test length(sol) < 120
+  if iip
+    @test sol.destats.naccept + sol.destats.nreject > sol.destats.njacs # J reuse
+    @test sol.destats.njacs < sol.destats.nw # W reuse
+  end
+  @test length(sol) < 150
   @test length(solve(remake(vanstiff, p=1e7), RadauIIA5())) < 150
-  @test length(solve(remake(vanstiff, p=1e7), reltol=[1e-4, 1e-6], RadauIIA5())) < 160
+  @test length(solve(remake(vanstiff, p=1e7), reltol=[1e-4, 1e-6], RadauIIA5())) < 170
   @test length(solve(remake(vanstiff, p=1e7), RadauIIA5(), reltol=1e-9, abstol=1e-9)) < 870
-  @test length(solve(remake(vanstiff, p=1e9), RadauIIA5())) < 160
+  @test length(solve(remake(vanstiff, p=1e9), RadauIIA5())) < 170
   @test length(solve(remake(vanstiff, p=1e10), RadauIIA5())) < 190
 end

From d41a7533e85ba986ef7f222a6d8a4a9a97f1047c Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Wed, 20 Mar 2019 17:22:51 -0400
Subject: [PATCH 11/15] Add random seed at the beginning of cache tests

---
 test/ode/ode_cache_tests.jl | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/test/ode/ode_cache_tests.jl b/test/ode/ode_cache_tests.jl
index 084b62e53b..e17a3ac2bd 100644
--- a/test/ode/ode_cache_tests.jl
+++ b/test/ode/ode_cache_tests.jl
@@ -1,5 +1,6 @@
 using OrdinaryDiffEq, DiffEqBase, DiffEqCallbacks, Test
 using Random
+Random.seed!(213)
 CACHE_TEST_ALGS = [Euler(),Midpoint(),RK4(),SSPRK22(),SSPRK33(),SSPRK53(), SSPRK53_2N1(), SSPRK53_2N2(),
   SSPRK63(),SSPRK73(),SSPRK83(),SSPRK432(),SSPRK932(),SSPRK54(),SSPRK104(),
   ORK256(), CarpenterKennedy2N54(), HSLDDRK64(), DGLDDRK73_C(), DGLDDRK84_C(), DGLDDRK84_F(), NDBLSRK124(), NDBLSRK134(), NDBLSRK144(),
@@ -50,21 +51,14 @@ for i in 1:50
 end
 
 println("Check some other integrators")
-Random.seed!(1)
 @test_nowarn sol = solve(prob,GenericImplicitEuler(nlsolve=OrdinaryDiffEq.NLSOLVEJL_SETUP(chunk_size=1)),callback=callback,dt=1/2)
-Random.seed!(2)
 @test_nowarn sol = solve(prob,GenericTrapezoid(nlsolve=OrdinaryDiffEq.NLSOLVEJL_SETUP(chunk_size=1)),callback=callback,dt=1/2)
-Random.seed!(3)
 @test_nowarn sol = solve(prob,Rosenbrock23(chunk_size=1),callback=callback,dt=1/2)
-Random.seed!(4)
 @test_nowarn sol = solve(prob,Rosenbrock32(chunk_size=1),callback=callback,dt=1/2)
 
 for alg in CACHE_TEST_ALGS
-  Random.seed!(hash(alg))
   @test_nowarn sol = solve(prob,alg,callback=callback,dt=1/2)
 end
 
-Random.seed!(5)
 @test_nowarn sol = solve(prob,Rodas4(chunk_size=1),callback=callback,dt=1/2)
-Random.seed!(6)
 @test_nowarn sol = solve(prob,Rodas5(chunk_size=1),callback=callback,dt=1/2)

From 3adc552de2c1d78ec736c21d9e6195a8cfeb288b Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Wed, 20 Mar 2019 18:04:09 -0400
Subject: [PATCH 12/15] Fix KenCarp's error on operators

---
 src/derivative_utils.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl
index 9553b608a4..80d805aca5 100644
--- a/src/derivative_utils.jl
+++ b/src/derivative_utils.jl
@@ -162,7 +162,9 @@ function Base.convert(::Type{AbstractMatrix}, W::WOperator)
       rmul!(copyto!(W._concrete_form, W.mass_matrix), -1/W.gamma)
       axpy!(one(W.gamma), convert(AbstractMatrix,W.J), W._concrete_form)
     else
-      @. W._concrete_form = -W.mass_matrix
+      @inbounds for j in axes(W._concrete_form, 2), i in axes(W._concrete_form, 1)
+        W._concrete_form[i, j] = -W.mass_matrix[i, j]
+      end
       axpy!(W.gamma, convert(AbstractMatrix,W.J), W._concrete_form)
     end
   end

From 51ae0083e0f4afaa3af0bc0694c5b139bfb870f6 Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Thu, 21 Mar 2019 12:20:19 -0400
Subject: [PATCH 13/15] Fix CompositeAlgorithm errors

---
 src/alg_utils.jl                      | 14 ++++++++++++++
 src/derivative_utils.jl               |  8 ++++----
 src/derivative_wrappers.jl            | 16 +++++++++-------
 src/nlsolve/newton.jl                 |  1 +
 src/perform_step/firk_perform_step.jl |  2 +-
 test/stiffness_detection_test.jl      |  7 ++++---
 6 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/src/alg_utils.jl b/src/alg_utils.jl
index cd9f180dcf..38535498fe 100644
--- a/src/alg_utils.jl
+++ b/src/alg_utils.jl
@@ -454,6 +454,20 @@ function unwrap_alg(integrator, is_stiff)
   end
 end
 
+function unwrap_cache(integrator, is_stiff)
+  alg   = integrator.alg
+  cache = integrator.cache
+  iscomp = alg isa CompositeAlgorithm
+  if !iscomp
+    return cache
+  elseif alg.choice_function isa AutoSwitch
+    num = is_stiff ? 2 : 1
+    return cache.caches[num]
+  else
+    return cache.caches[integrator.cache.current]
+  end
+end
+
 # Whether `uprev` is used in the algorithm directly.
 uses_uprev(alg::OrdinaryDiffEqAlgorithm, adaptive::Bool) = true
 uses_uprev(alg::ORK256, adaptive::Bool) = false
diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl
index 80d805aca5..b010dbeb41 100644
--- a/src/derivative_utils.jl
+++ b/src/derivative_utils.jl
@@ -239,11 +239,11 @@ function LinearAlgebra.mul!(Y::AbstractVecOrMat, W::WOperator, B::AbstractVecOrM
   end
 end
 
-function do_newJ(integrator, alg::T, repeat_step)::Bool where T
+function do_newJ(integrator, alg::T, cache, repeat_step)::Bool where T
   repeat_step && return false
   !alg_can_repeat_jac(alg) && return true
   isnewton = T <: NewtonAlgorithm
-  isnewton && (T <: RadauIIA5 ? ( @unpack ηold,nl_iters = integrator.cache ) : ( @unpack ηold,nl_iters = integrator.cache.nlsolver ))
+  isnewton && (T <: RadauIIA5 ? ( @unpack ηold,nl_iters = cache ) : ( @unpack ηold,nl_iters = cache.nlsolver ))
   integrator.force_stepfail && return true
   # reuse J when there is fast convergence
   fastconvergence = nl_iters == 1 && ηold <= alg.new_jac_conv_bound
@@ -316,13 +316,13 @@ function calc_W!(integrator, cache::OrdinaryDiffEqMutableCache, dtgamma, repeat_
     is_compos && calc_J!(integrator, cache, true)
   elseif DiffEqBase.has_jac(f) && f.jac_prototype !== nothing
     # skip calculation of J if step is repeated
-    ( new_jac = do_newJ(integrator, alg, repeat_step) ) && DiffEqBase.update_coefficients!(W,uprev,p,t)
+    ( new_jac = do_newJ(integrator, alg, cache, repeat_step) ) && DiffEqBase.update_coefficients!(W,uprev,p,t)
     # skip calculation of W if step is repeated
     @label J2W
     ( new_W = do_newW(integrator, new_jac) ) && (W.transform = W_transform; set_gamma!(W, dtgamma))
   else # concrete W using jacobian from `calc_J!`
     # skip calculation of J if step is repeated
-    ( new_jac = do_newJ(integrator, alg, repeat_step) ) && calc_J!(integrator, cache, is_compos)
+    ( new_jac = do_newJ(integrator, alg, cache, repeat_step) ) && calc_J!(integrator, cache, is_compos)
     # skip calculation of W if step is repeated
     ( new_W = do_newW(integrator, new_jac) ) && jacobian2W!(W, mass_matrix, dtgamma, J, W_transform)
   end
diff --git a/src/derivative_wrappers.jl b/src/derivative_wrappers.jl
index 513b0d4674..3e713935ba 100644
--- a/src/derivative_wrappers.jl
+++ b/src/derivative_wrappers.jl
@@ -1,11 +1,12 @@
 function derivative!(df::AbstractArray{<:Number}, f, x::Union{Number,AbstractArray{<:Number}}, fx::AbstractArray{<:Number}, integrator, grad_config)
+    alg = unwrap_alg(integrator, true)
     tmp = length(x) # We calculate derivtive for all elements in gradient
-    if get_current_alg_autodiff(integrator.alg, integrator.cache)
+    if alg_autodiff(alg)
         ForwardDiff.derivative!(df, f, fx, x, grad_config)
         integrator.destats.nf += 1
     else
         DiffEqDiffTools.finite_difference_gradient!(df, f, x, grad_config)
-        fdtype = integrator.alg.diff_type
+        fdtype = alg.diff_type
         if fdtype == Val{:forward} || fdtype == Val{:central}
             tmp *= 2
             if eltype(df)<:Complex
@@ -22,7 +23,7 @@ function derivative(f, x::Union{Number,AbstractArray{<:Number}},
     local d
     tmp = length(x) # We calculate derivtive for all elements in gradient
     alg = unwrap_alg(integrator, true)
-    if get_current_alg_autodiff(integrator.alg, integrator.cache)
+    if alg_autodiff(alg)
       integrator.destats.nf += 1
       d = ForwardDiff.derivative(f, x)
     else
@@ -38,7 +39,7 @@ end
 function jacobian(f, x, integrator)
     alg = unwrap_alg(integrator, true)
     local tmp
-    if get_current_alg_autodiff(alg, integrator.cache)
+    if alg_autodiff(alg)
       J = jacobian_autodiff(f, x)
       tmp = 1
     else
@@ -62,11 +63,12 @@ jacobian_finitediff(f, x::AbstractArray, diff_type) =
     DiffEqDiffTools.finite_difference_jacobian(f, x, diff_type, eltype(x), Val{false})
 
 function jacobian!(J::AbstractMatrix{<:Number}, f, x::AbstractArray{<:Number}, fx::AbstractArray{<:Number}, integrator::DiffEqBase.DEIntegrator, jac_config)
-    if get_current_alg_autodiff(integrator.alg, integrator.cache)
+    alg = unwrap_alg(integrator, true)
+    if alg_autodiff(alg)
       ForwardDiff.jacobian!(J, f, fx, x, jac_config)
       integrator.destats.nf += 1
     else
-      isforward = integrator.alg.diff_type === Val{:forward}
+      isforward = alg.diff_type === Val{:forward}
       if isforward
         forwardcache = get_tmp_cache(integrator)[2]
         f(forwardcache, x)
@@ -75,7 +77,7 @@ function jacobian!(J::AbstractMatrix{<:Number}, f, x::AbstractArray{<:Number}, f
       else # not forward difference
         DiffEqDiffTools.finite_difference_jacobian!(J, f, x, jac_config)
       end
-      integrator.destats.nf += (integrator.alg.diff_type==Val{:complex} && eltype(x)<:Real || isforward) ? length(x) : 2length(x)
+      integrator.destats.nf += (alg.diff_type==Val{:complex} && eltype(x)<:Real || isforward) ? length(x) : 2length(x)
     end
     nothing
 end
diff --git a/src/nlsolve/newton.jl b/src/nlsolve/newton.jl
index ee2c7b9814..e88924741a 100644
--- a/src/nlsolve/newton.jl
+++ b/src/nlsolve/newton.jl
@@ -104,6 +104,7 @@ end
   @unpack t,dt,uprev,u,p,cache = integrator
   @unpack z,dz,tmp,ztmp,k,κtol,c,γ,max_iter = nlsolver
   @unpack W, new_W = nlcache
+  cache = unwrap_cache(integrator, true)
 
   # precalculations
   mass_matrix = integrator.f.mass_matrix
diff --git a/src/perform_step/firk_perform_step.jl b/src/perform_step/firk_perform_step.jl
index 9b9a1e45e8..922b48cfdc 100644
--- a/src/perform_step/firk_perform_step.jl
+++ b/src/perform_step/firk_perform_step.jl
@@ -214,7 +214,7 @@ end
   c1mc2= c1-c2
   κtol = κ*tol # used in Newton iteration
   γdt, αdt, βdt = γ/dt, α/dt, β/dt
-  (new_jac = do_newJ(integrator, alg, repeat_step)) && calc_J!(integrator, cache, is_compos)
+  (new_jac = do_newJ(integrator, alg, cache, repeat_step)) && calc_J!(integrator, cache, is_compos)
   if (new_W = do_newW(integrator, new_jac))
     @inbounds for II in CartesianIndices(J)
       W1[II] = -γdt * mass_matrix[Tuple(II)...] + J[II]
diff --git a/test/stiffness_detection_test.jl b/test/stiffness_detection_test.jl
index e76c94a98d..8da469325c 100644
--- a/test/stiffness_detection_test.jl
+++ b/test/stiffness_detection_test.jl
@@ -2,15 +2,16 @@ using OrdinaryDiffEq, Test
 using DiffEqProblemLibrary.ODEProblemLibrary: importodeproblems; importodeproblems()
 import DiffEqProblemLibrary.ODEProblemLibrary: van
 
-prob1 = ODEProblem(van,[0,2.],(0.0,6),inv(0.003))
+prob1 = ODEProblem(van,  [0,2.],(0.0,6),inv(0.003))
+prob2 = ODEProblem(van.f,[0,2.],(0.0,6),inv(0.003))
 # out-of-place test
 function _van(u, p, t)
   μ = p[1]
   [μ*((1-u[2]^2)*u[1] - u[2]),
    1*u[1]]
 end
-prob2 = ODEProblem(_van,[0,2.],(0.0,6),inv(0.003))
-probArr = [prob1, prob2]
+prob3 = ODEProblem(_van,[0,2.],(0.0,6),inv(0.003))
+probArr = [prob1, prob2, prob3]
 # Test if switching back and forth
 is_switching_fb(sol) = maximum(diff(findall(x->x==2, sol.alg_choice))) > 5
 for prob in probArr

From 7a2285fefe1e55ffc647e68fa7b9b8952db29ebc Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Thu, 21 Mar 2019 12:57:51 -0400
Subject: [PATCH 14/15] No loops in WOperator

---
 src/derivative_utils.jl | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/derivative_utils.jl b/src/derivative_utils.jl
index b010dbeb41..ef4faf1d39 100644
--- a/src/derivative_utils.jl
+++ b/src/derivative_utils.jl
@@ -159,13 +159,11 @@ function Base.convert(::Type{AbstractMatrix}, W::WOperator)
   else
     # Non-allocating
     if W.transform
-      rmul!(copyto!(W._concrete_form, W.mass_matrix), -1/W.gamma)
-      axpy!(one(W.gamma), convert(AbstractMatrix,W.J), W._concrete_form)
+      copyto!(W._concrete_form, W.mass_matrix)
+      axpby!(one(W.gamma), convert(AbstractMatrix,W.J), -inv(W.gamma), W._concrete_form)
     else
-      @inbounds for j in axes(W._concrete_form, 2), i in axes(W._concrete_form, 1)
-        W._concrete_form[i, j] = -W.mass_matrix[i, j]
-      end
-      axpy!(W.gamma, convert(AbstractMatrix,W.J), W._concrete_form)
+      copyto!(W._concrete_form, W.mass_matrix)
+      axpby!(W.gamma, convert(AbstractMatrix,W.J), -one(W.gamma), W._concrete_form)
     end
   end
   W._concrete_form

From 938b6270011f697ffb3d2f7a42ad3c1a975f180e Mon Sep 17 00:00:00 2001
From: Yingbo Ma <mayingbo5@gmail.com>
Date: Thu, 21 Mar 2019 13:06:07 -0400
Subject: [PATCH 15/15] =?UTF-8?q?Cache=20tests:=20make=20`=CE=98`=20range?=
 =?UTF-8?q?=20in=20`[0.25,=200.45)`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/ode/ode_cache_tests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/ode/ode_cache_tests.jl b/test/ode/ode_cache_tests.jl
index e17a3ac2bd..2ceaa984d3 100644
--- a/test/ode/ode_cache_tests.jl
+++ b/test/ode/ode_cache_tests.jl
@@ -33,7 +33,7 @@ affect! = function (integrator)
   u = integrator.u
   resize!(integrator,length(u)+1)
   maxidx = findmax(u)[2]
-  Θ = rand()
+  Θ = rand()/5 + 0.25
   u[maxidx] = Θ
   u[end] = 1-Θ
   nothing