diff --git a/NDTensors/Project.toml b/NDTensors/Project.toml
index d687d67f9c..a32ba6814e 100644
--- a/NDTensors/Project.toml
+++ b/NDTensors/Project.toml
@@ -12,6 +12,7 @@ Folds = "41a02a25-b8f0-4f67-bc48-60067656b558"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MatrixFactorizations = "a3b82374-2e81-5b9e-98ce-41277c0e4c87"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 SimpleTraits = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
@@ -29,6 +30,7 @@ FLoops = "0.2.1"
 Folds = "0.2.8"
 Functors = "0.2, 0.3, 0.4"
 HDF5 = "0.14, 0.15, 0.16"
+MatrixFactorizations = "0.9.6"
 Requires = "1.1"
 SimpleTraits = "0.9.4"
 SplitApplyCombine = "1.2.2"
diff --git a/NDTensors/src/NDTensors.jl b/NDTensors/src/NDTensors.jl
index 96f72ef08b..1ec4b84f72 100644
--- a/NDTensors/src/NDTensors.jl
+++ b/NDTensors/src/NDTensors.jl
@@ -8,6 +8,7 @@ using FLoops
 using Folds
 using Random
 using LinearAlgebra
+using MatrixFactorizations
 using StaticArrays
 using Functors
 using HDF5
diff --git a/NDTensors/src/blocksparse/linearalgebra.jl b/NDTensors/src/blocksparse/linearalgebra.jl
index 0954bb3383..6f96d6a34c 100644
--- a/NDTensors/src/blocksparse/linearalgebra.jl
+++ b/NDTensors/src/blocksparse/linearalgebra.jl
@@ -304,7 +304,9 @@ qr(T::BlockSparseTensor{<:Any,2}; kwargs...) = qx(qr, T; kwargs...)
 # This code thanks to Niklas Tausendpfund
 # https://github.com/ntausend/variance_iTensor/blob/main/Hubig_variance_test.ipynb
 #
-function qx(qx::Function, T::BlockSparseTensor{<:Any,2}; kwargs...)
+function qx(
+  qx::Function, T::BlockSparseTensor{<:Any,2}; block_rtol=-1.0, return_Rp=false, kwargs...
+)
   ElT = eltype(T)
   # getting total number of blocks
   nnzblocksT = nnzblocks(T)
@@ -312,20 +314,24 @@ function qx(qx::Function, T::BlockSparseTensor{<:Any,2}; kwargs...)
   Qs = Vector{DenseTensor{ElT,2}}(undef, nnzblocksT)
   Xs = Vector{DenseTensor{ElT,2}}(undef, nnzblocksT)
+  if return_Rp
+    Xps = Vector{DenseTensor{ElT,2}}(undef, nnzblocksT)
+  end
 
   for (jj, b) in enumerate(eachnzblock(T))
     blockT = blockview(T, b)
-    QXb = qx(blockT; kwargs...) #call dense qr at src/linearalgebra.jl 387
+    QXb = qx(blockT; rtol=block_rtol, return_Rp, kwargs...) #call dense qr at src/linearalgebra.jl 387
 
     if (isnothing(QXb))
       return nothing
     end
 
-    Q, X = QXb
-    Qs[jj] = Q
-    Xs[jj] = X
+    Qs[jj] = QXb[1]
+    Xs[jj] = QXb[2]
+    if return_Rp
+      Xps[jj] = QXb[3]
+    end
   end
-  #
   # Make the new index connecting Q and R
   #
@@ -352,13 +358,17 @@ function qx(qx::Function, T::BlockSparseTensor{<:Any,2}; kwargs...)
 
   Q = BlockSparseTensor(ElT, undef, nzblocksQ, indsQ)
   X = BlockSparseTensor(ElT, undef, nzblocksX, indsX)
+  Xp = return_Rp ? BlockSparseTensor(ElT, undef, nzblocksX, indsX) : nothing
   for n in 1:nnzblocksT
     blockview(Q, nzblocksQ[n]) .= Qs[n]
     blockview(X, nzblocksX[n]) .= Xs[n]
+    if return_Rp
+      blockview(Xp, nzblocksX[n]) .= Xps[n]
+    end
   end
 
-  return Q, X
+  return Q, X, Xp
 end
 
 function exp(
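Note on the block-wise strategy used by `qx` above: the non-zero blocks of a block-sparse matrix occupy disjoint row and column ranges, so the factorization can be done block by block. A minimal sketch of that idea in plain `LinearAlgebra` (illustrative only, not the NDTensors API):

```julia
using LinearAlgebra

# Stand-ins for the non-zero blocks of a block-sparse tensor: factoring each block
# independently is equivalent to factoring the full block-diagonal matrix.
blocks = [randn(4, 3), randn(2, 2)]
factors = [qr(b) for b in blocks]
for (b, F) in zip(blocks, factors)
  @assert b ≈ Matrix(F.Q) * F.R # Q*R reassembles each block exactly
end
```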
diff --git a/NDTensors/src/exports.jl b/NDTensors/src/exports.jl
index 427f220e10..853da4a7d0 100644
--- a/NDTensors/src/exports.jl
+++ b/NDTensors/src/exports.jl
@@ -80,3 +80,7 @@ export
   # linearalgebra.jl
   qr
+
+if VERSION >= v"1.7"
+  export RowNorm
+end
diff --git a/NDTensors/src/imports.jl b/NDTensors/src/imports.jl
index 13b466fca4..c32dcba281 100644
--- a/NDTensors/src/imports.jl
+++ b/NDTensors/src/imports.jl
@@ -57,4 +57,6 @@ import Adapt: adapt_structure, adapt_storage
 
 import LinearAlgebra: diag, exp, norm, qr
 
+import MatrixFactorizations: ql, rq
+
 import TupleTools: isperm
diff --git a/NDTensors/src/linearalgebra/linearalgebra.jl b/NDTensors/src/linearalgebra/linearalgebra.jl
index 2a62a0a7bd..22e324f695 100644
--- a/NDTensors/src/linearalgebra/linearalgebra.jl
+++ b/NDTensors/src/linearalgebra/linearalgebra.jl
@@ -366,39 +366,180 @@ function LinearAlgebra.eigen(
   V = complex(tensor(Dense(vec(VM)), Vinds))
   return D, V, spec
 end
+
+#
+# Trim out n rows of R based on norm(R_nn) being effectively zero, as judged by
+# atol >= 0.0 and/or rtol >= 0.0.
+#
+function trim_rows(
+  Q::AbstractMatrix, R::AbstractMatrix, atol::Float64, rtol::Float64; verbose=false
+)
+  nr, nc = size(R)
+  last_row_to_keep = nr
+  do_atol = atol >= 0.0
+  do_rtol = rtol >= 0.0
+  # for r in nr:-1:1
+  #   Rnn=norm(R[r:nr, :])
+  #   R11=norm(R[1:r-1, :])
+  #   if (do_atol && Rnn > atol) || (do_rtol && Rnn/R11 > rtol)
+  #     last_row_to_keep = r
+  #     break
+  #   end
+  # end
+  #
+  # Could also do the same test but only looking at the diagonals
+  #
+  dR = diag(R)
+  for r in nr:-1:1
+    Rnn = norm(dR[r:nr])
+    R11 = norm(dR[1:(r - 1)])
+    if (do_atol && Rnn > atol) || (do_rtol && Rnn / R11 > rtol)
+      last_row_to_keep = r
+      break
+    end
+  end
+
+  num_zero_rows = nr - last_row_to_keep
+  if num_zero_rows == 0
+    verbose &&
+      println("Rank Reveal removing $num_zero_rows rows with atol=$atol, rtol=$rtol")
+    return Q, R
+  end
+  #
+  # Useful output for troubleshooting.
+  #
+  if verbose
+    println("Rank Reveal removing $num_zero_rows rows with atol=$atol, rtol=$rtol")
+  end
+
+  return Q[:, 1:last_row_to_keep], R[1:last_row_to_keep, :]
+end
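+#
+# Illustrative aside (plain LinearAlgebra, not part of this API): with column pivoting
+# the magnitudes of diag(R) are non-increasing, which is why the trailing-norm test
+# above can locate the numerical rank.
+#   A = randn(6, 6); A[:, 3] = A[:, 1]          # make A rank deficient
+#   dR = abs.(diag(qr(A, ColumnNorm()).R))      # non-increasing magnitudes
+#   rank_est = findlast(>(1e-12 * dR[1]), dR)   # == 5 for this A
+#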
+
+if VERSION >= v"1.7"
+  struct RowNorm end #for row pivoting lq
+end
+
+qr(T::DenseTensor{<:Any,2}; kwargs...) = qx(qr, T; kwargs...)
+ql(T::DenseTensor{<:Any,2}; kwargs...) = qx(ql, T; kwargs...)
 
-function qr(T::DenseTensor{<:Any,2}; positive=false, kwargs...)
-  qxf = positive ? qr_positive : qr
-  return qx(qxf, T; kwargs...)
-end
-function ql(T::DenseTensor{<:Any,2}; positive=false, kwargs...)
-  qxf = positive ? ql_positive : ql
-  return qx(qxf, T; kwargs...)
-end
+# LinearAlgebra's qr takes types like Val(true) or ColumnNorm() to control pivoting.
+# We need some helper functions to deal with these types changing between julia versions.
+#
+pivot_to_Bool(pivot::Bool)::Bool = pivot
+pivot_to_Bool(::Val{false})::Bool = false
+pivot_to_Bool(::Val{true})::Bool = true
+if VERSION < v"1.7"
+  call_pivot(bpivot::Bool, ::Function) = Val(bpivot)
+end
+if VERSION >= v"1.7"
+  pivot_to_Bool(pivot::NoPivot)::Bool = false
+  pivot_to_Bool(pivot::ColumnNorm)::Bool = true
+  pivot_to_Bool(pivot::RowNorm)::Bool = true
+  function call_pivot(bpivot::Bool, qx::Function)
+    if qx == qr
+      return bpivot ? ColumnNorm() : NoPivot() # LinearAlgebra
+    else
+      return Val(bpivot) # MatrixFactorizations
+    end
+  end
+end
+
+matrix(Q::LinearAlgebra.QRCompactWYQ) = Matrix(Q)
+function matrix(Q::MatrixFactorizations.QLPackedQ)
+  n, m = size(Q.factors)
+  if n <= m
+    return Matrix(Q)
+  else
+    return Q * Matrix(LinearAlgebra.I, m, m)
+  end
+end
 
 #
 # Generic function for qr and ql decomposition of dense matrix.
 # The X tensor = R or L.
 #
-function qx(qx::Function, T::DenseTensor{<:Any,2}; kwargs...)
-  QM, XM = qx(matrix(T))
-  # Be aware that if positive==false, then typeof(QM)=LinearAlgebra.QRCompactWYQ, not Matrix
-  # It gets converted to matrix below.
-  # Make the new indices to go onto Q and R
-  q, r = inds(T)
-  q = dim(q) < dim(r) ? sim(q) : sim(r)
-  IndsT = indstype(T) #get the index type
+function qx(
+  qx::Function,
+  T::DenseTensor{<:Any,2};
+  positive=false,
+  pivot=call_pivot(false, qx),
+  atol=-1.0, #absolute tolerance for rank reduction
+  rtol=-1.0, #relative tolerance for rank reduction
+  block_rtol=-1.0, #This is intended for block sparse, but we reluctantly accept it here.
+  return_Rp=false,
+  verbose=false,
+  kwargs...,
+)
+  bpivot = pivot_to_Bool(pivot) #We need a simple bool for making decisions below.
+
+  if rtol < 0.0 && block_rtol >= 0.0
+    rtol = block_rtol
+  end
+  do_rank_reduction = (atol >= 0.0) || (rtol >= 0.0)
+  if do_rank_reduction && qx == ql
+    @warn "User requested rq/ql decomposition with atol=$atol, rtol=$rtol." *
+      " Rank reduction requires column/row pivoting, which is not supported for rq/ql decomposition in lapack/ITensors."
+    do_rank_reduction = false
+  end
+  if bpivot && qx == ql
+    @warn "User requested rq/ql decomposition with row/column pivoting." *
+      " Pivoting is not supported for rq/ql decomposition in lapack/ITensors."
+    bpivot = false
+  end
+  if do_rank_reduction #if do_rank_reduction==false then don't change bpivot.
+    bpivot = true
+  end
+  if !bpivot && return_Rp
+    @warn "User requested return of the Rp matrix with no pivoting." *
+      " Please enable QR/LQ with pivoting to return the Rp matrix."
+    return_Rp = false
+  end
+
+  pivot = call_pivot(bpivot, qx) #Convert the bool to whatever type the qx function expects.
+  if bpivot
+    QM, XM, p = qx(matrix(T), pivot) #with column pivoting
+    QM, XM = trim_rows(Matrix(QM), XM, atol, rtol; verbose=verbose)
+  else
+    QM, XM = qx(matrix(T), pivot) #no column pivoting
+    QM = matrix(QM)
+    p = nothing
+  end
+  #
+  # Gauge fix the diagonal of X into positive definite form.
+  #
+  positive && qx_positive!(qx, QM, XM)
+  #
+  # Undo the permutation on R, so that T=Q*R again.
+  #
+  if bpivot
+    if return_Rp
+      XMp = XM # If requested, save the column-permuted version of X
+    end
+    XM = XM[:, invperm(p)] # un-permute the columns of X
+  end
+  #
+  # Make the new indices to go onto Q and X
+  #
+  IndsT = indstype(T) #get the indices type
+  @assert IndsT.parameters[1] == IndsT.parameters[2] #they had better be the same!
+  IndexT = IndsT.parameters[1] #establish the single index type.
+  q = IndexT(size(XM)[1]) #create the Q--X link index.
   Qinds = IndsT((ind(T, 1), q))
   Xinds = IndsT((q, ind(T, 2)))
-  Q = tensor(Dense(vec(Matrix(QM))), Qinds) #Q was strided
+  Q = tensor(Dense(vec(QM)), Qinds)
   X = tensor(Dense(vec(XM)), Xinds)
-  return Q, X
+  if return_Rp
+    Xp = tensor(Dense(vec(XMp)), Xinds)
+  else
+    Xp = nothing
+  end
+
+  return Q, X, Xp
 end
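+#
+# Typical calls (mirroring NDTensors/test/linearalgebra.jl). All three values are
+# always returned; Xp is `nothing` unless pivoting is on and return_Rp=true.
+#   Q, X, Xp = qr(T)                              # plain QR
+#   Q, X, Xp = qr(T; pivot=true, return_Rp=true)  # pivoted, also return the permuted X
+#   Q, X, Xp = qr(T; atol=1e-14)                  # rank reduction, absolute tolerance
+#   Q, X, Xp = qr(T; rtol=1e-15)                  # rank reduction, relative tolerance
+#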
-#
-# Just flip signs between Q and R to get all the diagonals of R >=0.
-# For rectangular M the indexing for "diagonal" is non-trivial.
-#
+# Required by svd_recursive
 """
     qr_positive(M::AbstractMatrix)
 
 Compute the QR decomposition of a matrix M
 such that the diagonal elements of R are
 non-negative. Such a QR decomposition of a
 matrix is unique. Returns a tuple (Q,R).
 """
@@ -423,74 +564,23 @@ function qr_positive(M::AbstractMatrix)
   return (Q, R)
 end
 
-"""
-    ql_positive(M::AbstractMatrix)
-
-Compute the QL decomposition of a matrix M
-such that the diagonal elements of L are
-non-negative. Such a QL decomposition of a
-matrix is unique. Returns a tuple (Q,L).
-"""
-function ql_positive(M::AbstractMatrix)
-  sparseQ, L = ql(M)
-  Q = convert(Matrix, sparseQ)
-  nr, nc = size(L)
-  dc = nc > nr ? nc - nr : 0 #diag is shifted over by dc if nc>nr
-  for c in 1:(nc - dc)
-    if L[c, c + dc] != 0.0 #sign(0.0)==0.0 so we don't want to zero out a column of Q.
-      sign_Lc = sign(L[c, c + dc])
-      if c <= nr && !isone(sign_Lc)
-        L[c, 1:(c + dc)] *= sign_Lc #only fip non-zero portion of the column.
-        Q[:, c] *= conj(sign_Lc)
-      end
-    end
-  end
-  return (Q, L)
-end
-
-#
-# Lapack replaces A with Q & R carefully packed together. So here we just copy a
-# before letting lapack overwirte it.
-#
-function ql(A::AbstractMatrix; kwargs...)
-  Base.require_one_based_indexing(A)
-  T = eltype(A)
-  AA = similar(A, LinearAlgebra._qreltype(T), size(A))
-  copyto!(AA, A)
-  return ql!(AA; kwargs...)
-end
 #
-# This is where the low level call to lapack actually occurs. Most of the work is
-# about unpacking Q and L from the A matrix.
+# Semi-generic function for gauge-fixing the diagonal of X into positive definite form.
+# Because the diagonal is difficult to locate for rectangular X (it moves between R and L)
+# we use qx==ql to know if X is lower or upper.
 #
-function ql!(A::StridedMatrix{<:LAPACK.BlasFloat})
-  tau = Base.similar(A, min(size(A)...))
-  x = LAPACK.geqlf!(A, tau)
-  #save L from the lower portion of A, before orgql! mangles it!
-  nr, nc = size(A)
-  mn = min(nr, nc)
-  L = similar(A, (mn, nc))
-  for r in 1:mn
-    for c in 1:(r + nc - mn)
-      L[r, c] = A[r + nr - mn, c]
-    end
-    for c in (r + 1 + nc - mn):nc
-      L[r, c] = 0.0
-    end
-  end
-  # Now we need shift the orth vectors from the right side of Q over the left side, before
-  if (mn < nc)
-    for r in 1:nr
-      for c in 1:mn
-        A[r, c] = A[r, c + nc - mn]
-      end
-    end
-    for r in 1:nr
-      A = A[:, 1:mn] #whack the extra columns in A.
-    end
-  end
-  LAPACK.orgql!(A, tau)
-  return A, L
-end
+function qx_positive!(qx::Function, Q::AbstractMatrix, X::AbstractMatrix)
+  nr, nc = size(X)
+  dc = (nc > nr && qx == ql) ? nc - nr : 0 #diag is shifted over by dc if nc>nr
+  for c in 1:Base.min(nr, nc)
+    if X[c, c + dc] != 0.0 #sign(0.0)==0.0 so we don't want to zero out a column of Q.
+      sign_Xc = sign(X[c, c + dc])
+      if !isone(sign_Xc)
+        X[c, :] *= sign_Xc
+        Q[:, c] *= conj(sign_Xc)
+      end
+    end
+  end
+end
 
 # TODO: support alg keyword argument to choose the svd algorithm
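The gauge fix in `qx_positive!` relies on a standard invariance of the factorization; here is a self-contained demonstration in plain `LinearAlgebra` (illustrative, using the same sign convention as the code above):

```julia
using LinearAlgebra

# LAPACK returns R with a real (possibly negative) diagonal, so sign(R[c,c]) is ±1.
# Scaling row c of R by that sign and column c of Q by its conjugate leaves Q*R
# unchanged while making the diagonal of R non-negative.
A = randn(ComplexF64, 5, 3)
F = qr(A)
Q, R = Matrix(F.Q), Matrix(F.R)
for c in 1:size(R, 1)
  s = sign(R[c, c])
  if !iszero(s)
    R[c, :] *= s
    Q[:, c] *= conj(s)
  end
end
@assert A ≈ Q * R
@assert all(real(diag(R)) .>= 0)
```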
diff --git a/NDTensors/test/linearalgebra.jl b/NDTensors/test/linearalgebra.jl
index 3a5a8902c7..42f7c00fc6 100644
--- a/NDTensors/test/linearalgebra.jl
+++ b/NDTensors/test/linearalgebra.jl
@@ -2,6 +2,10 @@ using NDTensors
 using LinearAlgebra
 using Test
 
+# Not available on the CI machine that tests NDTensors.
+# using Random
+# Random.seed!(314159)
+
 @testset "random_orthog" begin
   n, m = 10, 4
   O1 = random_orthog(n, m)
@@ -20,58 +24,109 @@ end
   @test norm(U2 * U2' - Diagonal(fill(1.0, m))) < 1E-14
 end
 
-@testset "Dense $qx decomposition, elt=$elt, positve=$positive, singular=$singular" for qx in
-  [
+@testset "Dense $qx decomposition, elt=$elt, positive=$positive, singular=$singular, rank_reveal=$rank_reveal, pivot=$pivot" for qx in
+  [
     qr, ql
   ],
   elt in [Float64, ComplexF64, Float32, ComplexF32],
   positive in [false, true],
-  singular in [false, true]
+  singular in [false, true],
+  rank_reveal in [false, true],
+  pivot in [false, true],
+  return_Rp in [false, true]
+
+  if qx == ql && (rank_reveal || pivot)
+    continue
+  end
+
+  # avoid warnings.
+  if !(rank_reveal || pivot) && return_Rp
+    continue
+  end
 
-  eps = Base.eps(real(elt)) * 30 #this is set rather tight, so if you increase/change m,n you may have open up the tolerance on eps.
+  eps = Base.eps(real(elt)) * 30
+  #this is set rather tight, so if you increase/change m,n you may have to open up the tolerance on eps.
+  atol = rank_reveal ? eps * 1.0 : -1.0
   n, m = 4, 8
-  Id = Diagonal(fill(1.0, min(n, m)))
   #
   # Wide matrix (more columns than rows)
   #
   A = randomTensor(elt, (n, m))
-  # We want to test 0.0 on the diagonal. We need make all roaw equal to gaurantee this with numerical roundoff.
+  # We want to test 0.0 on the diagonal. We need to make some rows linearly dependent
+  # to guarantee this with numerical roundoff.
   if singular
-    for i in 2:n
-      A[i, :] = A[1, :]
+    for i in 2:3
+      A[i, :] = A[1, :] * 1.05^n
     end
   end
-  Q, X = qx(A; positive=positive) #X is R or L.
+  # You can set verbose=true if you want debug output on rank reduction.
+  Q, X, Xp = qx(
+    A; positive=positive, atol=atol, pivot=pivot, return_Rp=return_Rp, verbose=false
+  ) #X is R or L.
   @test A ≈ Q * X atol = eps
-  @test array(Q)' * array(Q) ≈ Id atol = eps
-  @test array(Q) * array(Q)' ≈ Id atol = eps
-  if positive
+  @test array(Q)' * array(Q) ≈ Diagonal(fill(1.0, dim(Q, 2))) atol = eps
+  if dim(Q, 1) == dim(Q, 2)
+    @test array(Q) * array(Q)' ≈ Diagonal(fill(1.0, min(n, m))) atol = eps
+  end
+  if positive && !rank_reveal && !pivot
     nr, nc = size(X)
     dr = qx == ql ? Base.max(0, nc - nr) : 0
     diagX = diag(X[:, (1 + dr):end]) #location of diag(L) is shifted dr columns over the right.
     @test all(real(diagX) .>= 0.0)
     @test all(imag(diagX) .== 0.0)
   end
+  if positive && !isnothing(Xp)
+    nr, nc = size(Xp)
+    dr = qx == ql ? Base.max(0, nc - nr) : 0
+    diagX = diag(Xp[:, (1 + dr):end]) #location of diag(L) is shifted dr columns over the right.
+    @test all(real(diagX) .>= 0.0)
+    @test all(imag(diagX) .== 0.0)
+  end
+
+  if atol >= 0 && singular
+    @test dim(Q, 2) == 2 #make sure the rank revealing mechanism hacked off the columns of Q (and rows of X).
+    @test dim(X, 1) == 2 #Redundant?
+  end
+  if (atol >= 0.0 || pivot) && qx == qr
+    @test !isnothing(Xp) == return_Rp
+  end
   #
   # Tall matrix (more rows than cols)
   #
   A = randomTensor(elt, (m, n)) #Tall array
   # We want to test 0.0 on the diagonal. We need to make some rows equal to guarantee this with numerical roundoff.
   if singular
-    for i in 2:m
+    for i in 2:4
       A[i, :] = A[1, :]
     end
   end
-  Q, X = qx(A; positive=positive)
+  Q, X, Xp = qx(
+    A; positive=positive, atol=atol, pivot=pivot, return_Rp=return_Rp, verbose=false
+  )
   @test A ≈ Q * X atol = eps
-  @test array(Q)' * array(Q) ≈ Id atol = eps
-  if positive
+  @test array(Q)' * array(Q) ≈ Diagonal(fill(1.0, dim(Q, 2))) atol = eps
+  #@test array(Q) * array(Q)' -- no such relationship for tall matrices.
+  if positive && !rank_reveal && !pivot
     nr, nc = size(X)
     dr = qx == ql ? Base.max(0, nc - nr) : 0
     diagX = diag(X[:, (1 + dr):end]) #location of diag(L) is shifted dr columns over the right.
     @test all(real(diagX) .>= 0.0)
     @test all(imag(diagX) .== 0.0)
   end
+  if positive && !isnothing(Xp)
+    nr, nc = size(Xp)
+    dr = qx == ql ? Base.max(0, nc - nr) : 0
+    diagX = diag(Xp[:, (1 + dr):end]) #location of diag(L) is shifted dr columns over the right.
+    @test all(real(diagX) .>= 0.0)
+    @test all(imag(diagX) .== 0.0)
+  end
+  if atol > 0 && singular
+    @test dim(Q, 2) == 4 #make sure the rank revealing mechanism hacked off the columns of Q (and rows of X).
+    @test dim(X, 1) == 4 #Redundant?
+  end
+  if (atol >= 0.0 || pivot) && qx == qr
+    @test !isnothing(Xp) == return_Rp
+  end
 end
 
 nothing
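Why the tall-matrix tests above only check `Q' * Q` (see the commented-out test): a thin QR gives Q with orthonormal columns, but with more rows than columns `Q * Q'` is a projector, not the identity. A quick standalone check:

```julia
using LinearAlgebra

Q = Matrix(qr(randn(8, 4)).Q) # thin Q factor: 8×4
@show norm(Q' * Q - I)  # ≈ 0: columns are orthonormal
@show norm(Q * Q' - I)  # ≈ 2: Q*Q' is a rank-4 projector in 8 dimensions
```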
diff --git a/src/exports.jl b/src/exports.jl
index b5cfea5fa3..6aebdf67a5 100644
--- a/src/exports.jl
+++ b/src/exports.jl
@@ -393,3 +393,7 @@ export
   hasqns,
   nblocks,
   qn
+
+if VERSION >= v"1.7"
+  export RowNorm
+end
diff --git a/src/tensor_operations/matrix_decomposition.jl b/src/tensor_operations/matrix_decomposition.jl
index c0fd02943f..8b3c75d1f6 100644
--- a/src/tensor_operations/matrix_decomposition.jl
+++ b/src/tensor_operations/matrix_decomposition.jl
@@ -390,6 +390,8 @@ remove_trivial_index(Q::ITensor, R::ITensor, vαl, vαr) = (Q * dag(vαl), R * d
 remove_trivial_index(Q::ITensor, R::ITensor, ::Nothing, vαr) = (Q, R * dag(vαr))
 remove_trivial_index(Q::ITensor, R::ITensor, vαl, ::Nothing) = (Q * dag(vαl), R)
 remove_trivial_index(Q::ITensor, R::ITensor, ::Nothing, ::Nothing) = (Q, R)
+remove_trivial_index(R::ITensor, vαr) = R * dag(vαr)
+remove_trivial_index(R::ITensor, ::Nothing) = R
 
 #
 # Force users to knowingly ask for zero indices using qr(A,()) syntax
@@ -422,8 +424,13 @@ lq(A::ITensor, Linds...; kwargs...) = lq(A, Linds, uniqueinds(A, Linds); kwargs.
 #
 # Core function where both left and right indices are supplied as tuples or vectors
 # Handle default tags and dispatch to generic qx/xq functions.
 #
-function qr(A::ITensor, Linds::Indices, Rinds::Indices; tags=ts"Link,qr", kwargs...)
-  return qx(qr, A, Linds, Rinds; tags, kwargs...)
+function qr(
+  A::ITensor, Linds::Indices, Rinds::Indices; tags=ts"Link,qr", pivot=false, kwargs...
+)
+  if VERSION >= v"1.7" && typeof(pivot) == RowNorm
+    @warn "Please use ColumnNorm() instead of RowNorm() for pivoted qr decomposition."
+  end
+  return qx(qr, A, Linds, Rinds; tags, pivot=pivot, kwargs...)
 end
 function ql(A::ITensor, Linds::Indices, Rinds::Indices; tags=ts"Link,ql", kwargs...)
   return qx(ql, A, Linds, Rinds; tags, kwargs...)
@@ -431,13 +438,20 @@ end
 function rq(A::ITensor, Linds::Indices, Rinds::Indices; tags=ts"Link,rq", kwargs...)
   return xq(ql, A, Linds, Rinds; tags, kwargs...)
 end
-function lq(A::ITensor, Linds::Indices, Rinds::Indices; tags=ts"Link,lq", kwargs...)
-  return xq(qr, A, Linds, Rinds; tags, kwargs...)
+function lq(
+  A::ITensor, Linds::Indices, Rinds::Indices; tags=ts"Link,lq", pivot=false, kwargs...
+)
+  if VERSION >= v"1.7" && typeof(pivot) == ColumnNorm
+    @warn "Please use RowNorm() instead of ColumnNorm() for pivoted lq decomposition."
+  end
+  return xq(qr, A, Linds, Rinds; tags, pivot=pivot, kwargs...)
 end
 #
 # Generic function implementing both qr and ql decomposition. The X tensor = R or L.
 #
-function qx(qx::Function, A::ITensor, Linds::Indices, Rinds::Indices; tags, kwargs...)
+function qx(
+  qx::Function, A::ITensor, Linds::Indices, Rinds::Indices; tags, return_Rp=false, kwargs...
+)
   # Strip out any extra indices that are not in A.
  # Unit test test/base/test_itensor.jl line 1469 will fail without this.
   Linds = commoninds(A, Linds)
@@ -458,7 +472,8 @@ function qx(qx::Function, A::ITensor, Linds::Indices, Rinds::Indices; tags, kwar
   #
   AC = permute(AC, cL, cR; allow_alias=true)
 
-  QT, XT = qx(tensor(AC); kwargs...) #pass order(AC)==2 matrix down to the NDTensors level where qr/ql are implemented.
+  QXp = qx(tensor(AC); return_Rp, kwargs...) #pass order(AC)==2 matrix down to the NDTensors level where qr/ql are implemented.
+  QT, XT = QXp[1], QXp[2]
   #
   # Undo the combine operation, to recover all tensor indices.
   #
@@ -472,17 +487,27 @@ function qx(qx::Function, A::ITensor, Linds::Indices, Rinds::Indices; tags, kwar
   q = commonind(Q, X)
   Q = settags(Q, tags, q)
   X = settags(X, tags, q)
-  q = settags(q, tags)
-  return Q, X, q
+  # repeat all operations on Xp if requested by the user.
+  if return_Rp && length(QXp) == 3 # GPU code does not support the new features yet, so we check the length as well.
+    Xp = itensor(QXp[3]) * dag(CR)
+    Xp = remove_trivial_index(Xp, vαr)
+    Xp = settags(Xp, tags, q)
+  else
+    Xp = nothing
+  end
+
+  q = settags(q, tags) #fix the tags of q last.
+
+  return Q, X, q, Xp
 end
 #
 # Generic function implementing both rq and lq decomposition. Implemented using qr/ql
 # with swapping the left and right indices. The X tensor = R or L.
 #
-function xq(qx::Function, A::ITensor, Linds::Indices, Rinds::Indices; tags, kwargs...)
-  Q, X, q = qx(A, Rinds, Linds; kwargs...)
+function xq(qxf::Function, A::ITensor, Linds::Indices, Rinds::Indices; tags, kwargs...)
+  Q, X, q, perm = qx(qxf, A, Rinds, Linds; tags=tags, kwargs...)
   #
   # fix up the tag name for the index between Q and L.
   #
@@ -490,7 +515,7 @@ end
   X = settags(X, tags, q)
   q = settags(q, tags)
 
-  return X, Q, q
+  return X, Q, q, perm
 end
 
 polar(A::ITensor; kwargs...) = error(noinds_error_message("polar"))
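A usage sketch of the extended ITensor-level interface (mirroring test/base/test_decomp.jl below; the indices `i`, `j` are placeholders):

```julia
using ITensors

# All four values are always returned; Rp is `nothing` unless pivoting is
# enabled and return_Rp=true.
i, j = Index(5, "i"), Index(5, "j")
A = randomITensor(i, j)

Q, R, q, Rp = qr(A, i)                             # plain QR: Rp === nothing
Q, R, q, Rp = qr(A, i; pivot=true, return_Rp=true) # pivoted; R is un-permuted, Rp keeps pivot order
Q, R, q, Rp = qr(A, i; atol=1e-14)                 # pivoted + rank reduction, absolute tolerance
@assert A ≈ Q * R
```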
diff --git a/test/base/test_decomp.jl b/test/base/test_decomp.jl
index 68601f35a1..189265981b 100644
--- a/test/base/test_decomp.jl
+++ b/test/base/test_decomp.jl
@@ -1,4 +1,6 @@
-using ITensors, LinearAlgebra, Test
+using ITensors, LinearAlgebra, Test, Random
+
+Random.seed!(314159)
 
 #
 # Decide if rank 2 tensor is upper triangular, i.e. all zeros below the diagonal.
@@ -67,6 +69,43 @@ function is_upper(A::ITensor, r::Index)::Bool
 end
 is_lower(A::ITensor, r::Index)::Bool = is_upper(r, A)
 
+#
+# Make all columns linearly dependent but scaled differently.
+#
+function rank_fix(A::ITensor, Linds...)
+  Lis = commoninds(A, (Linds...))
+  Ris = uniqueinds(A, Lis)
+  #
+  # Use combiners to render A down to a rank 2 tensor, ready for the matrix QR routine.
+  #
+  CL, CR = combiner(Lis...), combiner(Ris...)
+  cL, cR = combinedind(CL), combinedind(CR)
+  AC = A * CR * CL
+  if inds(AC) != IndexSet(cL, cR)
+    AC = permute(AC, cL, cR)
+  end
+  At = NDTensors.tensor(AC)
+  nc = dim(At, 2)
+  @assert nc >= 2
+  if hasqns(At)
+    # In this case we make each nz block have rank=1
+    expected_rank = nnzblocks(At)
+    for b in nzblocks(At)
+      bv = ITensors.blockview(At, b)
+      nr, nc = dims(bv)
+      for c in 2:nc
+        bv[:, c] = bv[:, 1] * 1.05^c
+      end
+    end
+  else
+    for c in 2:nc
+      At[:, c] = At[:, 1] * 1.05^c
+    end
+    expected_rank = 1
+  end
+  return itensor(At) * dag(CL) * dag(CR), expected_rank
+end
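+#
+# Illustrative check (plain LinearAlgebra, not used below): making every column a
+# scalar multiple of column 1, as rank_fix does, yields an exactly rank-1 matrix.
+#   M = randn(5, 4)
+#   for c in 2:4
+#     M[:, c] = M[:, 1] * 1.05^c
+#   end
+#   @assert rank(M) == 1
+#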
+
 function diag_upper(l::Index, A::ITensor)
   At = NDTensors.tensor(A * combiner(noncommoninds(A, l)...))
   if size(At) == (1,)
@@ -330,23 +369,28 @@ end
   A = randomITensor(l, s, s', r)
   Ainds = inds(A)
-  Q, R, q = qr(A, Ainds[1:ninds]; positive=true)
+  Q, R, q, p = qr(A, Ainds[1:ninds]; positive=true)
   @test min(diag_upper(q, R)...) > 0.0
   @test A ≈ Q * R atol = 1e-13
   @test Q * dag(prime(Q, q)) ≈ δ(Float64, q, q') atol = 1e-13
-  Q, L, q = ql(A, Ainds[1:ninds]; positive=true)
+  @test isnothing(p)
+
+  Q, L, q, p = ql(A, Ainds[1:ninds]; positive=true)
   @test min(diag_lower(q, L)...) > 0.0
   @test A ≈ Q * L atol = 1e-13
   @test Q * dag(prime(Q, q)) ≈ δ(Float64, q, q') atol = 1e-13
+  @test isnothing(p)
 
-  R, Q, q = rq(A, Ainds[1:ninds]; positive=true)
+  R, Q, q, p = rq(A, Ainds[1:ninds]; positive=true)
   @test min(diag_lower(q, R)...) > 0.0 #transpose R is lower
   @test A ≈ Q * R atol = 1e-13
   @test Q * dag(prime(Q, q)) ≈ δ(Float64, q, q') atol = 1e-13
-  L, Q, q = lq(A, Ainds[1:ninds]; positive=true)
+  @test isnothing(p)
+
+  L, Q, q, p = lq(A, Ainds[1:ninds]; positive=true)
   @test min(diag_upper(q, L)...) > 0.0 #transpose L is upper
   @test A ≈ Q * L atol = 1e-13
   @test Q * dag(prime(Q, q)) ≈ δ(Float64, q, q') atol = 1e-13
+  @test isnothing(p)
 end
 
 @testset "QR/QL block sparse with positive R" begin
@@ -354,12 +398,226 @@ end
   s = Index(QN("Sz", -1) => 1, QN("Sz", 1) => 1; tags="s")
   r = Index(QN("Sz", 0) => 3; tags="r")
   A = randomITensor(l, s, dag(s'), r)
-  Q, R, q = qr(A, l, s, dag(s'); positive=true)
+  Q, R, q, p = qr(A, l, s, dag(s'); positive=true)
   @test min(diag(R)...) > 0.0
   @test A ≈ Q * R atol = 1e-13
-  Q, L, q = ql(A, l, s, dag(s'); positive=true)
+  @test isnothing(p)
+
+  Q, L, q, p = ql(A, l, s, dag(s'); positive=true)
   @test min(diag(L)...) > 0.0
   @test A ≈ Q * L atol = 1e-13
+  @test isnothing(p)
+end
+
+@testset "Dense rank revealing QR/LQ decomp interface options" begin
+  l = Index(5, "l")
+  s = Index(2, "s")
+  r = Index(5, "r")
+  A = randomITensor(Float64, l, s, s', r)
+  qrinds = inds(A)[1:2]
+  rinds = noncommoninds(A, qrinds)
+  A, expected_rank = rank_fix(A, qrinds) #make all columns linearly dependent on column 1, so rank==1.
+
+  Q, R = qr(A, l, s) # no pivoting
+  Q, R, iq = qr(A, qrinds) # no pivoting
+  @test dim(iq) == dim(qrinds)
+  Q, R, iq, Rp = qr(A, qrinds) # no pivoting
+  @test isnothing(Rp)
+  @test dim(iq) == dim(qrinds)
+
+  # Q, R, iq, p = qx(A, qrinds; pivot=Val(false)) is not supported
+  Q, R, iq, Rp = qr(A, qrinds; pivot=false) # no pivoting
+  @test isnothing(Rp)
+  @test dim(iq) == dim(qrinds)
+
+  Q, R, iq, Rp = qr(A, qrinds; pivot=true) # pivoting but no rank reduction, sets `pivot=ColumnNorm()` internally
+  @test isnothing(Rp)
+  @test dim(iq) == dim(qrinds)
+
+  Q, R, iq, Rp = qr(A, qrinds; pivot=true, return_Rp=true) # pivoting but no rank reduction, sets `pivot=ColumnNorm()` internally
+  @test !isnothing(Rp)
+  @test dims(Rp) == dims(R)
+  @test dim(iq) == dim(qrinds)
+
+  Q, R, iq, Rp = qr(A, qrinds; atol=1e-14, return_Rp=true) # absolute tolerance for rank reduction
+  @test !isnothing(Rp)
+  @test dims(Rp) == dims(R)
+  @test dim(iq) == expected_rank
+
+  Q, R, iq, Rp = qr(A, qrinds; rtol=1e-15, return_Rp=true) # relative tolerance for rank reduction
+  @test !isnothing(Rp)
+  @test dims(Rp) == dims(R)
+  @test dim(iq) == expected_rank
+
+  Q, R, iq, Rp = qr(A, qrinds; block_rtol=1e-15, return_Rp=true) # relative tolerance for rank reduction
+  @test !isnothing(Rp)
+  @test dims(Rp) == dims(R)
+  @test dim(iq) == expected_rank
+
+  # LQ versions
+
+  L, Q = lq(A, l, s) # no pivoting
+  L, Q, iq = lq(A, qrinds) # no pivoting
+  @test dim(iq) == dim(qrinds)
+  L, Q, iq, Lp = lq(A, qrinds) # no pivoting
+  @test isnothing(Lp)
+  @test dim(iq) == dim(qrinds)
+
+  L, Q, iq, Lp = lq(A, qrinds; pivot=false) # no pivoting
+  @test isnothing(Lp)
+  @test dim(iq) == dim(qrinds)
+
+  L, Q, iq, Lp = lq(A, qrinds; pivot=true) # pivoting but no rank reduction, sets `pivot=ColumnNorm()` internally
+  @test isnothing(Lp)
+  @test dim(iq) == dim(qrinds)
+
+  L, Q, iq, Lp = lq(A, qrinds; pivot=true, return_Rp=true) # pivoting but no rank reduction, sets `pivot=ColumnNorm()` internally
+  @test !isnothing(Lp)
+  @test dims(Lp) == dims(L)
+  @test dim(iq) == dim(qrinds)
+
+  L, Q, iq, Lp = lq(A, qrinds; atol=1e-14, return_Rp=true) # absolute tolerance for rank reduction
+  @test !isnothing(Lp)
+  @test dims(Lp) == dims(L)
+  @test dim(iq) == expected_rank
+
+  L, Q, iq, Lp = lq(A, qrinds; rtol=1e-15, return_Rp=true) # relative tolerance for rank reduction
+  @test !isnothing(Lp)
+  @test dims(Lp) == dims(L)
+  @test dim(iq) == expected_rank
+
+  L, Q, iq, Lp = lq(A, qrinds; block_rtol=1e-15, return_Rp=true) # relative tolerance for rank reduction
+  @test !isnothing(Lp)
+  @test dims(Lp) == dims(L)
+  @test dim(iq) == expected_rank
+end
+
+if VERSION >= v"1.7"
+  @testset "Dense rank revealing QR/LQ decomp interface options, julia VERSION>=1.7" begin
+    l = Index(5, "l")
+    s = Index(2, "s")
+    r = Index(5, "r")
+    A = randomITensor(Float64, l, s, s', r)
+    qrinds = inds(A)[1:2]
+    rinds = noncommoninds(A, qrinds)
+    A, expected_rank = rank_fix(A, qrinds) #make all columns linearly dependent on column 1, so rank==1.
+    Q, R, iq, Rp = qr(A, qrinds; pivot=NoPivot()) # no pivoting
+    @test isnothing(Rp)
+    @test dim(iq) == dim(qrinds)
+
+    L, Q, iq, Lp = lq(A, qrinds; pivot=NoPivot()) # no pivoting
+    @test isnothing(Lp)
+    @test dim(iq) == dim(qrinds)
+
+    Q, R, iq, Rp = qr(A, qrinds; pivot=ColumnNorm()) # column pivoting, no rank reduction
+    @test isnothing(Rp)
+    @test dim(iq) == dim(qrinds)
+
+    Q, R, iq, Rp = qr(A, qrinds; pivot=ColumnNorm(), return_Rp=true) # column pivoting, no rank reduction
+    @test !isnothing(Rp)
+    @test dims(Rp) == dims(R)
+    @test dim(iq) == dim(qrinds)
+
+    L, Q, iq, Lp = lq(A, qrinds; pivot=RowNorm(), return_Rp=true) # row pivoting, no rank reduction
+    @test !isnothing(Lp)
+    @test dims(Lp) == dims(L)
+    @test dim(iq) == dim(qrinds)
+
+    @test_logs (
+      :warn, "Please use ColumnNorm() instead of RowNorm() for pivoted qr decomposition."
+    ) Q, R, iq, Rp = qr(A, qrinds; pivot=RowNorm(), return_Rp=true) # column pivoting, no rank reduction
+    @test !isnothing(Rp)
+    @test dims(Rp) == dims(R)
+    @test dim(iq) == dim(qrinds)
+
+    @test_logs (
+      :warn, "Please use RowNorm() instead of ColumnNorm() for pivoted lq decomposition."
+    ) L, Q, iq, Lp = lq(A, qrinds; pivot=ColumnNorm(), return_Rp=true) # row pivoting, no rank reduction
+    @test !isnothing(Lp)
+    @test dims(Lp) == dims(L)
+    @test dim(iq) == dim(qrinds)
+  end
+end
+
+@testset "Blocksparse rank revealing QR/LQ decomp interface options" begin
+  space = [QN("Sz", 0) => 4, QN("Sz", -2) => 4, QN("Sz", 2) => 4]
+  site_space = [QN("Sz", -1) => 1, QN("Sz", 1) => 1]
+  l = Index(space, "l")
+  s = Index(site_space, "s")
+  r = Index(space, "r")
+  A = randomITensor(Float64, l, s, s', r)
+  qrinds = inds(A)[1:2]
+  rinds = noncommoninds(A, qrinds)
+  A, expected_rank = rank_fix(A, qrinds) #make all columns linearly dependent on column 1, so each block has rank 1.
+
+  Q, R, iq, Rp = qr(A, qrinds; atol=1e-14) # absolute tolerance for rank reduction
+  @test isnothing(Rp)
+  @test dim(iq) == expected_rank
+
+  Q, R, iq, Rp = qr(A, qrinds; atol=1e-14, return_Rp=true) # absolute tolerance for rank reduction
+  @test !isnothing(Rp)
+  @test dims(Rp) == dims(R)
+  @test dim(iq) == expected_rank
+
+  Q, R, iq, Rp = qr(A, qrinds; block_rtol=1e-15, return_Rp=true) # relative tolerance for rank reduction
+  @test !isnothing(Rp)
+  @test dims(Rp) == dims(R)
+  @test dim(iq) == expected_rank
+
+  Q, R, iq, Rp = qr(A, qrinds; block_rtol=1e-15, rtol=1000.0, return_Rp=true) # rtol ignored.
+  @test !isnothing(Rp)
+  @test dims(Rp) == dims(R)
+  @test dim(iq) == expected_rank
+end
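+#
+# Why block_rtol rather than a global rtol (illustrative numbers): blocks can carry
+# very different scales, so the relative cutoff is applied to each block's diagonal
+# on its own. For diagonal magnitudes d1 = [1.0, 1e-16] and d2 = [1e-14, 1e-30],
+# the per-block test count(>(rtol * maximum(d)), d) with rtol = 1e-12 keeps one row
+# of each block, while a cutoff relative to the global maximum (1.0) would discard
+# the second block entirely.
+#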
+
+@testset "Rank revealing QR/LQ decomp on MPO dense $elt tensor" for ninds in [1, 2, 3],
+  elt in [Float64, ComplexF64]
+
+  l = Index(5, "l")
+  s = Index(2, "s")
+  r = Index(5, "r")
+  A = randomITensor(elt, l, s, s', r)
+
+  Ainds = inds(A)
+  A, expected_rank = rank_fix(A, Ainds[1:ninds]) #make all columns linearly dependent on column 1, so rank==1.
+  Q, R, q, Rp = qr(A, Ainds[1:ninds]; atol=1e-12)
+  @test dim(q) == expected_rank #check that we found rank==1
+  @test A ≈ Q * R atol = 1e-13
+  @test Q * dag(prime(Q, q)) ≈ δ(Float64, q, q') atol = 1e-13
+  @test isnothing(Rp)
+
+  L, Q, q, Rp = lq(A, Ainds[1:ninds]; atol=1e-12)
+  @test dim(q) == expected_rank #check that we found rank==1
+  @test A ≈ Q * L atol = 1e-13 #With ITensors L*Q==Q*L
+  @test Q * dag(prime(Q, q)) ≈ δ(Float64, q, q') atol = 1e-13
+  @test isnothing(Rp)
+end
+
+@testset "Rank revealing QR/LQ decomp on MPO block-sparse $elt tensor" for ninds in
+                                                                           [1, 2, 3],
+  elt in [Float64]
+
+  space = [QN("Sz", 0) => 4, QN("Sz", -2) => 4, QN("Sz", 2) => 4]
+  site_space = [QN("Sz", -1) => 1, QN("Sz", 1) => 1]
+  l = Index(space, "l")
+  s = Index(site_space, "s")
+  r = Index(space, "r")
+  A = randomITensor(elt, l, s, s', r)
+
+  Ainds = inds(A)
+  A, expected_rank = rank_fix(A, Ainds[1:ninds]) #make all columns linearly dependent on column 1, so each block has rank 1.
+  Q, R, q, Rp = qr(A, Ainds[1:ninds]; block_rtol=1e-12)
+  @test dim(q) == expected_rank #check that we found the correct rank
+  @test A ≈ Q * R atol = 1e-13
+  @test norm(dense(Q * dag(prime(Q, q))) - δ(Float64, q, q')) ≈ 0.0 atol = 1e-13
+  @test isnothing(Rp)
+
+  L, Q, q, Rp = lq(A, Ainds[1:ninds]; block_rtol=1e-12)
+  @test dim(q) == expected_rank #check that we found the correct rank
+  @test A ≈ Q * L atol = 1e-13 #With ITensors L*Q==Q*L
+  @test norm(dense(Q * dag(prime(Q, q))) - δ(Float64, q, q')) ≈ 0.0 atol = 1e-13
+  @test isnothing(Rp)
 end
 
 @testset "factorize with QR" begin