In [150]:
import numpy as np 
import scipy 

# Lab 3: LU Factorization + Partial Pivoting 
We know that linear systems (at-least full rank matrices) of the form 
$$
    A x = b, 
$$
can be solved as 
$$
    x = A^{-1}b.
$$

1. $\mathcal{O}(n^3)$ complexity due to the matrix product.

## LU Factorization
The $LU$ decomposition (or factorization) of a matrix $A$ is defined as follows 
$$
A = LU,
$$
where $L$ is an upper triangular matrix and $U$ is a lower triangular matrix. If expanded, the decomposition looks as follows (for size 4)
$$
\begin{pmatrix}
    A_{11} & A_{12} & A_{13} & A_{14} \\
    A_{21} & A_{22} & A_{23} & A_{24} \\
    A_{31} & A_{32} & A_{33} & A_{34} \\
    A_{41} & A_{42} & A_{43} & A_{44} \\
\end{pmatrix} = 
\begin{pmatrix}
    1 & 0 & 0 & 0\\
    L_{21} & 1 & 0 & 0 \\
    L_{31} & L_{32} & 1 & 0 \\
    L_{41} & L_{42} & L_{43} & 1
\end{pmatrix}
\begin{pmatrix}
    U_{11} & U_{12} & U_{13} & U_{14} \\
    0 & U_{22} & U_{23} & U_{24}\\
    0 & 0 & U_{33} & U_{34} \\
    0 & 0 & 0 & U_{44} 
\end{pmatrix}
$$
We will revisit this later to demonstrate how this helps us to solve linear equations quicker than simply inverting.  



In [151]:
# Generating a random matrix for example
A = np.random.rand(5, 5).round(2) 
n = A.shape[1]
print(f"A:\n {A}")

# Scipy function for LU 
P, L, U = scipy.linalg.lu(A)
print(f"\n\n P:\n {P} \n\n L:\n {L.round(2)} \n\n U:\n {U.round(2)}")

A:
 [[0.38 0.12 0.9  0.76 0.42]
 [0.67 0.78 0.26 0.34 0.94]
 [0.73 0.24 0.2  0.57 0.16]
 [0.46 0.92 0.28 0.42 0.69]
 [0.31 0.77 0.36 0.51 0.72]]


 P:
 [[0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1.]] 

 L:
 [[ 1.    0.    0.    0.    0.  ]
 [ 0.63  1.    0.    0.    0.  ]
 [ 0.52 -0.01  1.    0.    0.  ]
 [ 0.92  0.73 -0.04  1.    0.  ]
 [ 0.42  0.87  0.18 -0.64  1.  ]] 

 U:
 [[ 0.73  0.24  0.2   0.57  0.16]
 [ 0.    0.77  0.15  0.06  0.59]
 [ 0.    0.    0.8   0.46  0.34]
 [ 0.    0.    0.   -0.21  0.38]
 [ 0.    0.    0.    0.    0.32]]


Now, we will now get into $LU$ decomposition algorithms, but let us introduce some tools first. One of the goals of this section is to re-do the $LU$ code you learnt in lecture using array slicing to demonstrate how powerful and intuitive the tool is. 

### THE LU ALGORITHM
The algorithm works by iterating through each diagonal element (aka pivot) and converting the matrix $A$ into an upper diagonal matrix $U$ using row operations. So for each pivot, we have to come up with row operations that set all the values underneath it to zero. The steps will look something like this - 
$$
\begin{pmatrix}
    A_{11} & A_{12} & A_{13} & A_{14} \\
    A_{21} & A_{22} & A_{23} & A_{24} \\
    A_{31} & A_{32} & A_{33} & A_{34} \\
    A_{41} & A_{42} & A_{43} & A_{44} 
\end{pmatrix} =
\begin{pmatrix}
    \color{red}B_{11} & B_{12} & B_{13} & B_{14} \\
    0      & B_{22} & B_{23} & B_{24} \\
    0      & B_{32} & B_{33} & B_{34} \\
    0      & B_{42} & B_{43} & B_{44} \\
\end{pmatrix}
\longrightarrow
\begin{pmatrix}
    C_{11} & C_{12} & C_{13} & C_{14} \\
    0      & \color{red}C_{22} & C_{23} & C_{24} \\
    0      & 0      & C_{33} & C_{34} \\
    0      & 0      & C_{43} & C_{44} \\
\end{pmatrix}
\longrightarrow
\begin{pmatrix}
    U_{11} & U_{12} & U_{13} & U_{14} \\
    0 & U_{22} & U_{23} & U_{24} \\
    0 & 0 & \color{red}U_{33} & U_{34} \\
    0 & 0 & 0 & U_{44} \\
\end{pmatrix}
$$
Note that the entire matrix changes when we apply a set of row-operations. Hence, the different notation for the matrix in each step. Let us make up a random matrix. Keep in mind the numpy.random.rand(.) function always generates a full-rank matrix.

In [152]:
# LU row operation step (matrix A, pivot point i)
def LU_ROW_OP(L, U, i):
    multipliers  =  U[i+1:, i] / U[i, i]
    L[i+1:, i]   =  multipliers
    U[i+1:, :]  -=  multipliers.reshape(-1, 1) * U[i, :] 
    return L, U

# Step by step visualization of LU factorization

Apply this row operation to each pivot point. Once the algorithm ends, the result will be an upper triangular matrix which will be U. 

In [153]:
A = np.random.rand(5, 5).round(2) # rounding to two digits for simplicity
n = A.shape[1]
print(f"A:\n {A}")

U = A.copy()  
L = np.eye(5)

# iterate trough each diagonal point (pivot) and apply the LU step
for i in range(n):
    L, U = LU_ROW_OP(L, U, i)
    print(f"\nStep {i+1}, Pivot [{i}, {i}]:\n {U.round(2)} \n")

A:
 [[0.84 0.42 0.64 0.74 0.03]
 [0.03 0.97 0.56 0.33 0.45]
 [0.6  0.96 0.05 0.11 0.91]
 [1.   0.04 0.15 0.92 0.62]
 [0.86 0.74 0.97 0.14 0.61]]

Step 1, Pivot [0, 0]:
 [[ 0.84  0.42  0.64  0.74  0.03]
 [ 0.    0.96  0.54  0.3   0.45]
 [ 0.    0.66 -0.41 -0.42  0.89]
 [ 0.   -0.46 -0.61  0.04  0.58]
 [ 0.    0.31  0.31 -0.62  0.58]] 


Step 2, Pivot [1, 1]:
 [[ 0.84  0.42  0.64  0.74  0.03]
 [ 0.    0.96  0.54  0.3   0.45]
 [ 0.    0.   -0.78 -0.63  0.58]
 [ 0.    0.   -0.35  0.19  0.8 ]
 [ 0.    0.    0.14 -0.72  0.43]] 


Step 3, Pivot [2, 2]:
 [[ 0.84  0.42  0.64  0.74  0.03]
 [ 0.    0.96  0.54  0.3   0.45]
 [ 0.    0.   -0.78 -0.63  0.58]
 [ 0.    0.    0.    0.47  0.54]
 [ 0.    0.    0.   -0.83  0.54]] 


Step 4, Pivot [3, 3]:
 [[ 0.84  0.42  0.64  0.74  0.03]
 [ 0.    0.96  0.54  0.3   0.45]
 [ 0.    0.   -0.78 -0.63  0.58]
 [ 0.    0.    0.    0.47  0.54]
 [ 0.    0.    0.    0.    1.49]] 


Step 5, Pivot [4, 4]:
 [[ 0.84  0.42  0.64  0.74  0.03]
 [ 0.    0.96  0.54  0.3   0.4

Next, we will implement this as a function to do some tests. 

# Basic LU Algorithm

In [154]:
# LU with no pivoting 
def basic_lu(A):

    # define L and U matrices 
    n = A.shape[0]
    L = np.eye(n)
    U = A.copy() 
    
    # iterating through each diagonal element 
    for i in range(n): # O(n)
        L, U = LU_ROW_OP(L, U, i) #O( (n-1)*n )
        
    return L, U 
    
# Total complexity = O(n * (n^2 - n)) = O(n^3 - n^2) = O(n^3)

In [155]:
L, U = basic_lu(A)

# check solution PA = LU (in this case, since there is no pivoting P = Identity matrix)
LU = L @ U

# L2 norm to compare A and L@U. 
err = np.linalg.norm(abs(A-LU)) 

#print(f"A:\n {A.round(2)} \n\n L:\n {L.round(2)} \n\n U:\n {U.round(2)} \n\n L@U: \n{L@U} \n\n")
print(f"\n reconstruction error (||A - LU||_2): {err}")


 reconstruction error (||A - LU||_2): 2.420669712248692e-16


# Basic LU (no pivoting) can fail in some cases

We will test our basic LU algorithm on three matrices 
1. A1 : The first pivot is zero
   - Setting the first pivot A[0, 0] leads to zero division when computing the multiplier. 
2. A2 : The first pivot is extremly small
   - Due to floating point innacuracies, extremely small pivot values can cause zero divisions. 
3. A3 : Singular matrix (randomly generated)
   - Why does a singular matrix fail LU? 
   
You will see that the basic LU algorithm will fail in these three cases. 

In [156]:
A1 = A.copy()
A2 = A.copy()
n  = A.shape[1]

# three cases where basic LU will fail
A1[0, 0] = 0                                      # Case1: The first pivot value is zero 
A2[0, 0] = 6.6e-40                                # Case2: The first pivot value is miniscule (eg. Planck's constant)
A3 = np.random.rand(n, 1) @ np.random.rand(1, n)  # Case3: singular matrix (That's right! the outer product of two vectors is a singular matrix.)

# compute the basic LU of the three cases 
L1, U1 = basic_lu(A1)
L2, U2 = basic_lu(A2)
L3, U3 = basic_lu(A3) 

# assert will terminate program if false
test1 = (np.isnan(L1) & np.isnan(U1)).any()        # check if there are any infinite values in L1 and U1
test2 = (np.isnan(L2) & np.isnan(U2)).any()        # same test with L2 and U2
test3 = (np.isnan(L3) & np.isnan(U3)).any()        # same test with L3 and U3 

print("\n *** If you're reading this message, The L, U matrices that we computed for the three matrices have infinite values (LU algorithm failed) *** \n")


 *** If you're reading this message, The L, U matrices that we computed for the three matrices have infinite values (LU algorithm failed) *** 



  multipliers  =  U[i+1:, i] / U[i, i]
  U[i+1:, :]  -=  multipliers.reshape(-1, 1) * U[i, :]
  multipliers  =  U[i+1:, i] / U[i, i]


# Partial pivoting. If zero pivot encountered, simply swap it with with a different row.
. 
There are various methods to determine which row to swap with.

In [157]:
# simple one line code to swap two rows, 'row1' and 'row2' of a matrix A
def swap_rows(A, row1, row2):
    A[[row1, row2], :] = A[[row2, row1], :]
    return A

# LU Factorization with Partial Pivoting (only swap rows)

In [158]:
# loop until you find the next non zero pivot (or if array ends)
def partial_pivot_simple(P, U, i):
    n_possibilities = U.shape[1] - i 
    while np.isclose(U[i, i], 0.0) and i < n_possibilities:
        P = swap_rows(P, i, i+1)           
        U = swap_rows(U, i, i+1)
        i = i + 1
    return P, U



# Use argmax to find the next largest value. 
def partial_pivot_argmax(P, U, i):
    # look at the column values below the pivot and find the largest
    piv_vals         = np.diag(U[i:, i:])
    piv_with_max_val = i + np.argmax(piv_vals)
    # swap rows with the 
    P = swap_rows(P, i, piv_with_max_val)           
    U = swap_rows(U, i, piv_with_max_val)
    return P, U

In [159]:
def partial_pivot_lu(A):
    n = A.shape[1]
    P, L = np.eye(n), np.eye(n)
    U = A.copy()

    # iterating through each pivot (diagonal point for each row)
    for i in range(n):   

        # if zero diagonal encountered, use of the partial pivoting functions
        if np.isclose(U[i, i], 0.0):
            P, U = partial_pivot_argmax(P, U, i)
            # P, U = partial_pivot_simple(P, U, i)
        
        # continue with the normal LU step 
        L, U = LU_ROW_OP(L, U, i)
        
    return P, L, U

In [160]:
# we will return to the three special cases we discussed above
P1, L1, U1 = partial_pivot_lu(A1) # A1[0, 0] = 0
P2, L2, U2 = partial_pivot_lu(A2) # A2[0, 0] = plancks constant (~6e-30, very close to zero)
P3, L3, U3 = partial_pivot_lu(A3) # singular matrix. This will still fail. Why?

err1 = np.linalg.norm( abs(P1@A1 - L1@U1) ) 
err2 = np.linalg.norm( abs(P2@A2 - L2@U2) ) 
print(f"\n Reconstruction error for A1 and A2 is {err1} and {err2} respectively \n")

assert (np.isnan(L3) & np.isnan(U3)).any()
print(f" \n If you're seeing this message then the code failed for A3, as expected, since it is a singular matrix. The bottom line is that there are no solutions for singular matrices\n")



 Reconstruction error for A1 and A2 is 6.812796007220768e-15 and 6.812796007220768e-15 respectively 

 
 If you're seeing this message then the code failed for A3, as expected, since it is a singular matrix. The bottom line is that there are no solutions for singular matrices



  multipliers  =  U[i+1:, i] / U[i, i]


# Forward/Backward Substitution

Now that we have code for decomposing a matrix $A$ into the $LU$ form, we can use it to solve system of linear equations

In [174]:
def forward_subst(L, b):
    n = L.shape[0]
    sol = np.zeros(n);
    sol[0] = b[0] / L[0, 0]
    
    for i in range(1, n): # O(n)
        sol[i] = (b[i] - L[i,:i]@sol[:i]) / L[i,i] # dot product (O(n))

    return sol

In [175]:
def backward_subst(L, b):
    n = L.shape[0]
    sol = np.zeros(n);
    sol[0] = b[0] / L[0, 0]
    
    for i in range(1, n): # O(n)
        sol[i] = (b[i] - L[i,i:]@sol[i:]) / L[i,i] # dot product (O(n))

    return sol

In [176]:
print(f"A:\n {A.round(2)} \n\n b:\n {b.round(2)} \n\n sol:\n {sol.round(2)}")

A:
 [[0.84 0.42 0.64 0.74 0.03]
 [0.03 0.97 0.56 0.33 0.45]
 [0.6  0.96 0.05 0.11 0.91]
 [1.   0.04 0.15 0.92 0.62]
 [0.86 0.74 0.97 0.14 0.61]] 

 b:
 [0.06 0.2  0.79 0.64 1.35] 

 sol:
 [ 0.07  0.21 -1.01  1.36  0.91]


In [171]:
# lets make up a random lower-triangular matrix (and random linear systems problem)
P, L, U = partial_pivot_lu(A)
sol = forward_subst(L, b)
sol = backward_subst(U, sol)

In [181]:
numpy_soln = np.linalg.solve(A, b)
assert np.isclose(sol, numpy_soln).all(), "Numpy and comuted soln not same!"

AssertionError: Numpy and comuted soln not same!

In [169]:
P

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])