In [2]:
import numpy as np
from scipy.sparse import csr_matrix, csc_matrix, lil_matrix, vstack, hstack, save_npz, load_npz, block_diag, identity, random
from scipy.sparse.linalg import inv, spsolve, splu
from scipy.linalg import lu
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Pool, shared_memory
import matplotlib.pyplot as plt
import time
import cProfile
import pstats
import csv
import os
from tqdm import tqdm

In [21]:
import numpy as np
import scipy.sparse as sp

def lower_block_bidiagonal_nonsingular(n_blocks, block_size):
    """
    Generate a nonsingular sparse lower block bidiagonal matrix in CSR format.

    Parameters:
        n_blocks (int): Number of diagonal blocks.
        block_size (int): Size of each square block.

    Returns:
        scipy.sparse.csr_matrix: The resulting nonsingular sparse matrix.
        numpy.ndarray: The corresponding RHS vector.
    """
    N = n_blocks * block_size  # Total size of the matrix
    data, row_indices, col_indices = [], [], []

    # Generate diagonal (B_i) and lower diagonal (L_i) blocks
    for i in range(n_blocks):
        row_offset = i * block_size
        col_offset = i * block_size

        # Ensure nonzero entries in the main diagonal block (B_i)
        block_main = np.random.rand(block_size, block_size) + np.eye(block_size)  # Make B_i non-singular
        for r in range(block_size):
            for c in range(block_size):
                val = block_main[r, c]
                data.append(val)
                row_indices.append(row_offset + r)
                col_indices.append(col_offset + c)

        # Lower block (L_i), ensuring nonzero entries
        if i < n_blocks - 1:
            row_offset = (i + 1) * block_size
            col_offset = i * block_size
            block_lower = np.random.rand(block_size, block_size)  # Random values ensure nonzero entries

            for r in range(block_size):
                for c in range(block_size):
                    val = block_lower[r, c]
                    data.append(val)
                    row_indices.append(row_offset + r)
                    col_indices.append(col_offset + c)

    # Create sparse CSR matrix
    sparse_matrix = sp.csr_matrix((data, (row_indices, col_indices)), shape=(N, N))

    # Generate a random RHS vector (column vector)
    rhs_vector = np.random.rand(N, 1)  # Nx1 dense vector

    return sparse_matrix, rhs_vector

In [70]:
p = 4 # no. of processors
k = 2 # recursion/iteration depth
n = int(p*2**k)

n_blocks = n + 1
block_size = 2

A, f = lower_block_bidiagonal_nonsingular(n_blocks, block_size)
x = spsolve(A,f)

save_folder = "LBBM_p4" # Lower block bidiagonal matrix for 4 processors
save_npz(f"{save_folder}/n_{n_blocks}_mat.npz",A)
np.save(f"{save_folder}/n_{n_blocks}_rhs.npy",f)
np.save(f"{save_folder}/n_{n_blocks}_sol.npy",x)

In [71]:
A,f,x = load_npz(f"{save_folder}/n_{n_blocks}_mat.npz"), np.load(f"{save_folder}/n_{n_blocks}_rhs.npy"), np.load(f"{save_folder}/n_{n_blocks}_sol.npy")

In [None]:
def placeholder_name(A, f, block_size : int, processors : int):
    """ 
    Performs Block Cyclic Reduction (BCR) in parallel for solving lower block bidiagonal systems.

    Parameters:
    -----------
    A : scipy.sparse.csr_matrix or numpy.ndarray
        The coefficient matrix of size (N, N), where N = (n+1) * block_size.
        Must be a square lower block bidiagonal matrix.

    f : numpy.ndarray
        The right-hand side (RHS) vector of size (N, 1), corresponding to Ax = f.

    block_size : int
        The size of each block in the matrix.

    processors : int
        The number of processors used for parallel block cyclic reduction.

    Returns:
    --------
    x : numpy.ndarray
        The solution vector of size (N, 1) satisfying Ax = f.
    """
    N, M = A.shape
    assert N == M,  f"A must be sqaure but has dimensions {N}x{M}"
    n = (N - 1) // block_size
    assert n % processors == 0, f"A must have size (n+1)*block_size x (n+1)*block_size, where n = p * 2**k. p is not a multiple of n."
    nbyp = n // processors 
    assert ((nbyp & (nbyp-1) == 0) and nbyp != 0), f"A must have size (n+1)*block_size x (n+1)*block_size, where n = p * 2**k. n/p is not a power of two."
    print(n+1)

    row_index_start = block_size
    row_index_end = block_size*(1+nbyp)
    col_index_start = 0
    col_index_end = block_size*(nbyp+1)
    
    # Divide among the processors
    for processor in range(processors):
        A_copy = A[row_index_start:row_index_end, col_index_start:col_index_end]
        forward_placeholder(A_copy, f, block_size, processors)
        row_index_start = row_index_end
        row_index_end += nbyp*block_size 
        col_index_start = col_index_end - block_size
        col_index_end += block_size*nbyp
    

def forward_placeholder(A, f, block_size : int, processors : int):
    
    

np.set_printoptions(precision=1, suppress=True)
placeholder_name(A,f,block_size=block_size,processors=p)


17
4 5
4 5
4 5
4 5
