In [3]:
from pyspark.sql import SparkSession
import numpy as np

def cholesky_decomposition(A):
    """
    Performs Cholesky Decomposition on a given matrix A.
    A must be a symmetric positive-definite matrix.
    Returns the lower triangular matrix L such that A = L * L^T.
    """
    n = A.shape[0]
    L = np.zeros_like(A)

    for i in range(n):
        for j in range(i + 1):
            if i == j:
                L[i, j] = np.sqrt(A[i, i] - np.sum(L[i, :j] ** 2))
            else:
                L[i, j] = (A[i, j] - np.sum(L[i, :j] * L[j, :j])) / L[j, j]
    return L

In [4]:
# Initialize Spark Session
spark = SparkSession.builder.master("local[8]") \
                    .appName("Cholesky Decomposition") \
                    .config("spark.executor.memory", "6g") \
                    .config("spark.executor.cores", "8") \
                    .getOrCreate()

# Example matrices
matrices = [
    np.array([[25, -50], [-50, 101]]),
    np.array([[25, 15, -5], [15, 18, 0], [-5, 0, 11]]),
    np.array([[7, -2, -3, 0, -1, 0], [-2, 8, 0, 0, -1, 0], [-3, 0, 4, -1, 0, 0], [0, 0, -1, 5, 0, -2], [-1, -1, 0, 0, 4, 0], [0, 0, 0, -2, 0, 6]])
]

# Convert to RDD
rdd = spark.sparkContext.parallelize(matrices)

# Apply Cholesky Decomposition
result_rdd = rdd.map(lambda mat: cholesky_decomposition(mat))

# Collect results
results = result_rdd.collect()

# Print results
for idx, res in enumerate(results):
    print(f"Matrix {idx + 1}:\n{res}\n")

spark.stop()

                                                                                

Matrix 1:
[[  5   0]
 [-10   1]]

Matrix 2:
[[ 5  0  0]
 [ 3  3  0]
 [-1  1  3]]

Matrix 3:
[[ 2  0  0  0  0  0]
 [-1  2  0  0  0  0]
 [-1  0  1  0  0  0]
 [ 0  0 -1  2  0  0]
 [ 0  0  0  0  2  0]
 [ 0  0  0 -1  0  2]]

