In [None]:
import numpy as np
from numba import njit, prange
from numba.typed import Dict, List
from numba.core import types

# Function to generate data

def generate_data(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    # print(f"N = {N}")
    # print(f"T = {T}")

    R = np.zeros((N, M))
    if sum(alpha) != 1:
        
        alpha = np.array(alpha) / sum(alpha)

    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    prior = np.random.uniform(size=N)
    alpha_cum = np.cumsum([0] + list(alpha))
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            R[:, m] = ((prior > lb) & (prior <= ub)).astype(int)
    else:
        R = np.ones((N, M))

    Y = np.zeros((T, N))
    
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    mu_R = np.dot(R, mu)
    sigma_R = np.dot(R, sigma)
    u = np.random.normal(size=(T, N))
    
    for nn in range(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            beta_R = np.dot(R, beta)
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            beta_R = np.dot(R, np.ravel(beta))
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn
    
    if p == 0:
        z = None
    if q == 0:
        x = None

    data_dict = {}
    data_dict['Y'] = Y
    data_dict['Z'] = z
    data_dict['X'] = x
    return data_dict


@njit
def create_indicator_list(data_c, T, N, n_bins):
    """
    Create a list of indicator matrices based on quantiles for each time period.
    """
    indicator_list = List()
    for t in range(T):
        # Calculate quantiles manually
        quantiles = np.empty(n_bins + 1)
        sorted_data = np.sort(data_c[t, :])
        for i in range(n_bins + 1):
            if i == 0:
                quantiles[i] = -np.inf
            elif i == n_bins:
                quantiles[i] = np.inf
            else:
                quantiles[i] = sorted_data[int(i * N / n_bins)]

        # Create indicator matrix
        indicator_matrix = np.zeros((N, n_bins))
        for n in range(N):
            for b in range(n_bins):
                if quantiles[b] <= data_c[t, n] < quantiles[b + 1]:
                    indicator_matrix[n, b] = 1
                    break
        indicator_list.append(indicator_matrix)
    return indicator_list

from numba import njit
from numba.typed import List
import numpy as np


@njit
def calculate_P_matrix(data_c, weights, n_grid=3, n_bins=2):
    """
    Calculate P matrices and Sigma matrices for triplets in a Numba-compatible way.
    """
    T = data_c.shape[0]
    N = data_c.shape[1]
    
    # Create `indicator_list_Y` with 2 bins
    indicator_list_Y = create_indicator_list(data_c, T, N, n_bins=n_bins)
    
    # Create `indicator_list_Y_ngrid` with `n_grid` bins
    indicator_list_Y_ngrid = create_indicator_list(data_c, T, N, n_bins=n_grid)
    
    # Initialize the result lists
    P_k_list = List()
    Sigma_P_k_list = List()
    
    # Iterate over the t periods
    for k in range(T):
        # Compute the Kronecker product for each row manually
        result_matrix = np.zeros((N, (n_bins ** (T - 1))))
        for n in range(N):
            # Manually compute Kronecker product for the row
            kron_result = np.array([1.0])  # Start with scalar 1.0
            for t in range(T):
                if t != k:
                    kron_result = np.kron(kron_result, indicator_list_Y[t][n, :])
            result_matrix[n, :] = kron_result
        
        # Compute P_k
        P_k = (weights * indicator_list_Y_ngrid[k].T) @ result_matrix
        P_k_list.append(P_k)
        
        # Compute Sigma_P_k
        P_k_vec = P_k.T.flatten()
        W_P_s = np.diag(P_k_vec) - np.outer(P_k_vec, P_k_vec)
        Sigma_P_k_list.append(W_P_s)

    
    return {
        "P_k_list": P_k_list,
        "Sigma_P_k_list": Sigma_P_k_list
    }



@njit
def compute_matrix_sqrt(U, S, VT):
    # Compute the square root of singular values
    sqrt_singular_values = np.sqrt(S)
    
    # Reconstruct the square root matrix
    sqrt_mat = U @ np.diag(sqrt_singular_values) @ VT
    return sqrt_mat

# Wrapper function to handle SVD outside Numba
def matrix_sqrt_svd(mat):
    if not isinstance(mat, np.ndarray):
        raise ValueError("Input must be a matrix (NumPy array).")
    
    if mat.shape[0] != mat.shape[1]:
        raise ValueError("Input must be a square matrix.")
    
    # Perform SVD decomposition (outside Numba)
    U, S, VT = np.linalg.svd(mat)
    
    # Compute the square root matrix (inside Numba)
    sqrt_mat = compute_matrix_sqrt(U, S, VT)
    return sqrt_mat


@njit
def matrix_sqrt(A):
    """Compute the square root of a matrix using eigen-decomposition."""
    # Eigen-decomposition of the matrix
    vals, vecs = np.linalg.eigh(A)
    # Compute the square root of eigenvalues
    sqrt_vals = np.sqrt(vals)
    # Reconstruct the matrix square root
    sqrt_A = vecs @ np.diag(sqrt_vals) @ vecs.T
    return sqrt_A

@njit
def invert_matrix(mat, epsilon=1e-8):
    """
    Numba-compatible function to compute the inverse of a square matrix.
    If the determinant is close to zero, the matrix is regularized by adding
    epsilon to the diagonal before inversion.

    Parameters:
        mat (ndarray): Input square matrix.
        epsilon (float): Small value added to the diagonal for regularization.

    Returns:
        ndarray: Inverse of the matrix.
    """
    # Ensure the matrix is square
    if mat.shape[0] != mat.shape[1]:
        # Numba cannot raise exceptions, so we return an empty array for invalid input
        return np.zeros_like(mat)
    
    # Compute the determinant
    det_val = np.linalg.det(mat)
    
    # Regularize the matrix if the determinant is close to zero
    if abs(det_val) < epsilon:
        mat = mat + np.eye(mat.shape[0]) * epsilon
    
    # Compute and return the inverse
    return np.linalg.inv(mat)

@njit
def compute_A_q_o(U_22, U_12):
    """Compute A_q_o."""
    sqrt_U22 = matrix_sqrt(U_22 @ U_22.T)
    inv_U22_T = invert_matrix(U_22.T)
    A_q_o = np.transpose(sqrt_U22 @ inv_U22_T @ np.hstack((U_12.T, U_22.T)))
    return A_q_o

@njit
def compute_B_q_o(V_22, V_12):
    """Compute B_q_o."""
    sqrt_V22 = matrix_sqrt(V_22 @ V_22.T)
    inv_V22_T = invert_matrix(V_22.T)
    B_q_o = sqrt_V22 @ inv_V22_T @ np.hstack((V_12.T, V_22.T))
    return B_q_o

@njit
def compute_kron_BA_o(B_q_o, A_q_o):
    """Compute the Kronecker product of B_q_o and A_q_o.T."""
    return np.kron(B_q_o, A_q_o.T)

@njit
def matrix_svd_decomposition(P, m):
    """
    Perform SVD decomposition and compute A_q_o, B_q_o, and Kronecker product.
    """
    # Perform SVD outside the Numba function
    U, S, VT = np.linalg.svd(P, full_matrices=True)
    V = VT.T
    
    # Submatrices of U and V
    U_12 = U[:m, m:]
    V_12 = V[:m, m:]
    U_22 = U[m:, m:]
    V_22 = V[m:, m:]
    
    # Compute A_q_o and B_q_o using Numba-compiled functions
    A_q_o = compute_A_q_o(U_22, U_12)
    B_q_o = compute_B_q_o(V_22, V_12)
    
    # Compute the Kronecker product
    kron_BA_o = compute_kron_BA_o(B_q_o, A_q_o)
    
    # Ensure all arrays are 2D
    S = S.reshape(-1, 1)  # Convert S to a 2D column vector
    U = np.atleast_2d(U)
    V = np.atleast_2d(V)
    U_12 = np.atleast_2d(U_12)
    V_12 = np.atleast_2d(V_12)
    U_22 = np.atleast_2d(U_22)
    V_22 = np.atleast_2d(V_22)
    A_q_o = np.atleast_2d(A_q_o)
    B_q_o = np.atleast_2d(B_q_o)
    kron_BA_o = np.atleast_2d(kron_BA_o)

    # Create a Numba-compatible dictionary
    numba_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add key-value pairs
    numba_dict["D"] = S
    numba_dict["U"] = U
    numba_dict["V"] = V
    numba_dict["U_12"] = U_12
    numba_dict["V_12"] = V_12
    numba_dict["U_22"] = U_22
    numba_dict["V_22"] = V_22
    numba_dict["A_q_o"] = A_q_o
    numba_dict["B_q_o"] = B_q_o
    numba_dict["kron_BA_o"] = kron_BA_o

    return numba_dict


    
@njit
def compute_stat(P, Sigma_P, P_svd, m, n_size, lambda_c):
    """
    Compute statistical metrics for the "P" or "Q" transform case.
    """
    # Extract SVD components
    A_q_o = P_svd["A_q_o"]
    B_q_o = P_svd["B_q_o"]
    kron_BA_o = P_svd["kron_BA_o"]

    # Compute lambda_q
    lambda_q = A_q_o.T @ P @ B_q_o.T - lambda_c

    # Compute Omega_q
    Omega_q = kron_BA_o @ Sigma_P @ kron_BA_o.T

    # Compute rk_c
    lambda_q_flat = lambda_q.flatten()
    Omega_q_inv = invert_matrix(Omega_q)
    rk_c = n_size * (lambda_q_flat @ Omega_q_inv @ lambda_q_flat) 

    return lambda_q, Omega_q, rk_c

@njit
def compute_criteria(rk_c, r, n_size):
    """
    Compute AIC, BIC, and HQ criteria.
    """
    AIC_c = rk_c - 2 * r
    BIC_c = rk_c - np.log(n_size) * r
    HQ_c = rk_c - 2 * np.log(np.log(n_size)) * r
    return AIC_c, BIC_c, HQ_c

@njit
def construct_stat_KP(P, Sigma_P, m, n_size, lambda_c=0):
    """
    Construct statistical metrics for the Kronecker Product and return 
    a Numba-compatible typed dictionary.
    """
    # Perform SVD decomposition
    P_svd = matrix_svd_decomposition(P, m)

    # Compute stats using Numba
    lambda_q, Omega_q, rk_c = compute_stat(P, Sigma_P, P_svd, m, n_size, lambda_c)

    # Compute the rank (r)
    r = Omega_q.shape[0]

    # Compute AIC, BIC, and HQ using Numba
    AIC_c, BIC_c, HQ_c = compute_criteria(rk_c, r, n_size)

    # Create a Numba-compatible dictionary to store results
    result_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add results to the dictionary
    result_dict["rk_c"] = np.array([[rk_c]])  # Scalars must be converted to 2D arrays
    result_dict["lambda_c"] = lambda_q
    result_dict["Omega_q"] = Omega_q
    result_dict["AIC_c"] = np.array([[AIC_c]])
    result_dict["BIC_c"] = np.array([[BIC_c]])
    result_dict["HQ_c"] = np.array([[HQ_c]])

    return result_dict

@njit
def compute_quantile(data, q):
    """
    Compute the q-th quantile manually.
    This replaces np.quantile for Numba compatibility.
    """
    sorted_data = np.sort(data)  # Sort the data
    idx = int(q * (len(sorted_data) - 1))  # Find the index for the quantile
    return sorted_data[idx]

@njit
def max_along_axis_1(matrix):
    """
    Compute the maximum along axis 1 for a 2D array.
    This replaces np.max(axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    max_values = np.empty(n_rows)  # Array to store max values for each row
    for i in range(n_rows):
        max_values[i] = -np.inf  # Initialize with negative infinity
        for j in range(n_cols):
            if matrix[i, j] > max_values[i]:
                max_values[i] = matrix[i, j]
    return max_values

@njit
def mean_along_axis_1(matrix):
    """
    Compute the mean along axis 1 for a 2D array.
    This replaces np.mean(array, axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    mean_values = np.empty(n_rows)  # Array to store mean values for each row
    for i in range(n_rows):
        row_sum = 0.0
        for j in range(n_cols):
            row_sum += matrix[i, j]
        mean_values[i] = row_sum / n_cols  # Compute mean for the row
    return mean_values

@njit
def non_par_test(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test):
    # Result array
    result_rk_each = np.zeros((nrep,2))
    for ii in range(nrep):
        # Generate synthetic data (replace with actual logic)
        data_c = data_nopar[ii]  # Example: Replace this with your `generate_data` logic
        # Initialize weights
        # Compute P and Sigma matrices
        data_P_W = calculate_P_matrix(data_c, weights_equal, n_grid=n_grid, n_bins=2)
        
        # Initialize results
        rk = np.zeros(T)
        lambda_c_list = List()
        omega_c = List()
        Sigma_P_list = List()
        P_k_list = List()
        
        # Loop through T periods to compute statistics
        for k in range(T):
            # Extract P_k and Sigma_P_k from the data_P_W object
            P_k = data_P_W["P_k_list"][k]
            Sigma_P_k = data_P_W["Sigma_P_k_list"][k]
            
            # Compute KP statistics for the k-th triplet
            stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)
            
            # Store results
            rk[k] = stat_KP["rk_c"][0,0]
            lambda_c_list.append(stat_KP["lambda_c"])
            omega_c.append(stat_KP["Omega_q"])
            Sigma_P_list.append(Sigma_P_k)
            P_k_list.append(P_k)
        # Initialize result matrix
        rk_b = np.zeros((BB, T))
        
        # Smoothed Nonparametric Bootstrap
        ru = np.random.exponential(scale=1, size=(BB, N))  # Exponential random variables
        row_sums = ru.sum(axis=1).reshape(-1, 1)  # Reshape to keep dimensions
        ru /= row_sums
        
        for i in range(BB):
            # Calculate bootstrapped P and Sigma_P matrices
            data_P_W_b = calculate_P_matrix(data_c, ru[i, :], n_grid=n_grid, n_bins=2)
            
            for k in range(T):
                P_k = data_P_W_b['P_k_list'][k]
                Sigma_P_k = data_P_W_b['Sigma_P_k_list'][k]
                # Compute KP statistics for the k-th triplet
                rk_b[i, k] = construct_stat_KP(P_k, Sigma_P_k, r_test, N, lambda_c_list[k])['rk_c'][0,0]
        # Compute max and mean values for rk and rk_b
        rk_b_max = max_along_axis_1(rk_b)  # Maximum of rk_b along axis 1
        rk_b_max_95 = compute_quantile(rk_b_max, 0.95)  # 95th quantile of rk_b_max

        
        # Store results
        result_rk_each[ii, 0] = 1 * (rk.max() > rk_b_max_95)
        rk_mean = np.mean(rk)  # Mean of rk (Numba supports this)
        rk_b_mean = mean_along_axis_1(rk_b)  # Mean of rk_b along axis 1
        rk_b_mean_95 = compute_quantile(rk_b_mean, 0.95)  # 95th quantile of rk_b_mean
        result_rk_each[ii, 1] = 1 * (rk_mean > rk_b_mean_95)
    return result_rk_each

In [None]:
# ----------------------------------------------------------
# LR test functions
# ----------------------------------------------------------

@njit
def process_array_or_none(arr, nt):
    if arr is None:  # Check for None
        return np.zeros((nt,0),dtype=np.float64)  # Default behavior for None
    return arr  # Process the array if it's valid

@njit
def safe_solve(A, b):
    """
    Numba-compatible replacement for np.linalg.solve.
    Solves Ax = b using LU decomposition if the matrix is not singular.
    """
    if np.linalg.cond(A) < SINGULAR_EPS:
        raise ValueError("Matrix is singular or near-singular.")
    return np.linalg.solve(A, b)

@njit
def log_likelihood_normal(y, mu, sigma):
    """
    Calculate the log-likelihood of the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihood: float, the log-likelihood value
    """
    n = len(y)
    term1 = -n / 2 * np.log(2 * np.pi)  # Constant term
    term2 = -n * np.log(sigma)  # Log of the standard deviation
    term3 = -1 / (2 * sigma**2) * np.sum((y - mu) ** 2)  # Data fitting term
    return term1 + term2 + term3

@njit
def log_likelihood_array(y, mu, sigma):
    """
    Calculate the log-likelihood of each element in the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihoods: array, log-likelihood of each data point
    """
    # Precompute constants
    constant = -0.5 * np.log(2 * np.pi)
    variance = sigma ** 2

    # Initialize result array
    log_likelihoods = np.empty(len(y))

    # Compute log-likelihood for each data point
    for i in range(len(y)):
        log_likelihoods[i] = (
            constant 
            - np.log(sigma) 
            - ((y[i] - mu) ** 2) / (2 * variance)
        )
    
    return log_likelihoods

import numpy as np
from numba import njit

@njit
def compute_residuals(m, n, t, sigma_jn, ytilde, mubeta_jn):
    """
    Compute residuals for the EM optimization loop in a Numba-compatible way.

    Parameters:
    - m: Number of components (int)
    - n: Number of groups (int)
    - t: Number of time points per group (int)
    - sigma_jn: Array of current sigma values (1D array of floats, shape (m,))
    - ytilde: Adjusted response variable (1D array of floats, shape (n * t,))
    - mubeta_jn: Array of current beta means (1D array of floats, shape (m,))

    Returns:
    - r: Residuals array (2D array of floats, shape (m, n))
    """
    # Initialize the residuals array
    r = np.zeros((m, n), dtype=np.float64)

    # Loop over each component (m)
    for j in range(m):
        
        # Loop over each group (n)
        for i in range(n):
            sum_r_t = 0.0

            # Loop over each time point within the group (t)
            for k in range(t):
                idx = i * t + k  # Compute the flattened index
                diff = ytilde[idx] - mubeta_jn[j]
                r_t = (1.0 / sigma_jn[j]) * diff
                sum_r_t += 0.5 * (r_t**2)

            # Compute residual for group i and component j
            r[j, i] = t * np.log(sigma_jn[j]) + sum_r_t
    return r

@njit
def min_along_axis_0(r):
    # Get the shape of the array
    rows, cols = r.shape
    
    # Initialize an array to store the minimum values for each column
    min_vals = np.empty(cols)
    
    # Iterate through each column
    for j in range(cols):
        # Initialize the minimum value for the current column
        min_val = r[0, j]
        
        # Iterate through each row in the current column
        for i in range(1, rows):
            if r[i, j] < min_val:
                min_val = r[i, j]
        
        # Store the minimum value for the column
        min_vals[j] = min_val
    
    return min_vals

@njit
def solve_linear_system_safe(A, b):
    """
    Safely solve the linear system Ax = b.
    If A is singular or nearly singular, return a default solution (e.g., zeros).
    """
    # Check if the matrix is singular
    det = np.linalg.det(A)
    if abs(det) < 1e-12:  # Threshold for singularity
        # Handle singular matrix (e.g., return zeros or raise an error)
        return np.zeros_like(b)  # Return a vector of zeros
    else:
        # Solve the system using np.linalg.solve
        return np.linalg.solve(A, b)
    
@njit
def solve_least_squares(A, b):
    """Solve the least squares problem Ax = b using the normal equation."""
    AtA = A.T @ A  # Compute A^T * A
    Atb = A.T @ b  # Compute A^T * b
    return solve_linear_system_safe(AtA, Atb)


@njit
def generate_random_uniform(low, high, size):
    """Generate random uniform samples using Numba."""
    out = np.empty(size)
    for i in range(size[0]):
        for j in range(size[1]):
            out[i, j] = low + (high - low) * np.random.random()
    return out


SINGULAR_EPS = 1e-10  # Criteria for matrix singularity
M_LN_SQRT_2PI = 0.9189385332046727  # log(sqrt(2*pi))


@njit
def EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=2000, tol=1e-8, tau = 0.5, epsilon=0.05):
    
    nt = len(y)
    n = nt // t

    ninits = alpha_draw.shape[1]
    # Handle x
    if q == 0:
        x1 = np.ones((nt, 1))
        q1 = 1
    else:
        x1 = np.zeros((nt, x.shape[1] + 1))
        x1[:, 0] = 1  # Add intercept
        x1[:, 1:] = x
        q1 = x1.shape[1]
    
    # Initialize variables
    lb = np.zeros(m)
    ub = np.zeros(m)
    l_j = np.zeros(m)
    w = np.zeros((m, nt))
    post = np.zeros((m * n, ninits))
    notcg = np.zeros(ninits)
    penloglikset = np.zeros(ninits)
    loglikset = np.zeros(ninits)
    
    
    for jn in range(ninits):
        alpha_jn = alpha_draw[:, jn]
        mubeta_jn = mubeta_draw[:, jn]
        sigma_jn = sigma_draw[:, jn]
        gamma_jn = gamma_draw[:, jn]  # Likely float64
    
        oldpenloglik = -np.inf
        emit = 0
        diff = 1.0
        sing = 0
        
        for iter_ii in range(maxit):
            ll = -nt * M_LN_SQRT_2PI
            
            if p > 0:
                ytilde = y - np.dot(z, gamma_jn)
            else:
                ytilde = y
            
            r = compute_residuals(m, n, t, sigma_jn, ytilde, mubeta_jn)
            
            minr = min_along_axis_0(r)
            
            # Initialize arrays
            l_j = np.zeros((m,n))  # Same shape as `r`
            sum_l_j = np.zeros(n)   # Sum along axis 0
            w = np.zeros((m,n))    # Weights
            ll = 0.0                # Log-likelihood accumulator

            # Compute l_j = alpha_jn[:, None] * exp(minr - r)
            for i in range(n):
                for j in range(m):
                    l_j[j, i] = alpha_jn[j] * np.exp(minr[i] - r[j,i])
            
            # Compute sum_l_j = np.sum(l_j, axis=0)
            for j in range(m):
                for i in range(n):
                    sum_l_j[i] += l_j[j, i]
            
            # Compute w = l_j / sum_l_j
            for i in range(n):
                for j in range(m):
                    w[j, i] = l_j[j, i] / sum_l_j[i]
            
            # Compute ll += np.sum(np.log(sum_l_j) - minr)
            for i in range(n):
                ll += np.log(sum_l_j[i]) - minr[i]
            
            penloglik = ll + np.log(2.0) + min(np.log(tau), np.log(1 - tau))
            
            for j in range(m):
                s0j = sigma_0[j] / sigma_jn[j]
                penloglik += -an * (s0j**2 - 2.0 * np.log(s0j) - 1.0)
                penloglik += min(np.log(alpha_jn[j]), np.log(1 - alpha_jn[j]))
            diff = penloglik - oldpenloglik
            oldpenloglik = penloglik
            emit += 1
            
            # Update parameters
            mubeta_jn_mat = np.zeros((m,q1),dtype=np.float64)
            wtilde = np.zeros(nt)
            for j in range(m):
                alpha_jn[j] = np.mean(w[j, :])
                wtilde = w[j, :].T
                w_j = np.zeros(nt)
                for i in range(n):
                    w_j[i * t : (i + 1) * t] = wtilde[i]
                xtilde = np.zeros((nt, q1))
                for ii in range(q1):
                    xtilde[:, ii] = w_j * x1[:, ii]
                # design_matrix = xtilde.T @ x1
                # solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                # xtilde.T @ ytilde
                mubeta_jn_mat[j,:] = solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                ssr_j = np.sum(w_j * (ytilde - x1 @ mubeta_jn_mat[j,:])**2)
                sigma_jn[j] = np.sqrt((ssr_j + 2.0 * an * sigma_0[j]**2) / (np.sum(w_j) + 2.0 * an))
                sigma_jn[j] = max(sigma_jn[j], epsilon * sigma_0[j])
            
            # update alpha
            total_alpha = np.sum(alpha_jn)
            for j in range(m):
                alpha_jn[j] = max(0.01, alpha_jn[j] / total_alpha)
            
            # update gamma
            if p > 0:
                ztilde = np.zeros((nt, p), dtype=np.float64) 
                zz = np.zeros((p, p), dtype=np.float64) 
                ze = np.zeros((p, 1), dtype=np.float64) 
                for j in range(m):
                    wtilde = w[j, :]
                    w_j = np.zeros(nt)
                    for i in range(n):
                        w_j[i * t : (i + 1) * t] = wtilde[i]
                    for ii in range(p):
                        ztilde[:, ii] = w_j * z[:, ii]
                    zz += ztilde.T @ z / (sigma_jn[j]**2)
                    ze += ztilde.T @( y - x1 @ mubeta_jn_mat[j,:]) / (sigma_jn[j]**2)
                gamma_jn = solve_linear_system_safe(zz,ze).flatten()
            
        penloglikset[jn] = penloglik
        loglikset[jn] = ll
        post[:, jn] = w.T.flatten()
        alpha_draw[:, jn] = alpha_jn
        mubeta_draw[:, jn] = mubeta_jn_mat.T.flatten()
        sigma_draw[:, jn] = sigma_jn
        if p > 0:
            gamma_draw[:, jn] = gamma_jn
    return(alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset ,post)

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500): 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik)
    
        # return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)

In [None]:
@njit(parallel=True)
def compute_lr_stat(y_lr, x_lr, z_lr, m, p, q, nrep):
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(nrep, dtype=np.float64)

    for ii in prange(nrep):
        # Extract y, x, z from data_lr (passed as separate arrays in Numba)
        y = y_lr[ii]  # y for replication ii
        x = x_lr[ii]  # x for replication ii
        z = z_lr[ii]  # z for replication ii

        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1 - penloglik)
    return lr_stat

In [None]:
@njit(parallel=True)
def compute_lr_stat(y_lr, x_lr, z_lr, m, p, q, nrep):
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(nrep, dtype=np.float64)

    for ii in prange(nrep):
        # Extract y, x, z from data_lr (passed as separate arrays in Numba)
        y = y_lr[ii]  # y for replication ii
        x = x_lr[ii]  # x for replication ii
        z = z_lr[ii]  # z for replication ii

        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1 - penloglik)
    return lr_stat

In [None]:
import numpy as np
from numba import njit, prange
from numba.typed import Dict, List
from numba.core import types

# Function to generate data

def generate_data(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    # print(f"N = {N}")
    # print(f"T = {T}")

    R = np.zeros((N, M))
    if sum(alpha) != 1:
        
        alpha = np.array(alpha) / sum(alpha)

    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    prior = np.random.uniform(size=N)
    alpha_cum = np.cumsum([0] + list(alpha))
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            R[:, m] = ((prior > lb) & (prior <= ub)).astype(int)
    else:
        R = np.ones((N, M))

    Y = np.zeros((T, N))
    
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    mu_R = np.dot(R, mu)
    sigma_R = np.dot(R, sigma)
    u = np.random.normal(size=(T, N))
    
    for nn in range(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            beta_R = np.dot(R, beta)
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            beta_R = np.dot(R, np.ravel(beta))
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn
    
    if p == 0:
        z = None
    if q == 0:
        x = None

    data_dict = {}
    data_dict['Y'] = Y
    data_dict['Z'] = z
    data_dict['X'] = x
    return data_dict


@njit
def create_indicator_list(data_c, T, N, n_bins):
    """
    Create a list of indicator matrices based on quantiles for each time period.
    """
    indicator_list = List()
    for t in range(T):
        # Calculate quantiles manually
        quantiles = np.empty(n_bins + 1)
        sorted_data = np.sort(data_c[t, :])
        for i in range(n_bins + 1):
            if i == 0:
                quantiles[i] = -np.inf
            elif i == n_bins:
                quantiles[i] = np.inf
            else:
                quantiles[i] = sorted_data[int(i * N / n_bins)]

        # Create indicator matrix
        indicator_matrix = np.zeros((N, n_bins))
        for n in range(N):
            for b in range(n_bins):
                if quantiles[b] <= data_c[t, n] < quantiles[b + 1]:
                    indicator_matrix[n, b] = 1
                    break
        indicator_list.append(indicator_matrix)
    return indicator_list

from numba import njit
from numba.typed import List
import numpy as np


@njit
def calculate_P_matrix(data_c, weights, n_grid=3, n_bins=2):
    """
    Calculate P matrices and Sigma matrices for triplets in a Numba-compatible way.
    """
    T = data_c.shape[0]
    N = data_c.shape[1]
    
    # Create `indicator_list_Y` with 2 bins
    indicator_list_Y = create_indicator_list(data_c, T, N, n_bins=n_bins)
    
    # Create `indicator_list_Y_ngrid` with `n_grid` bins
    indicator_list_Y_ngrid = create_indicator_list(data_c, T, N, n_bins=n_grid)
    
    # Initialize the result lists
    P_k_list = List()
    Sigma_P_k_list = List()
    
    # Iterate over the t periods
    for k in range(T):
        # Compute the Kronecker product for each row manually
        result_matrix = np.zeros((N, (n_bins ** (T - 1))))
        for n in range(N):
            # Manually compute Kronecker product for the row
            kron_result = np.array([1.0])  # Start with scalar 1.0
            for t in range(T):
                if t != k:
                    kron_result = np.kron(kron_result, indicator_list_Y[t][n, :])
            result_matrix[n, :] = kron_result
        
        # Compute P_k
        P_k = (weights * indicator_list_Y_ngrid[k].T) @ result_matrix
        P_k_list.append(P_k)
        
        # Compute Sigma_P_k
        P_k_vec = P_k.T.flatten()
        W_P_s = np.diag(P_k_vec) - np.outer(P_k_vec, P_k_vec)
        Sigma_P_k_list.append(W_P_s)

    
    return {
        "P_k_list": P_k_list,
        "Sigma_P_k_list": Sigma_P_k_list
    }



@njit
def compute_matrix_sqrt(U, S, VT):
    # Compute the square root of singular values
    sqrt_singular_values = np.sqrt(S)
    
    # Reconstruct the square root matrix
    sqrt_mat = U @ np.diag(sqrt_singular_values) @ VT
    return sqrt_mat

# Wrapper function to handle SVD outside Numba
def matrix_sqrt_svd(mat):
    if not isinstance(mat, np.ndarray):
        raise ValueError("Input must be a matrix (NumPy array).")
    
    if mat.shape[0] != mat.shape[1]:
        raise ValueError("Input must be a square matrix.")
    
    # Perform SVD decomposition (outside Numba)
    U, S, VT = np.linalg.svd(mat)
    
    # Compute the square root matrix (inside Numba)
    sqrt_mat = compute_matrix_sqrt(U, S, VT)
    return sqrt_mat


@njit
def matrix_sqrt(A):
    """Compute the square root of a matrix using eigen-decomposition."""
    # Eigen-decomposition of the matrix
    vals, vecs = np.linalg.eigh(A)
    # Compute the square root of eigenvalues
    sqrt_vals = np.sqrt(vals)
    # Reconstruct the matrix square root
    sqrt_A = vecs @ np.diag(sqrt_vals) @ vecs.T
    return sqrt_A

@njit
def invert_matrix(mat, epsilon=1e-8):
    """
    Numba-compatible function to compute the inverse of a square matrix.
    If the determinant is close to zero, the matrix is regularized by adding
    epsilon to the diagonal before inversion.

    Parameters:
        mat (ndarray): Input square matrix.
        epsilon (float): Small value added to the diagonal for regularization.

    Returns:
        ndarray: Inverse of the matrix.
    """
    # Ensure the matrix is square
    if mat.shape[0] != mat.shape[1]:
        # Numba cannot raise exceptions, so we return an empty array for invalid input
        return np.zeros_like(mat)
    
    # Compute the determinant
    det_val = np.linalg.det(mat)
    
    # Regularize the matrix if the determinant is close to zero
    if abs(det_val) < epsilon:
        mat = mat + np.eye(mat.shape[0]) * epsilon
    
    # Compute and return the inverse
    return np.linalg.inv(mat)

@njit
def compute_A_q_o(U_22, U_12):
    """Compute A_q_o."""
    sqrt_U22 = matrix_sqrt(U_22 @ U_22.T)
    inv_U22_T = invert_matrix(U_22.T)
    A_q_o = np.transpose(sqrt_U22 @ inv_U22_T @ np.hstack((U_12.T, U_22.T)))
    return A_q_o

@njit
def compute_B_q_o(V_22, V_12):
    """Compute B_q_o."""
    sqrt_V22 = matrix_sqrt(V_22 @ V_22.T)
    inv_V22_T = invert_matrix(V_22.T)
    B_q_o = sqrt_V22 @ inv_V22_T @ np.hstack((V_12.T, V_22.T))
    return B_q_o

@njit
def compute_kron_BA_o(B_q_o, A_q_o):
    """Compute the Kronecker product of B_q_o and A_q_o.T."""
    return np.kron(B_q_o, A_q_o.T)

@njit
def matrix_svd_decomposition(P, m):
    """
    Perform SVD decomposition and compute A_q_o, B_q_o, and Kronecker product.
    """
    # Perform SVD outside the Numba function
    U, S, VT = np.linalg.svd(P, full_matrices=True)
    V = VT.T
    
    # Submatrices of U and V
    U_12 = U[:m, m:]
    V_12 = V[:m, m:]
    U_22 = U[m:, m:]
    V_22 = V[m:, m:]
    
    # Compute A_q_o and B_q_o using Numba-compiled functions
    A_q_o = compute_A_q_o(U_22, U_12)
    B_q_o = compute_B_q_o(V_22, V_12)
    
    # Compute the Kronecker product
    kron_BA_o = compute_kron_BA_o(B_q_o, A_q_o)
    
    # Ensure all arrays are 2D
    S = S.reshape(-1, 1)  # Convert S to a 2D column vector
    U = np.atleast_2d(U)
    V = np.atleast_2d(V)
    U_12 = np.atleast_2d(U_12)
    V_12 = np.atleast_2d(V_12)
    U_22 = np.atleast_2d(U_22)
    V_22 = np.atleast_2d(V_22)
    A_q_o = np.atleast_2d(A_q_o)
    B_q_o = np.atleast_2d(B_q_o)
    kron_BA_o = np.atleast_2d(kron_BA_o)

    # Create a Numba-compatible dictionary
    numba_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add key-value pairs
    numba_dict["D"] = S
    numba_dict["U"] = U
    numba_dict["V"] = V
    numba_dict["U_12"] = U_12
    numba_dict["V_12"] = V_12
    numba_dict["U_22"] = U_22
    numba_dict["V_22"] = V_22
    numba_dict["A_q_o"] = A_q_o
    numba_dict["B_q_o"] = B_q_o
    numba_dict["kron_BA_o"] = kron_BA_o

    return numba_dict


    
@njit
def compute_stat(P, Sigma_P, P_svd, m, n_size, lambda_c):
    """
    Compute statistical metrics for the "P" or "Q" transform case.
    """
    # Extract SVD components
    A_q_o = P_svd["A_q_o"]
    B_q_o = P_svd["B_q_o"]
    kron_BA_o = P_svd["kron_BA_o"]

    # Compute lambda_q
    lambda_q = A_q_o.T @ P @ B_q_o.T - lambda_c

    # Compute Omega_q
    Omega_q = kron_BA_o @ Sigma_P @ kron_BA_o.T

    # Compute rk_c
    lambda_q_flat = lambda_q.flatten()
    Omega_q_inv = invert_matrix(Omega_q)
    rk_c = n_size * (lambda_q_flat @ Omega_q_inv @ lambda_q_flat) 

    return lambda_q, Omega_q, rk_c

@njit
def compute_criteria(rk_c, r, n_size):
    """
    Compute AIC, BIC, and HQ criteria.
    """
    AIC_c = rk_c - 2 * r
    BIC_c = rk_c - np.log(n_size) * r
    HQ_c = rk_c - 2 * np.log(np.log(n_size)) * r
    return AIC_c, BIC_c, HQ_c

@njit
def construct_stat_KP(P, Sigma_P, m, n_size, lambda_c=0):
    """
    Construct statistical metrics for the Kronecker Product and return 
    a Numba-compatible typed dictionary.
    """
    # Perform SVD decomposition
    P_svd = matrix_svd_decomposition(P, m)

    # Compute stats using Numba
    lambda_q, Omega_q, rk_c = compute_stat(P, Sigma_P, P_svd, m, n_size, lambda_c)

    # Compute the rank (r)
    r = Omega_q.shape[0]

    # Compute AIC, BIC, and HQ using Numba
    AIC_c, BIC_c, HQ_c = compute_criteria(rk_c, r, n_size)

    # Create a Numba-compatible dictionary to store results
    result_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add results to the dictionary
    result_dict["rk_c"] = np.array([[rk_c]])  # Scalars must be converted to 2D arrays
    result_dict["lambda_c"] = lambda_q
    result_dict["Omega_q"] = Omega_q
    result_dict["AIC_c"] = np.array([[AIC_c]])
    result_dict["BIC_c"] = np.array([[BIC_c]])
    result_dict["HQ_c"] = np.array([[HQ_c]])

    return result_dict

@njit
def compute_quantile(data, q):
    """
    Compute the q-th quantile manually.
    This replaces np.quantile for Numba compatibility.
    """
    sorted_data = np.sort(data)  # Sort the data
    idx = int(q * (len(sorted_data) - 1))  # Find the index for the quantile
    return sorted_data[idx]

@njit
def max_along_axis_1(matrix):
    """
    Compute the maximum along axis 1 for a 2D array.
    This replaces np.max(axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    max_values = np.empty(n_rows)  # Array to store max values for each row
    for i in range(n_rows):
        max_values[i] = -np.inf  # Initialize with negative infinity
        for j in range(n_cols):
            if matrix[i, j] > max_values[i]:
                max_values[i] = matrix[i, j]
    return max_values

@njit
def mean_along_axis_1(matrix):
    """
    Compute the mean along axis 1 for a 2D array.
    This replaces np.mean(array, axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    mean_values = np.empty(n_rows)  # Array to store mean values for each row
    for i in range(n_rows):
        row_sum = 0.0
        for j in range(n_cols):
            row_sum += matrix[i, j]
        mean_values[i] = row_sum / n_cols  # Compute mean for the row
    return mean_values

@njit
def non_par_test(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test):
    # Result array
    result_rk_each = np.zeros((nrep,2))
    for ii in range(nrep):
        # Generate synthetic data (replace with actual logic)
        data_c = data_nopar[ii]  # Example: Replace this with your `generate_data` logic
        # Initialize weights
        # Compute P and Sigma matrices
        data_P_W = calculate_P_matrix(data_c, weights_equal, n_grid=n_grid, n_bins=2)
        
        # Initialize results
        rk = np.zeros(T)
        lambda_c_list = List()
        omega_c = List()
        Sigma_P_list = List()
        P_k_list = List()
        
        # Loop through T periods to compute statistics
        for k in range(T):
            # Extract P_k and Sigma_P_k from the data_P_W object
            P_k = data_P_W["P_k_list"][k]
            Sigma_P_k = data_P_W["Sigma_P_k_list"][k]
            
            # Compute KP statistics for the k-th triplet
            stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)
            
            # Store results
            rk[k] = stat_KP["rk_c"][0,0]
            lambda_c_list.append(stat_KP["lambda_c"])
            omega_c.append(stat_KP["Omega_q"])
            Sigma_P_list.append(Sigma_P_k)
            P_k_list.append(P_k)
        # Initialize result matrix
        rk_b = np.zeros((BB, T))
        
        # Smoothed Nonparametric Bootstrap
        ru = np.random.exponential(scale=1, size=(BB, N))  # Exponential random variables
        row_sums = ru.sum(axis=1).reshape(-1, 1)  # Reshape to keep dimensions
        ru /= row_sums
        
        for i in range(BB):
            # Calculate bootstrapped P and Sigma_P matrices
            data_P_W_b = calculate_P_matrix(data_c, ru[i, :], n_grid=n_grid, n_bins=2)
            
            for k in range(T):
                P_k = data_P_W_b['P_k_list'][k]
                Sigma_P_k = data_P_W_b['Sigma_P_k_list'][k]
                # Compute KP statistics for the k-th triplet
                rk_b[i, k] = construct_stat_KP(P_k, Sigma_P_k, r_test, N, lambda_c_list[k])['rk_c'][0,0]
        # Compute max and mean values for rk and rk_b
        rk_b_max = max_along_axis_1(rk_b)  # Maximum of rk_b along axis 1
        rk_b_max_95 = compute_quantile(rk_b_max, 0.95)  # 95th quantile of rk_b_max

        
        # Store results
        result_rk_each[ii, 0] = 1 * (rk.max() > rk_b_max_95)
        rk_mean = np.mean(rk)  # Mean of rk (Numba supports this)
        rk_b_mean = mean_along_axis_1(rk_b)  # Mean of rk_b along axis 1
        rk_b_mean_95 = compute_quantile(rk_b_mean, 0.95)  # 95th quantile of rk_b_mean
        result_rk_each[ii, 1] = 1 * (rk_mean > rk_b_mean_95)
    return result_rk_each

In [None]:
# ----------------------------------------------------------
# LR test functions
# ----------------------------------------------------------

@njit
def process_array_or_none(arr, nt):
    if arr is None:  # Check for None
        return np.zeros((nt,0),dtype=np.float64)  # Default behavior for None
    return arr  # Process the array if it's valid

@njit
def safe_solve(A, b):
    """
    Numba-compatible replacement for np.linalg.solve.
    Solves Ax = b using LU decomposition if the matrix is not singular.
    """
    if np.linalg.cond(A) < SINGULAR_EPS:
        raise ValueError("Matrix is singular or near-singular.")
    return np.linalg.solve(A, b)

@njit
def log_likelihood_normal(y, mu, sigma):
    """
    Calculate the log-likelihood of the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihood: float, the log-likelihood value
    """
    n = len(y)
    term1 = -n / 2 * np.log(2 * np.pi)  # Constant term
    term2 = -n * np.log(sigma)  # Log of the standard deviation
    term3 = -1 / (2 * sigma**2) * np.sum((y - mu) ** 2)  # Data fitting term
    return term1 + term2 + term3

@njit
def log_likelihood_array(y, mu, sigma):
    """
    Calculate the log-likelihood of each element in the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihoods: array, log-likelihood of each data point
    """
    # Precompute constants
    constant = -0.5 * np.log(2 * np.pi)
    variance = sigma ** 2

    # Initialize result array
    log_likelihoods = np.empty(len(y))

    # Compute log-likelihood for each data point
    for i in range(len(y)):
        log_likelihoods[i] = (
            constant 
            - np.log(sigma) 
            - ((y[i] - mu) ** 2) / (2 * variance)
        )
    
    return log_likelihoods

import numpy as np
from numba import njit

@njit
def compute_residuals(m, n, t, sigma_jn, ytilde, mubeta_jn):
    """
    Compute residuals for the EM optimization loop in a Numba-compatible way.

    Parameters:
    - m: Number of components (int)
    - n: Number of groups (int)
    - t: Number of time points per group (int)
    - sigma_jn: Array of current sigma values (1D array of floats, shape (m,))
    - ytilde: Adjusted response variable (1D array of floats, shape (n * t,))
    - mubeta_jn: Array of current beta means (1D array of floats, shape (m,))

    Returns:
    - r: Residuals array (2D array of floats, shape (m, n))
    """
    # Initialize the residuals array
    r = np.zeros((m, n), dtype=np.float64)

    # Loop over each component (m)
    for j in range(m):
        
        # Loop over each group (n)
        for i in range(n):
            sum_r_t = 0.0

            # Loop over each time point within the group (t)
            for k in range(t):
                idx = i * t + k  # Compute the flattened index
                diff = ytilde[idx] - mubeta_jn[j]
                r_t = (1.0 / sigma_jn[j]) * diff
                sum_r_t += 0.5 * (r_t**2)

            # Compute residual for group i and component j
            r[j, i] = t * np.log(sigma_jn[j]) + sum_r_t
    return r

@njit
def min_along_axis_0(r):
    # Get the shape of the array
    rows, cols = r.shape
    
    # Initialize an array to store the minimum values for each column
    min_vals = np.empty(cols)
    
    # Iterate through each column
    for j in range(cols):
        # Initialize the minimum value for the current column
        min_val = r[0, j]
        
        # Iterate through each row in the current column
        for i in range(1, rows):
            if r[i, j] < min_val:
                min_val = r[i, j]
        
        # Store the minimum value for the column
        min_vals[j] = min_val
    
    return min_vals

@njit
def solve_linear_system_safe(A, b):
    """
    Safely solve the linear system Ax = b.
    If A is singular or nearly singular, return a default solution (e.g., zeros).
    """
    # Check if the matrix is singular
    det = np.linalg.det(A)
    if abs(det) < 1e-12:  # Threshold for singularity
        # Handle singular matrix (e.g., return zeros or raise an error)
        return np.zeros_like(b)  # Return a vector of zeros
    else:
        # Solve the system using np.linalg.solve
        return np.linalg.solve(A, b)
    
@njit
def solve_least_squares(A, b):
    """Solve the least squares problem Ax = b using the normal equation."""
    AtA = A.T @ A  # Compute A^T * A
    Atb = A.T @ b  # Compute A^T * b
    return solve_linear_system_safe(AtA, Atb)


@njit
def generate_random_uniform(low, high, size):
    """Generate random uniform samples using Numba."""
    out = np.empty(size)
    for i in range(size[0]):
        for j in range(size[1]):
            out[i, j] = low + (high - low) * np.random.random()
    return out


SINGULAR_EPS = 1e-10  # Criteria for matrix singularity
M_LN_SQRT_2PI = 0.9189385332046727  # log(sqrt(2*pi))


@njit
def EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=2000, tol=1e-8, tau = 0.5, epsilon=0.05):
    
    nt = len(y)
    n = nt // t

    ninits = alpha_draw.shape[1]
    # Handle x
    if q == 0:
        x1 = np.ones((nt, 1))
        q1 = 1
    else:
        x1 = np.zeros((nt, x.shape[1] + 1))
        x1[:, 0] = 1  # Add intercept
        x1[:, 1:] = x
        q1 = x1.shape[1]
    
    # Initialize variables
    lb = np.zeros(m)
    ub = np.zeros(m)
    l_j = np.zeros(m)
    w = np.zeros((m, nt))
    post = np.zeros((m * n, ninits))
    notcg = np.zeros(ninits)
    penloglikset = np.zeros(ninits)
    loglikset = np.zeros(ninits)
    
    
    for jn in range(ninits):
        alpha_jn = alpha_draw[:, jn]
        mubeta_jn = mubeta_draw[:, jn]
        sigma_jn = sigma_draw[:, jn]
        gamma_jn = gamma_draw[:, jn]  # Likely float64
    
        oldpenloglik = -np.inf
        emit = 0
        diff = 1.0
        sing = 0
        
        for iter_ii in range(maxit):
            ll = -nt * M_LN_SQRT_2PI
            
            if p > 0:
                ytilde = y - np.dot(z, gamma_jn)
            else:
                ytilde = y
            
            r = compute_residuals(m, n, t, sigma_jn, ytilde, mubeta_jn)
            
            minr = min_along_axis_0(r)
            
            # Initialize arrays
            l_j = np.zeros((m,n))  # Same shape as `r`
            sum_l_j = np.zeros(n)   # Sum along axis 0
            w = np.zeros((m,n))    # Weights
            ll = 0.0                # Log-likelihood accumulator

            # Compute l_j = alpha_jn[:, None] * exp(minr - r)
            for i in range(n):
                for j in range(m):
                    l_j[j, i] = alpha_jn[j] * np.exp(minr[i] - r[j,i])
            
            # Compute sum_l_j = np.sum(l_j, axis=0)
            for j in range(m):
                for i in range(n):
                    sum_l_j[i] += l_j[j, i]
            
            # Compute w = l_j / sum_l_j
            for i in range(n):
                for j in range(m):
                    w[j, i] = l_j[j, i] / sum_l_j[i]
            
            # Compute ll += np.sum(np.log(sum_l_j) - minr)
            for i in range(n):
                ll += np.log(sum_l_j[i]) - minr[i]
            
            penloglik = ll + np.log(2.0) + min(np.log(tau), np.log(1 - tau))
            
            for j in range(m):
                s0j = sigma_0[j] / sigma_jn[j]
                penloglik += -an * (s0j**2 - 2.0 * np.log(s0j) - 1.0)
                penloglik += min(np.log(alpha_jn[j]), np.log(1 - alpha_jn[j]))
            diff = penloglik - oldpenloglik
            oldpenloglik = penloglik
            emit += 1
            
            # Update parameters
            mubeta_jn_mat = np.zeros((m,q1),dtype=np.float64)
            wtilde = np.zeros(nt)
            for j in range(m):
                alpha_jn[j] = np.mean(w[j, :])
                wtilde = w[j, :].T
                w_j = np.zeros(nt)
                for i in range(n):
                    w_j[i * t : (i + 1) * t] = wtilde[i]
                xtilde = np.zeros((nt, q1))
                for ii in range(q1):
                    xtilde[:, ii] = w_j * x1[:, ii]
                # design_matrix = xtilde.T @ x1
                # solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                # xtilde.T @ ytilde
                mubeta_jn_mat[j,:] = solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                ssr_j = np.sum(w_j * (ytilde - x1 @ mubeta_jn_mat[j,:])**2)
                sigma_jn[j] = np.sqrt((ssr_j + 2.0 * an * sigma_0[j]**2) / (np.sum(w_j) + 2.0 * an))
                sigma_jn[j] = max(sigma_jn[j], epsilon * sigma_0[j])
            
            # update alpha
            total_alpha = np.sum(alpha_jn)
            for j in range(m):
                alpha_jn[j] = max(0.01, alpha_jn[j] / total_alpha)
            
            # update gamma
            if p > 0:
                ztilde = np.zeros((nt, p), dtype=np.float64) 
                zz = np.zeros((p, p), dtype=np.float64) 
                ze = np.zeros((p, 1), dtype=np.float64) 
                for j in range(m):
                    wtilde = w[j, :]
                    w_j = np.zeros(nt)
                    for i in range(n):
                        w_j[i * t : (i + 1) * t] = wtilde[i]
                    for ii in range(p):
                        ztilde[:, ii] = w_j * z[:, ii]
                    zz += ztilde.T @ z / (sigma_jn[j]**2)
                    ze += ztilde.T @( y - x1 @ mubeta_jn_mat[j,:]) / (sigma_jn[j]**2)
                gamma_jn = solve_linear_system_safe(zz,ze).flatten()
            
        penloglikset[jn] = penloglik
        loglikset[jn] = ll
        post[:, jn] = w.T.flatten()
        alpha_draw[:, jn] = alpha_jn
        mubeta_draw[:, jn] = mubeta_jn_mat.T.flatten()
        sigma_draw[:, jn] = sigma_jn
        if p > 0:
            gamma_draw[:, jn] = gamma_jn
    return(alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset ,post)

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500): 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik)
    
        # return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)

In [None]:
@njit(parallel=True)
def compute_lr_stat(y_lr, x_lr, z_lr, m, p, q, nrep):
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(nrep, dtype=np.float64)

    for ii in prange(nrep):
        # Extract y, x, z from data_lr (passed as separate arrays in Numba)
        y = y_lr[ii]  # y for replication ii
        x = x_lr[ii]  # x for replication ii
        z = z_lr[ii]  # z for replication ii

        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1 - penloglik)
    return lr_stat

Connected to base (Python 3.12.7)

In [None]:
import numpy as np
from numba import njit, prange
from numba.typed import Dict, List
from numba.core import types

# Function to generate data

def generate_data(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    # print(f"N = {N}")
    # print(f"T = {T}")

    R = np.zeros((N, M))
    if sum(alpha) != 1:
        
        alpha = np.array(alpha) / sum(alpha)

    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    prior = np.random.uniform(size=N)
    alpha_cum = np.cumsum([0] + list(alpha))
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            R[:, m] = ((prior > lb) & (prior <= ub)).astype(int)
    else:
        R = np.ones((N, M))

    Y = np.zeros((T, N))
    
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    mu_R = np.dot(R, mu)
    sigma_R = np.dot(R, sigma)
    u = np.random.normal(size=(T, N))
    
    for nn in range(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            beta_R = np.dot(R, beta)
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            beta_R = np.dot(R, np.ravel(beta))
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn
    
    if p == 0:
        z = None
    if q == 0:
        x = None

    data_dict = {}
    data_dict['Y'] = Y
    data_dict['Z'] = z
    data_dict['X'] = x
    return data_dict


@njit
def create_indicator_list(data_c, T, N, n_bins):
    """
    Create a list of indicator matrices based on quantiles for each time period.
    """
    indicator_list = List()
    for t in range(T):
        # Calculate quantiles manually
        quantiles = np.empty(n_bins + 1)
        sorted_data = np.sort(data_c[t, :])
        for i in range(n_bins + 1):
            if i == 0:
                quantiles[i] = -np.inf
            elif i == n_bins:
                quantiles[i] = np.inf
            else:
                quantiles[i] = sorted_data[int(i * N / n_bins)]

        # Create indicator matrix
        indicator_matrix = np.zeros((N, n_bins))
        for n in range(N):
            for b in range(n_bins):
                if quantiles[b] <= data_c[t, n] < quantiles[b + 1]:
                    indicator_matrix[n, b] = 1
                    break
        indicator_list.append(indicator_matrix)
    return indicator_list

from numba import njit
from numba.typed import List
import numpy as np


@njit
def calculate_P_matrix(data_c, weights, n_grid=3, n_bins=2):
    """
    Calculate P matrices and Sigma matrices for triplets in a Numba-compatible way.
    """
    T = data_c.shape[0]
    N = data_c.shape[1]
    
    # Create `indicator_list_Y` with 2 bins
    indicator_list_Y = create_indicator_list(data_c, T, N, n_bins=n_bins)
    
    # Create `indicator_list_Y_ngrid` with `n_grid` bins
    indicator_list_Y_ngrid = create_indicator_list(data_c, T, N, n_bins=n_grid)
    
    # Initialize the result lists
    P_k_list = List()
    Sigma_P_k_list = List()
    
    # Iterate over the t periods
    for k in range(T):
        # Compute the Kronecker product for each row manually
        result_matrix = np.zeros((N, (n_bins ** (T - 1))))
        for n in range(N):
            # Manually compute Kronecker product for the row
            kron_result = np.array([1.0])  # Start with scalar 1.0
            for t in range(T):
                if t != k:
                    kron_result = np.kron(kron_result, indicator_list_Y[t][n, :])
            result_matrix[n, :] = kron_result
        
        # Compute P_k
        P_k = (weights * indicator_list_Y_ngrid[k].T) @ result_matrix
        P_k_list.append(P_k)
        
        # Compute Sigma_P_k
        P_k_vec = P_k.T.flatten()
        W_P_s = np.diag(P_k_vec) - np.outer(P_k_vec, P_k_vec)
        Sigma_P_k_list.append(W_P_s)

    
    return {
        "P_k_list": P_k_list,
        "Sigma_P_k_list": Sigma_P_k_list
    }



@njit
def compute_matrix_sqrt(U, S, VT):
    # Compute the square root of singular values
    sqrt_singular_values = np.sqrt(S)
    
    # Reconstruct the square root matrix
    sqrt_mat = U @ np.diag(sqrt_singular_values) @ VT
    return sqrt_mat

# Wrapper function to handle SVD outside Numba
def matrix_sqrt_svd(mat):
    if not isinstance(mat, np.ndarray):
        raise ValueError("Input must be a matrix (NumPy array).")
    
    if mat.shape[0] != mat.shape[1]:
        raise ValueError("Input must be a square matrix.")
    
    # Perform SVD decomposition (outside Numba)
    U, S, VT = np.linalg.svd(mat)
    
    # Compute the square root matrix (inside Numba)
    sqrt_mat = compute_matrix_sqrt(U, S, VT)
    return sqrt_mat


@njit
def matrix_sqrt(A):
    """Compute the square root of a matrix using eigen-decomposition."""
    # Eigen-decomposition of the matrix
    vals, vecs = np.linalg.eigh(A)
    # Compute the square root of eigenvalues
    sqrt_vals = np.sqrt(vals)
    # Reconstruct the matrix square root
    sqrt_A = vecs @ np.diag(sqrt_vals) @ vecs.T
    return sqrt_A

@njit
def invert_matrix(mat, epsilon=1e-8):
    """
    Numba-compatible function to compute the inverse of a square matrix.
    If the determinant is close to zero, the matrix is regularized by adding
    epsilon to the diagonal before inversion.

    Parameters:
        mat (ndarray): Input square matrix.
        epsilon (float): Small value added to the diagonal for regularization.

    Returns:
        ndarray: Inverse of the matrix.
    """
    # Ensure the matrix is square
    if mat.shape[0] != mat.shape[1]:
        # Numba cannot raise exceptions, so we return an empty array for invalid input
        return np.zeros_like(mat)
    
    # Compute the determinant
    det_val = np.linalg.det(mat)
    
    # Regularize the matrix if the determinant is close to zero
    if abs(det_val) < epsilon:
        mat = mat + np.eye(mat.shape[0]) * epsilon
    
    # Compute and return the inverse
    return np.linalg.inv(mat)

@njit
def compute_A_q_o(U_22, U_12):
    """Compute A_q_o."""
    sqrt_U22 = matrix_sqrt(U_22 @ U_22.T)
    inv_U22_T = invert_matrix(U_22.T)
    A_q_o = np.transpose(sqrt_U22 @ inv_U22_T @ np.hstack((U_12.T, U_22.T)))
    return A_q_o

@njit
def compute_B_q_o(V_22, V_12):
    """Compute B_q_o."""
    sqrt_V22 = matrix_sqrt(V_22 @ V_22.T)
    inv_V22_T = invert_matrix(V_22.T)
    B_q_o = sqrt_V22 @ inv_V22_T @ np.hstack((V_12.T, V_22.T))
    return B_q_o

@njit
def compute_kron_BA_o(B_q_o, A_q_o):
    """Compute the Kronecker product of B_q_o and A_q_o.T."""
    return np.kron(B_q_o, A_q_o.T)

@njit
def matrix_svd_decomposition(P, m):
    """
    Perform SVD decomposition and compute A_q_o, B_q_o, and Kronecker product.
    """
    # Perform SVD outside the Numba function
    U, S, VT = np.linalg.svd(P, full_matrices=True)
    V = VT.T
    
    # Submatrices of U and V
    U_12 = U[:m, m:]
    V_12 = V[:m, m:]
    U_22 = U[m:, m:]
    V_22 = V[m:, m:]
    
    # Compute A_q_o and B_q_o using Numba-compiled functions
    A_q_o = compute_A_q_o(U_22, U_12)
    B_q_o = compute_B_q_o(V_22, V_12)
    
    # Compute the Kronecker product
    kron_BA_o = compute_kron_BA_o(B_q_o, A_q_o)
    
    # Ensure all arrays are 2D
    S = S.reshape(-1, 1)  # Convert S to a 2D column vector
    U = np.atleast_2d(U)
    V = np.atleast_2d(V)
    U_12 = np.atleast_2d(U_12)
    V_12 = np.atleast_2d(V_12)
    U_22 = np.atleast_2d(U_22)
    V_22 = np.atleast_2d(V_22)
    A_q_o = np.atleast_2d(A_q_o)
    B_q_o = np.atleast_2d(B_q_o)
    kron_BA_o = np.atleast_2d(kron_BA_o)

    # Create a Numba-compatible dictionary
    numba_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add key-value pairs
    numba_dict["D"] = S
    numba_dict["U"] = U
    numba_dict["V"] = V
    numba_dict["U_12"] = U_12
    numba_dict["V_12"] = V_12
    numba_dict["U_22"] = U_22
    numba_dict["V_22"] = V_22
    numba_dict["A_q_o"] = A_q_o
    numba_dict["B_q_o"] = B_q_o
    numba_dict["kron_BA_o"] = kron_BA_o

    return numba_dict


    
@njit
def compute_stat(P, Sigma_P, P_svd, m, n_size, lambda_c):
    """
    Compute statistical metrics for the "P" or "Q" transform case.
    """
    # Extract SVD components
    A_q_o = P_svd["A_q_o"]
    B_q_o = P_svd["B_q_o"]
    kron_BA_o = P_svd["kron_BA_o"]

    # Compute lambda_q
    lambda_q = A_q_o.T @ P @ B_q_o.T - lambda_c

    # Compute Omega_q
    Omega_q = kron_BA_o @ Sigma_P @ kron_BA_o.T

    # Compute rk_c
    lambda_q_flat = lambda_q.flatten()
    Omega_q_inv = invert_matrix(Omega_q)
    rk_c = n_size * (lambda_q_flat @ Omega_q_inv @ lambda_q_flat) 

    return lambda_q, Omega_q, rk_c

@njit
def compute_criteria(rk_c, r, n_size):
    """
    Compute AIC, BIC, and HQ criteria.
    """
    AIC_c = rk_c - 2 * r
    BIC_c = rk_c - np.log(n_size) * r
    HQ_c = rk_c - 2 * np.log(np.log(n_size)) * r
    return AIC_c, BIC_c, HQ_c

@njit
def construct_stat_KP(P, Sigma_P, m, n_size, lambda_c=0):
    """
    Construct statistical metrics for the Kronecker Product and return 
    a Numba-compatible typed dictionary.
    """
    # Perform SVD decomposition
    P_svd = matrix_svd_decomposition(P, m)

    # Compute stats using Numba
    lambda_q, Omega_q, rk_c = compute_stat(P, Sigma_P, P_svd, m, n_size, lambda_c)

    # Compute the rank (r)
    r = Omega_q.shape[0]

    # Compute AIC, BIC, and HQ using Numba
    AIC_c, BIC_c, HQ_c = compute_criteria(rk_c, r, n_size)

    # Create a Numba-compatible dictionary to store results
    result_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add results to the dictionary
    result_dict["rk_c"] = np.array([[rk_c]])  # Scalars must be converted to 2D arrays
    result_dict["lambda_c"] = lambda_q
    result_dict["Omega_q"] = Omega_q
    result_dict["AIC_c"] = np.array([[AIC_c]])
    result_dict["BIC_c"] = np.array([[BIC_c]])
    result_dict["HQ_c"] = np.array([[HQ_c]])

    return result_dict

@njit
def compute_quantile(data, q):
    """
    Compute the q-th quantile manually.
    This replaces np.quantile for Numba compatibility.
    """
    sorted_data = np.sort(data)  # Sort the data
    idx = int(q * (len(sorted_data) - 1))  # Find the index for the quantile
    return sorted_data[idx]

@njit
def max_along_axis_1(matrix):
    """
    Compute the maximum along axis 1 for a 2D array.
    This replaces np.max(axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    max_values = np.empty(n_rows)  # Array to store max values for each row
    for i in range(n_rows):
        max_values[i] = -np.inf  # Initialize with negative infinity
        for j in range(n_cols):
            if matrix[i, j] > max_values[i]:
                max_values[i] = matrix[i, j]
    return max_values

@njit
def mean_along_axis_1(matrix):
    """
    Compute the mean along axis 1 for a 2D array.
    This replaces np.mean(array, axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    mean_values = np.empty(n_rows)  # Array to store mean values for each row
    for i in range(n_rows):
        row_sum = 0.0
        for j in range(n_cols):
            row_sum += matrix[i, j]
        mean_values[i] = row_sum / n_cols  # Compute mean for the row
    return mean_values

@njit
def non_par_test(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test):
    # Result array
    result_rk_each = np.zeros((nrep,2))
    for ii in range(nrep):
        # Generate synthetic data (replace with actual logic)
        data_c = data_nopar[ii]  # Example: Replace this with your `generate_data` logic
        # Initialize weights
        # Compute P and Sigma matrices
        data_P_W = calculate_P_matrix(data_c, weights_equal, n_grid=n_grid, n_bins=2)
        
        # Initialize results
        rk = np.zeros(T)
        lambda_c_list = List()
        omega_c = List()
        Sigma_P_list = List()
        P_k_list = List()
        
        # Loop through T periods to compute statistics
        for k in range(T):
            # Extract P_k and Sigma_P_k from the data_P_W object
            P_k = data_P_W["P_k_list"][k]
            Sigma_P_k = data_P_W["Sigma_P_k_list"][k]
            
            # Compute KP statistics for the k-th triplet
            stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)
            
            # Store results
            rk[k] = stat_KP["rk_c"][0,0]
            lambda_c_list.append(stat_KP["lambda_c"])
            omega_c.append(stat_KP["Omega_q"])
            Sigma_P_list.append(Sigma_P_k)
            P_k_list.append(P_k)
        # Initialize result matrix
        rk_b = np.zeros((BB, T))
        
        # Smoothed Nonparametric Bootstrap
        ru = np.random.exponential(scale=1, size=(BB, N))  # Exponential random variables
        row_sums = ru.sum(axis=1).reshape(-1, 1)  # Reshape to keep dimensions
        ru /= row_sums
        
        for i in range(BB):
            # Calculate bootstrapped P and Sigma_P matrices
            data_P_W_b = calculate_P_matrix(data_c, ru[i, :], n_grid=n_grid, n_bins=2)
            
            for k in range(T):
                P_k = data_P_W_b['P_k_list'][k]
                Sigma_P_k = data_P_W_b['Sigma_P_k_list'][k]
                # Compute KP statistics for the k-th triplet
                rk_b[i, k] = construct_stat_KP(P_k, Sigma_P_k, r_test, N, lambda_c_list[k])['rk_c'][0,0]
        # Compute max and mean values for rk and rk_b
        rk_b_max = max_along_axis_1(rk_b)  # Maximum of rk_b along axis 1
        rk_b_max_95 = compute_quantile(rk_b_max, 0.95)  # 95th quantile of rk_b_max

        
        # Store results
        result_rk_each[ii, 0] = 1 * (rk.max() > rk_b_max_95)
        rk_mean = np.mean(rk)  # Mean of rk (Numba supports this)
        rk_b_mean = mean_along_axis_1(rk_b)  # Mean of rk_b along axis 1
        rk_b_mean_95 = compute_quantile(rk_b_mean, 0.95)  # 95th quantile of rk_b_mean
        result_rk_each[ii, 1] = 1 * (rk_mean > rk_b_mean_95)
    return result_rk_each

In [None]:
# ----------------------------------------------------------
# LR test functions
# ----------------------------------------------------------

@njit
def process_array_or_none(arr, nt):
    if arr is None:  # Check for None
        return np.zeros((nt,0),dtype=np.float64)  # Default behavior for None
    return arr  # Process the array if it's valid

@njit
def safe_solve(A, b):
    """
    Numba-compatible replacement for np.linalg.solve.
    Solves Ax = b using LU decomposition if the matrix is not singular.
    """
    if np.linalg.cond(A) < SINGULAR_EPS:
        raise ValueError("Matrix is singular or near-singular.")
    return np.linalg.solve(A, b)

@njit
def log_likelihood_normal(y, mu, sigma):
    """
    Calculate the log-likelihood of the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihood: float, the log-likelihood value
    """
    n = len(y)
    term1 = -n / 2 * np.log(2 * np.pi)  # Constant term
    term2 = -n * np.log(sigma)  # Log of the standard deviation
    term3 = -1 / (2 * sigma**2) * np.sum((y - mu) ** 2)  # Data fitting term
    return term1 + term2 + term3

@njit
def log_likelihood_array(y, mu, sigma):
    """
    Calculate the log-likelihood of each element in the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihoods: array, log-likelihood of each data point
    """
    # Precompute constants
    constant = -0.5 * np.log(2 * np.pi)
    variance = sigma ** 2

    # Initialize result array
    log_likelihoods = np.empty(len(y))

    # Compute log-likelihood for each data point
    for i in range(len(y)):
        log_likelihoods[i] = (
            constant 
            - np.log(sigma) 
            - ((y[i] - mu) ** 2) / (2 * variance)
        )
    
    return log_likelihoods

import numpy as np
from numba import njit

@njit
def compute_residuals(m, n, t, sigma_jn, ytilde, mubeta_jn):
    """
    Compute residuals for the EM optimization loop in a Numba-compatible way.

    Parameters:
    - m: Number of components (int)
    - n: Number of groups (int)
    - t: Number of time points per group (int)
    - sigma_jn: Array of current sigma values (1D array of floats, shape (m,))
    - ytilde: Adjusted response variable (1D array of floats, shape (n * t,))
    - mubeta_jn: Array of current beta means (1D array of floats, shape (m,))

    Returns:
    - r: Residuals array (2D array of floats, shape (m, n))
    """
    # Initialize the residuals array
    r = np.zeros((m, n), dtype=np.float64)

    # Loop over each component (m)
    for j in range(m):
        
        # Loop over each group (n)
        for i in range(n):
            sum_r_t = 0.0

            # Loop over each time point within the group (t)
            for k in range(t):
                idx = i * t + k  # Compute the flattened index
                diff = ytilde[idx] - mubeta_jn[j]
                r_t = (1.0 / sigma_jn[j]) * diff
                sum_r_t += 0.5 * (r_t**2)

            # Compute residual for group i and component j
            r[j, i] = t * np.log(sigma_jn[j]) + sum_r_t
    return r

@njit
def min_along_axis_0(r):
    # Get the shape of the array
    rows, cols = r.shape
    
    # Initialize an array to store the minimum values for each column
    min_vals = np.empty(cols)
    
    # Iterate through each column
    for j in range(cols):
        # Initialize the minimum value for the current column
        min_val = r[0, j]
        
        # Iterate through each row in the current column
        for i in range(1, rows):
            if r[i, j] < min_val:
                min_val = r[i, j]
        
        # Store the minimum value for the column
        min_vals[j] = min_val
    
    return min_vals

@njit
def solve_linear_system_safe(A, b):
    """
    Safely solve the linear system Ax = b.
    If A is singular or nearly singular, return a default solution (e.g., zeros).
    """
    # Check if the matrix is singular
    det = np.linalg.det(A)
    if abs(det) < 1e-12:  # Threshold for singularity
        # Handle singular matrix (e.g., return zeros or raise an error)
        return np.zeros_like(b)  # Return a vector of zeros
    else:
        # Solve the system using np.linalg.solve
        return np.linalg.solve(A, b)
    
@njit
def solve_least_squares(A, b):
    """Solve the least squares problem Ax = b using the normal equation."""
    AtA = A.T @ A  # Compute A^T * A
    Atb = A.T @ b  # Compute A^T * b
    return solve_linear_system_safe(AtA, Atb)


@njit
def generate_random_uniform(low, high, size):
    """Generate random uniform samples using Numba."""
    out = np.empty(size)
    for i in range(size[0]):
        for j in range(size[1]):
            out[i, j] = low + (high - low) * np.random.random()
    return out


SINGULAR_EPS = 1e-10  # Criteria for matrix singularity
M_LN_SQRT_2PI = 0.9189385332046727  # log(sqrt(2*pi))


@njit
def EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=2000, tol=1e-8, tau = 0.5, epsilon=0.05):
    
    nt = len(y)
    n = nt // t

    ninits = alpha_draw.shape[1]
    # Handle x
    if q == 0:
        x1 = np.ones((nt, 1))
        q1 = 1
    else:
        x1 = np.zeros((nt, x.shape[1] + 1))
        x1[:, 0] = 1  # Add intercept
        x1[:, 1:] = x
        q1 = x1.shape[1]
    
    # Initialize variables
    lb = np.zeros(m)
    ub = np.zeros(m)
    l_j = np.zeros(m)
    w = np.zeros((m, nt))
    post = np.zeros((m * n, ninits))
    notcg = np.zeros(ninits)
    penloglikset = np.zeros(ninits)
    loglikset = np.zeros(ninits)
    
    
    for jn in range(ninits):
        alpha_jn = alpha_draw[:, jn]
        mubeta_jn = mubeta_draw[:, jn]
        sigma_jn = sigma_draw[:, jn]
        gamma_jn = gamma_draw[:, jn]  # Likely float64
    
        oldpenloglik = -np.inf
        emit = 0
        diff = 1.0
        sing = 0
        
        for iter_ii in range(maxit):
            ll = -nt * M_LN_SQRT_2PI
            
            if p > 0:
                ytilde = y - np.dot(z, gamma_jn)
            else:
                ytilde = y
            
            r = compute_residuals(m, n, t, sigma_jn, ytilde, mubeta_jn)
            
            minr = min_along_axis_0(r)
            
            # Initialize arrays
            l_j = np.zeros((m,n))  # Same shape as `r`
            sum_l_j = np.zeros(n)   # Sum along axis 0
            w = np.zeros((m,n))    # Weights
            ll = 0.0                # Log-likelihood accumulator

            # Compute l_j = alpha_jn[:, None] * exp(minr - r)
            for i in range(n):
                for j in range(m):
                    l_j[j, i] = alpha_jn[j] * np.exp(minr[i] - r[j,i])
            
            # Compute sum_l_j = np.sum(l_j, axis=0)
            for j in range(m):
                for i in range(n):
                    sum_l_j[i] += l_j[j, i]
            
            # Compute w = l_j / sum_l_j
            for i in range(n):
                for j in range(m):
                    w[j, i] = l_j[j, i] / sum_l_j[i]
            
            # Compute ll += np.sum(np.log(sum_l_j) - minr)
            for i in range(n):
                ll += np.log(sum_l_j[i]) - minr[i]
            
            penloglik = ll + np.log(2.0) + min(np.log(tau), np.log(1 - tau))
            
            for j in range(m):
                s0j = sigma_0[j] / sigma_jn[j]
                penloglik += -an * (s0j**2 - 2.0 * np.log(s0j) - 1.0)
                penloglik += min(np.log(alpha_jn[j]), np.log(1 - alpha_jn[j]))
            diff = penloglik - oldpenloglik
            oldpenloglik = penloglik
            emit += 1
            
            # Update parameters
            mubeta_jn_mat = np.zeros((m,q1),dtype=np.float64)
            wtilde = np.zeros(nt)
            for j in range(m):
                alpha_jn[j] = np.mean(w[j, :])
                wtilde = w[j, :].T
                w_j = np.zeros(nt)
                for i in range(n):
                    w_j[i * t : (i + 1) * t] = wtilde[i]
                xtilde = np.zeros((nt, q1))
                for ii in range(q1):
                    xtilde[:, ii] = w_j * x1[:, ii]
                # design_matrix = xtilde.T @ x1
                # solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                # xtilde.T @ ytilde
                mubeta_jn_mat[j,:] = solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                ssr_j = np.sum(w_j * (ytilde - x1 @ mubeta_jn_mat[j,:])**2)
                sigma_jn[j] = np.sqrt((ssr_j + 2.0 * an * sigma_0[j]**2) / (np.sum(w_j) + 2.0 * an))
                sigma_jn[j] = max(sigma_jn[j], epsilon * sigma_0[j])
            
            # update alpha
            total_alpha = np.sum(alpha_jn)
            for j in range(m):
                alpha_jn[j] = max(0.01, alpha_jn[j] / total_alpha)
            
            # update gamma
            if p > 0:
                ztilde = np.zeros((nt, p), dtype=np.float64) 
                zz = np.zeros((p, p), dtype=np.float64) 
                ze = np.zeros((p, 1), dtype=np.float64) 
                for j in range(m):
                    wtilde = w[j, :]
                    w_j = np.zeros(nt)
                    for i in range(n):
                        w_j[i * t : (i + 1) * t] = wtilde[i]
                    for ii in range(p):
                        ztilde[:, ii] = w_j * z[:, ii]
                    zz += ztilde.T @ z / (sigma_jn[j]**2)
                    ze += ztilde.T @( y - x1 @ mubeta_jn_mat[j,:]) / (sigma_jn[j]**2)
                gamma_jn = solve_linear_system_safe(zz,ze).flatten()
            
        penloglikset[jn] = penloglik
        loglikset[jn] = ll
        post[:, jn] = w.T.flatten()
        alpha_draw[:, jn] = alpha_jn
        mubeta_draw[:, jn] = mubeta_jn_mat.T.flatten()
        sigma_draw[:, jn] = sigma_jn
        if p > 0:
            gamma_draw[:, jn] = gamma_jn
    return(alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset ,post)

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500): 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik)
    
        # return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)

In [None]:
@njit(parallel=True)
def compute_lr_stat(y_lr, x_lr, z_lr, m, p, q, nrep):
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(nrep, dtype=np.float64)

    for ii in prange(nrep):
        # Extract y, x, z from data_lr (passed as separate arrays in Numba)
        y = y_lr[ii]  # y for replication ii
        x = x_lr[ii]  # x for replication ii
        z = z_lr[ii]  # z for replication ii

        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1 - penloglik)
    return lr_stat

In [None]:
# Simulation
# ------------------------------------------
   
import time
# Input Parameters
Nset = [200, 400]
Tset = [3, 5, 8]
alphaset = [[0.5, 0.5], [0.2, 0.8]]
muset = [[-1, 1], [-0.5, 0.5]]
sigmaset = [[0.8, 1.2]]

# Test panel mixture
gam = None
beta = None
N = 200
T = 3
M = 2
p = 0
q = 0
nrep = 100
BB = 199

alpha = alphaset[0]
mu = muset[0]
sigma = sigmaset[0]
n_grid=3
r_test=2
# Generate data
weights_equal = np.full(N, 1 / N)

In [None]:
Data = [generate_data(alpha, mu, sigma, gam, beta, N, T, M, p, q) for _ in range(nrep)]

y_lr = [data["Y"] for data in Data]
x_lr = [process_array_or_none(data["X"], N*T) for data in Data]
z_lr = [process_array_or_none(data["Z"], N*T) for data in Data]

start_time = time.time()
lr_stat = compute_lr_stat(y_lr, x_lr, z_lr, M, p, q, nrep)
print("Excution time",time.time()- start_time)

  penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)


Excution time 143.8285427093506


In [None]:
@jit(nopython=True, parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Precompute dot products
    mu_R = np.dot(R, mu)
    sigma_R = np.dot(R, sigma)
    beta_R = np.dot(R, beta) if q > 0 else np.zeros((N, q))
    
    # Generate Y
    u = np.random.normal(size=(T, N))
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn

    # Return arrays (Numba doesn't support dictionaries)
    return Y, z, x

NameError: name 'jit' is not defined

In [None]:
@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Precompute dot products
    mu_R = np.dot(R, mu)
    sigma_R = np.dot(R, sigma)
    beta_R = np.dot(R, beta) if q > 0 else np.zeros((N, q))
    
    # Generate Y
    u = np.random.normal(size=(T, N))
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn

    # Return arrays (Numba doesn't support dictionaries)
    return Y, z, x

In [None]:
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<function sum at 0x7faa7c1b3e20>) found for signature:
 
 >>> sum(reflected list(float64)<iv=None>)
 
There are 2 candidate implementations:
[1m  - Of which 2 did not match due to:
  Overload of function 'sum': File: numba/core/typing/npydecl.py: Line 368.
    With argument(s): '(reflected list(float64)<iv=None>)':[0m
[1m   No match.[0m
[0m
[0m[1mDuring: resolving callee type: Function(<function sum at 0x7faa7c1b3e20>)[0m
[0m[1mDuring: typing of call at <ipython-input-8-d24ad7b2182e> (6)
[0m
[1m
File "<ipython-input-8-d24ad7b2182e>", line 6:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    # Normalize alpha if necessary
[1m    alpha_sum = np.sum(alpha)
[0m    [1m^[0m[0m


In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    return(alpha)
    generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

In [None]:
# Check input consistency - Numba doesn't support exceptions like Python
if len(alpha) != M or len(mu) != M:
    raise ValueError("M must be the size of alpha and mu")
return(alpha)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)
# Generate prior and initialize R
prior = np.random.uniform(size=N)
alpha_cum = np.zeros(M + 1)
for m in range(M):
    alpha_cum[m + 1] = alpha_cum[m] + alpha[m]

if M > 1:
    for m in range(M):
        lb = alpha_cum[m]
        ub = alpha_cum[m + 1]
        for n in prange(N):
            R[n, m] = 1 if lb < prior[n] <= ub else 0
else:
    R[:] = 1

# Initialize output arrays
Y = np.zeros((T, N))

# Generate x and z if not provided
if q != 0 and x is None:
    x = np.random.normal(size=(N * T, q))
if p != 0 and z is None:
    z = np.random.normal(size=(N * T, p))

# Precompute dot products
mu_R = np.dot(R, mu)
sigma_R = np.dot(R, sigma)
beta_R = np.dot(R, beta) if q > 0 else np.zeros((N, q))

# Generate Y
u = np.random.normal(size=(T, N))
for nn in prange(N):
    y_nn = np.zeros(T)
    y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
    
    if q > 1:
        y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
    elif q == 1:
        y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
    
    if p > 1:
        y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
    elif p == 1:
        y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
    
    Y[:, nn] = y_nn

# Return arrays (Numba doesn't support dictionaries)
return Y, z, x

SyntaxError: 'return' outside function (<ipython-input-11-2622cee17816>, line 5)

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    return(alpha)
    generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    return(alpha)
    generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Precompute dot products
    mu_R = np.dot(R, mu)
    sigma_R = np.dot(R, sigma)
    beta_R = np.dot(R, beta) if q > 0 else np.zeros((N, q))
    return(alpha)
    generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

In [None]:
R = np.zeros((N, M))

# Normalize alpha if necessary
alpha_sum = np.sum(alpha)
if alpha_sum != 1:
    alpha = alpha / alpha_sum

# Check input consistency - Numba doesn't support exceptions like Python
if len(alpha) != M or len(mu) != M:
    raise ValueError("M must be the size of alpha and mu")

# Generate prior and initialize R
prior = np.random.uniform(size=N)
alpha_cum = np.zeros(M + 1)
for m in range(M):
    alpha_cum[m + 1] = alpha_cum[m] + alpha[m]

if M > 1:
    for m in range(M):
        lb = alpha_cum[m]
        ub = alpha_cum[m + 1]
        for n in prange(N):
            R[n, m] = 1 if lb < prior[n] <= ub else 0
else:
    R[:] = 1

# Initialize output arrays
Y = np.zeros((T, N))

# Generate x and z if not provided
if q != 0 and x is None:
    x = np.random.normal(size=(N * T, q))
if p != 0 and z is None:
    z = np.random.normal(size=(N * T, p))

# Precompute dot products
mu_R = np.dot(R, mu)
sigma_R = np.dot(R, sigma)
beta_R = np.dot(R, beta) if q > 0 else np.zeros((N, q))

In [None]:
mu_R

array([ 1.,  1., -1., -1., -1.,  1.,  1., -1., -1.,  1., -1.,  1.,  1.,
        1.,  1., -1., -1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
       -1.,  1., -1., -1., -1., -1.,  1., -1., -1., -1.,  1., -1.,  1.,
       -1.,  1.,  1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1., -1.,  1.,
        1., -1., -1.,  1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1., -1.,
        1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1., -1., -1.,
       -1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,
        1.,  1., -1., -1.,  1., -1., -1.,  1.,  1.,  1., -1., -1.,  1.,
        1., -1., -1.,  1., -1., -1., -1.,  1.,  1.,  1., -1.,  1.,  1.,
       -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1., -1., -1., -1., -1.,
       -1.,  1., -1., -1., -1.,  1., -1.,  1.,  1., -1.,  1.,  1., -1.,
       -1., -1.,  1., -1.,  1.,  1., -1.,  1., -1.,  1., -1.,  1.,  1.,
        1.,  1., -1.,  1.,  1.,  1., -1., -1.,  1.,  1.,  1., -1., -1.,
       -1., -1., -1.,  1., -1., -1., -1., -1.,  1., -1.,  1.,  1

In [None]:
R

array([[0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.

In [None]:
R @ mu

array([ 1.,  1., -1., -1., -1.,  1.,  1., -1., -1.,  1., -1.,  1.,  1.,
        1.,  1., -1., -1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
       -1.,  1., -1., -1., -1., -1.,  1., -1., -1., -1.,  1., -1.,  1.,
       -1.,  1.,  1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1., -1.,  1.,
        1., -1., -1.,  1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1., -1.,
        1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1., -1., -1.,
       -1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,
        1.,  1., -1., -1.,  1., -1., -1.,  1.,  1.,  1., -1., -1.,  1.,
        1., -1., -1.,  1., -1., -1., -1.,  1.,  1.,  1., -1.,  1.,  1.,
       -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1., -1., -1., -1., -1.,
       -1.,  1., -1., -1., -1.,  1., -1.,  1.,  1., -1.,  1.,  1., -1.,
       -1., -1.,  1., -1.,  1.,  1., -1.,  1., -1.,  1., -1.,  1.,  1.,
        1.,  1., -1.,  1.,  1.,  1., -1., -1.,  1.,  1.,  1., -1., -1.,
       -1., -1., -1.,  1., -1., -1., -1., -1.,  1., -1.,  1.,  1

In [None]:
R @ mu - mu_R

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Precompute dot products
    mu_R = R @ mu -
    sigma_R = R @ sigma
    beta_R = R @ beta  if q > 0 else np.zeros((N, q))
    return(alpha, beta_R)
    generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

SyntaxError: invalid syntax (<ipython-input-20-ba246926879a>, line 44)

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Precompute dot products
    mu_R = R @ mu 
    sigma_R = R @ sigma
    beta_R = R @ beta  if q > 0 else np.zeros((N, q))
    return(alpha, beta_R)
    generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Precompute dot products
    mu_R = R @ mu 
    sigma_R = R @ sigma
    beta_R = R @ beta  if q > 0 else np.zeros((N, q))
    # Generate Y
    u = np.random.normal(size=(T, N))
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn

    # Return arrays (Numba doesn't support dictionaries)
    return(alpha, beta_R)
    generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Precompute dot products
    mu_R = R @ mu 
    sigma_R = R @ sigma
    beta_R = R @ beta  if q > 0 else np.zeros((N, q))
    # Generate Y
    u = np.random.normal(size=(T, N))
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn

    return Y, z, x
    
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<function sum at 0x7faa7c1b3e20>) found for signature:
 
 >>> sum(reflected list(float64)<iv=None>)
 
There are 2 candidate implementations:
[1m    - Of which 2 did not match due to:
    Overload of function 'sum': File: numba/core/typing/npydecl.py: Line 368.
      With argument(s): '(reflected list(float64)<iv=None>)':[0m
[1m     No match.[0m
[0m
[0m[1mDuring: resolving callee type: Function(<function sum at 0x7faa7c1b3e20>)[0m
[0m[1mDuring: typing of call at <ipython-input-23-5e5ff4a45b78> (11)
[0m
[1m
File "<ipython-input-23-5e5ff4a45b78>", line 11:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    # Normalize alpha if necessary
[1m    alpha_sum = np.sum(alpha)
[0m    [1m^[0m[0m


In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Precompute dot products
    mu_R = R @ mu 
    sigma_R = R @ sigma
    beta_R = R @ beta  if q > 0 else np.zeros((N, q))
    # Generate Y
    u = np.random.normal(size=(T, N))
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn

    # Return arrays (Numba doesn't support dictionaries)
    return(alpha, beta_R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<function sum at 0x7faa7c1b3e20>) found for signature:
 
 >>> sum(reflected list(float64)<iv=None>)
 
There are 2 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload of function 'sum': File: numba/core/typing/npydecl.py: Line 368.
        With argument(s): '(reflected list(float64)<iv=None>)':[0m
[1m       No match.[0m
[0m
[0m[1mDuring: resolving callee type: Function(<function sum at 0x7faa7c1b3e20>)[0m
[0m[1mDuring: typing of call at <ipython-input-24-316f00aa9ab2> (11)
[0m
[1m
File "<ipython-input-24-316f00aa9ab2>", line 11:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    # Normalize alpha if necessary
[1m    alpha_sum = np.sum(alpha)
[0m    [1m^[0m[0m


In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))
    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1


    # Return arrays (Numba doesn't support dictionaries)
    return(alpha, beta_R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<function sum at 0x7faa7c1b3e20>) found for signature:
 
 >>> sum(reflected list(float64)<iv=None>)
 
There are 2 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload of function 'sum': File: numba/core/typing/npydecl.py: Line 368.
        With argument(s): '(reflected list(float64)<iv=None>)':[0m
[1m       No match.[0m
[0m
[0m[1mDuring: resolving callee type: Function(<function sum at 0x7faa7c1b3e20>)[0m
[0m[1mDuring: typing of call at <ipython-input-25-0687d1f121e9> (11)
[0m
[1m
File "<ipython-input-25-0687d1f121e9>", line 11:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    # Normalize alpha if necessary
[1m    alpha_sum = np.sum(alpha)
[0m    [1m^[0m[0m


In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    # Return arrays (Numba doesn't support dictionaries)
    return(alpha)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

[0.5, 0.5]

In [None]:
alpha

[0.5, 0.5]

In [None]:
# Input Parameters
Nset = [200, 400]
Tset = [3, 5, 8]
alphaset = [np.array([0.5, 0.5]), np.array([0.2, 0.8])]
muset = [np.array([-1, 1]), np.array([-0.5, 0.5])]
sigmaset = [np.array([0.8, 1.2])]

# Test panel mixture
gam = None
beta = None
N = 200
T = 3
M = 2
p = 0
q = 0
nrep = 100
BB = 199

alpha = alphaset[0]
mu = muset[0]
sigma = sigmaset[0]

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Return arrays (Numba doesn't support dictionaries)
    return(alpha)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

array([0.5, 0.5])

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.uniform(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Return arrays (Numba doesn't support dictionaries)
    return(alpha)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in method uniform of numpy.random.mtrand.RandomState object at 0x7faa6d7de240>) found for signature:
 
 >>> uniform(size=int64)
 
There are 6 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload in function 'np_uniform_impl0': File: numba/cpython/randomimpl.py: Line 619.
        With argument(s): '(size=int64)':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mgot an unexpected keyword argument 'size'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[1m      - Of which 4 did not match due to:
      Overload in function 'np_uniform_impl2': File: numba/cpython/randomimpl.py: Line 637.
        With argument(s): '(size=int64)':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mmissing a required argument: 'low'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[0m
[0m[1mDuring: resolving callee type: Function(<built-in method uniform of numpy.random.mtrand.RandomState object at 0x7faa6d7de240>)[0m
[0m[1mDuring: typing of call at <ipython-input-30-66e1fa9c3acb> (21)
[0m
[1m
File "<ipython-input-30-66e1fa9c3acb>", line 21:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    # Generate prior and initialize R
[1m    prior = np.random.uniform(size=N)
[0m    [1m^[0m[0m


In [None]:
np.random.random(size=N)

array([0.05486657, 0.48544634, 0.11770228, 0.00275318, 0.45893672,
       0.4279323 , 0.83394386, 0.71842633, 0.76986939, 0.57963677,
       0.72512929, 0.33194689, 0.72772934, 0.90293866, 0.98464747,
       0.30589893, 0.12172198, 0.31570338, 0.85335825, 0.92526933,
       0.50383889, 0.70637283, 0.56750045, 0.03003692, 0.56395622,
       0.96799003, 0.3483314 , 0.61350875, 0.57224161, 0.75337718,
       0.26779367, 0.0717495 , 0.4036937 , 0.32521692, 0.67358571,
       0.91227091, 0.37494134, 0.22630346, 0.16157528, 0.62896972,
       0.83751853, 0.20237587, 0.34281155, 0.01373276, 0.06076596,
       0.03826587, 0.30390989, 0.14472935, 0.74355127, 0.13934399,
       0.09165264, 0.78554686, 0.73144029, 0.38340521, 0.29414608,
       0.61928922, 0.0760338 , 0.88745928, 0.84083942, 0.45093343,
       0.20980079, 0.19426735, 0.85165182, 0.39080786, 0.80492643,
       0.64850031, 0.53818605, 0.27119755, 0.88558376, 0.35764662,
       0.58863036, 0.72318173, 0.660378  , 0.43198411, 0.24343

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Return arrays (Numba doesn't support dictionaries)
    return(alpha)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

array([0.5, 0.5])

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Return arrays (Numba doesn't support dictionaries)
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

array([[1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    # Return arrays (Numba doesn't support dictionaries)
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in method normal of numpy.random.mtrand.RandomState object at 0x7faa6d7de240>) found for signature:
 
 >>> normal(size=UniTuple(int64 x 2))
 
There are 8 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload in function 'np_gauss_impl0': File: numba/cpython/randomimpl.py: Line 287.
        With argument(s): '(size=UniTuple(int64 x 2))':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mgot an unexpected keyword argument 'size'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[1m      - Of which 6 did not match due to:
      Overload in function 'np_gauss_impl1': File: numba/cpython/randomimpl.py: Line 293.
        With argument(s): '(size=UniTuple(int64 x 2))':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mmissing a required argument: 'loc'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[0m
[0m[1mDuring: resolving callee type: Function(<built-in method normal of numpy.random.mtrand.RandomState object at 0x7faa6d7de240>)[0m
[0m[1mDuring: typing of call at <ipython-input-34-359a84999ab2> (40)
[0m
[1m
File "<ipython-input-34-359a84999ab2>", line 40:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    if q != 0 and x is None:
[1m        x = np.random.normal(size=(N * T, q))
[0m        [1m^[0m[0m


In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    if p != 0 and z is None:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    
    # Return arrays (Numba doesn't support dictionaries)
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    if p != 0 and z is None:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    
    # Precompute dot products
    mu_R = R @ mu 
    sigma_R = R @ sigma
    beta_R = R @ beta  if q > 0 else np.zeros((N, q))
    # Return arrays (Numba doesn't support dictionaries)
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1mFailed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<intrinsic _impl>) found for signature:
 
 >>> _impl(array(float64, 2d, C), array(int64, 1d, C))
 
There are 2 candidate implementations:
[1m  - Of which 2 did not match due to:
  Intrinsic in function 'dot_2_impl.<locals>._impl': File: numba/np/linalg.py: Line 543.
    With argument(s): '(array(float64, 2d, C), array(int64, 1d, C))':[0m
[1m   Rejected as the implementation raised a specific error:
     TypingError: [1m'@' arguments must all have the same dtype[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/np/linalg.py:563
[0m
[0m[1mDuring: resolving callee type: Function(<intrinsic _impl>)[0m
[0m[1mDuring: typing of call at /root/anaconda3/lib/python3.12/site-packages/numba/np/linalg.py (582)
[0m
[1m
File "../anaconda3/lib/python3.12/site-packages/numba/np/linalg.py", line 582:[0m
[1m            def _dot2_codegen(context, builder, sig, args):
                <source elided>

[1m        return lambda left, right: _impl(left, right)
[0m        [1m^[0m[0m

[0m[1mDuring: typing of intrinsic-call at <ipython-input-36-08d9bcb6073d> (51)[0m
[1m
File "<ipython-input-36-08d9bcb6073d>", line 51:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    # Precompute dot products
[1m    mu_R = R @ mu 
[0m    [1m^[0m[0m


In [None]:
mu

array([-1,  1])

In [None]:
sigma


array([0.8, 1.2])

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    if p != 0 and z is None:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) if q > 0 else np.zeros((N, q))
   
    # Return arrays (Numba doesn't support dictionaries)
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in function dot>) found for signature:
 
 >>> dot(array(float64, 2d, C), array(int64, 1d, C))
 
There are 4 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload in function 'dot_2': File: numba/np/linalg.py: Line 525.
        With argument(s): '(array(float64, 2d, C), array(int64, 1d, C))':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: Failed in nopython mode pipeline (step: nopython frontend)
       [1m[1m[1mNo implementation of function Function(<intrinsic _impl>) found for signature:
        
        >>> _impl(array(float64, 2d, C), array(int64, 1d, C))
        
       There are 2 candidate implementations:
       [1m  - Of which 2 did not match due to:
         Intrinsic in function 'dot_2_impl.<locals>._impl': File: numba/np/linalg.py: Line 543.
           With argument(s): '(array(float64, 2d, C), array(int64, 1d, C))':[0m
       [1m   Rejected as the implementation raised a specific error:
            TypingError: [1mnp.dot() arguments must all have the same dtype[0m[0m
         raised from /root/anaconda3/lib/python3.12/site-packages/numba/np/linalg.py:563
       [0m
       [0m[1mDuring: resolving callee type: Function(<intrinsic _impl>)[0m
       [0m[1mDuring: typing of call at /root/anaconda3/lib/python3.12/site-packages/numba/np/linalg.py (582)
       [0m
       [1m
       File "../anaconda3/lib/python3.12/site-packages/numba/np/linalg.py", line 582:[0m
       [1m            def _dot2_codegen(context, builder, sig, args):
                       <source elided>
       
       [1m        return lambda left, right: _impl(left, right)
       [0m        [1m^[0m[0m
[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typeinfer.py:1091
[1m      - Of which 2 did not match due to:
      Overload in function 'dot_3': File: numba/np/linalg.py: Line 784.
        With argument(s): '(array(float64, 2d, C), array(int64, 1d, C))':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mmissing a required argument: 'out'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[0m
[0m[1mDuring: resolving callee type: Function(<built-in function dot>)[0m
[0m[1mDuring: typing of call at <ipython-input-39-b980e60f51b6> (51)
[0m
[1m
File "<ipython-input-39-b980e60f51b6>", line 51:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    # Precompute dot products
[1m    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
[0m    [1m^[0m[0m


In [None]:
muset = [np.array([-1.0, 1.0]), np.array([-0.5, 0.5])]

In [None]:
mu = muset[0]

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    if p != 0 and z is None:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) if q > 0 else np.zeros((N, q))
   
    # Return arrays (Numba doesn't support dictionaries)
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in function dot>) found for signature:
 
 >>> dot(array(float64, 2d, C), none)
 
There are 4 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload of function 'dot': File: numba/np/linalg.py: Line 525.
        With argument(s): '(array(float64, 2d, C), none)':[0m
[1m       No match.[0m
[1m      - Of which 2 did not match due to:
      Overload in function 'dot_3': File: numba/np/linalg.py: Line 784.
        With argument(s): '(array(float64, 2d, C), none)':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mmissing a required argument: 'out'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[0m
[0m[1mDuring: resolving callee type: Function(<built-in function dot>)[0m
[0m[1mDuring: typing of call at <ipython-input-42-b980e60f51b6> (53)
[0m
[1m
File "<ipython-input-42-b980e60f51b6>", line 53:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
[1m    beta_R = np.dot(R, beta) if q > 0 else np.zeros((N, q))
[0m    [1m^[0m[0m


In [None]:
beta_R

array([], shape=(200, 0), dtype=float64)

In [None]:
beta

In [None]:
beta = np.array([[0],[0]])

In [None]:
beta = np.array([[0.0],[0.0]])

In [None]:
gamma = np.array([0.0])

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    if p != 0 and z is None:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    # Return arrays (Numba doesn't support dictionaries)
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

array([[0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.

In [None]:
beta

array([[0.],
       [0.]])

In [None]:
q


0

In [None]:
p 

0

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0 and x is None:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    if p != 0 and z is None:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    u = np.random.normal(size=(T, N))
    # Return arrays (Numba doesn't support dictionaries)
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in method normal of numpy.random.mtrand.RandomState object at 0x7faa6d7de240>) found for signature:
 
 >>> normal(size=UniTuple(int64 x 2))
 
There are 8 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload in function 'np_gauss_impl0': File: numba/cpython/randomimpl.py: Line 287.
        With argument(s): '(size=UniTuple(int64 x 2))':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mgot an unexpected keyword argument 'size'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[1m      - Of which 6 did not match due to:
      Overload in function 'np_gauss_impl1': File: numba/cpython/randomimpl.py: Line 293.
        With argument(s): '(size=UniTuple(int64 x 2))':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mmissing a required argument: 'loc'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[0m
[0m[1mDuring: resolving callee type: Function(<built-in method normal of numpy.random.mtrand.RandomState object at 0x7faa6d7de240>)[0m
[0m[1mDuring: typing of call at <ipython-input-52-fa36a011eba6> (55)
[0m
[1m
File "<ipython-input-52-fa36a011eba6>", line 55:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    <source elided>
   
[1m    u = np.random.normal(size=(T, N))
[0m    [1m^[0m[0m


In [None]:
x = np.zeros((nt, 1))

NameError: name 'nt' is not defined

In [None]:
# Generate x and z if not provided
if q != 0:
    x = np.empty((N * T, q))
    for i in range(N * T):
        for j in range(q):
            x[i, j] = np.random.normal()  # Generate one value at a time
else:
    x = np.zeros((N * T, 1), dtype=np.float64)
    
if p != 0:
    z = np.empty((N * T, p))
    for i in range(N * T):
        for j in range(p):
            z[i, j] = np.random.normal()  # Generate one value at a time
else:
    z = np.zeros((N * T, 1), dtype=np.float64)

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    else:
        x = np.zeros((N * T, 1), dtype=np.float64)
        
    if p != 0:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    else:
        z = np.zeros((N * T, 1), dtype=np.float64)
        
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    # Generate u array (workaround for np.random.normal with size)
    u = np.empty((T, N))
    for t in range(T):
        for n in range(N):
            u[t, n] = np.random.normal()  # Generate one value at a time

    # Generate Y
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn    
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

  y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])


TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in function dot>) found for signature:
 
 >>> dot(array(float64, 2d, A), none)
 
There are 4 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload of function 'dot': File: numba/np/linalg.py: Line 525.
        With argument(s): '(array(float64, 2d, A), none)':[0m
[1m       No match.[0m
[1m      - Of which 2 did not match due to:
      Overload in function 'dot_3': File: numba/np/linalg.py: Line 784.
        With argument(s): '(array(float64, 2d, A), none)':[0m
[1m       Rejected as the implementation raised a specific error:
         TypingError: [1mmissing a required argument: 'out'[0m[0m
  raised from /root/anaconda3/lib/python3.12/site-packages/numba/core/typing/templates.py:783
[0m
[0m[1mDuring: resolving callee type: Function(<built-in function dot>)[0m
[0m[1mDuring: typing of call at <ipython-input-55-6e54e60e4af3> (77)
[0m
[1m
File "<ipython-input-55-6e54e60e4af3>", line 77:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q):
    <source elided>
        if p > 1:
[1m            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
[0m            [1m^[0m[0m


In [None]:
beta_R

array([], shape=(200, 0), dtype=float64)

In [None]:
R = np.zeros((N, M))


# Normalize alpha if necessary
alpha_sum = np.sum(alpha)
if alpha_sum != 1:
    alpha = alpha / alpha_sum

# Check input consistency - Numba doesn't support exceptions like Python
if len(alpha) != M or len(mu) != M:
    raise ValueError("M must be the size of alpha and mu")

# Generate prior and initialize R
prior = np.random.random(size=N)
alpha_cum = np.zeros(M + 1)
for m in range(M):
    alpha_cum[m + 1] = alpha_cum[m] + alpha[m]

if M > 1:
    for m in range(M):
        lb = alpha_cum[m]
        ub = alpha_cum[m + 1]
        for n in prange(N):
            R[n, m] = 1 if lb < prior[n] <= ub else 0
else:
    R[:] = 1

# Initialize output arrays
Y = np.zeros((T, N))

# Generate x and z if not provided
if q != 0:
    x = np.empty((N * T, q))
    for i in range(N * T):
        for j in range(q):
            x[i, j] = np.random.normal()  # Generate one value at a time
else:
    x = np.zeros((N * T, 1), dtype=np.float64)
    
if p != 0:
    z = np.empty((N * T, p))
    for i in range(N * T):
        for j in range(p):
            z[i, j] = np.random.normal()  # Generate one value at a time
else:
    z = np.zeros((N * T, 1), dtype=np.float64)
    
# Precompute dot products
mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
beta_R = np.dot(R, beta) 
   
# Generate u array (workaround for np.random.normal with size)
u = np.empty((T, N))
for t in range(T):
    for n in range(N):
        u[t, n] = np.random.normal()  # Generate one value at a time

In [None]:
beta_R[nn, :]

NameError: name 'nn' is not defined

In [None]:
beta_R

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [None]:
gamma

array([0.])

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gamma, beta, N, T, M, p, q):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    else:
        x = np.zeros((N * T, 1), dtype=np.float64)
        
    if p != 0:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    else:
        z = np.zeros((N * T, 1), dtype=np.float64)
        
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    # Generate u array (workaround for np.random.normal with size)
    u = np.empty((T, N))
    for t in range(T):
        for n in range(N):
            u[t, n] = np.random.normal()  # Generate one value at a time

    # Generate Y
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]
        y_nn += z[(T * nn):(T * (nn + 1)), 0] @ gam
        
        Y[:, nn] = y_nn    
    return(R)
generate_data_numba(alpha, mu, sigma, gam, beta, N, T, M, p, q)

  y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]


TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1mNo implementation of function Function(<built-in function matmul>) found for signature:
 
 >>> matmul(array(float64, 1d, A), none)
 
There are 2 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload of function 'matmul': File: numba/np/linalg.py: Line 533.
        With argument(s): '(array(float64, 1d, A), none)':[0m
[1m       No match.[0m
[0m
[0m[1mDuring: typing of intrinsic-call at <ipython-input-61-68be518caca9> (72)[0m
[1m
File "<ipython-input-61-68be518caca9>", line 72:[0m
[1mdef generate_data_numba(alpha, mu, sigma, gamma, beta, N, T, M, p, q):
    <source elided>
        y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]
[1m        y_nn += z[(T * nn):(T * (nn + 1)), 0] @ gam
[0m        [1m^[0m[0m


In [None]:
x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]

NameError: name 'nn' is not defined

In [None]:
nn = 0 

In [None]:
x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]

array([0., 0., 0.])

In [None]:
y_nn = np.zeros(T)
y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]

y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]
y_nn += z[(T * nn):(T * (nn + 1)), 0] @ gam

ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gamma, beta, N, T, M, p, q):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    else:
        x = np.zeros((N * T, 1), dtype=np.float64)
        
    if p != 0:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    else:
        z = np.zeros((N * T, 1), dtype=np.float64)
        
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    # Generate u array (workaround for np.random.normal with size)
    u = np.empty((T, N))
    for t in range(T):
        for n in range(N):
            u[t, n] = np.random.normal()  # Generate one value at a time

    # Generate Y
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]
        y_nn += z[(T * nn):(T * (nn + 1)), :] @ gamma
        
        Y[:, nn] = y_nn    
    return(R)
generate_data_numba(alpha, mu, sigma, gamma, beta, N, T, M, p, q)

  y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]


array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data_numba(alpha, mu, sigma, gamma, beta, N, T, M, p, q):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    else:
        x = np.zeros((N * T, 1), dtype=np.float64)
        
    if p != 0:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    else:
        z = np.zeros((N * T, 1), dtype=np.float64)
        
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    # Generate u array (workaround for np.random.normal with size)
    u = np.empty((T, N))
    for t in range(T):
        for n in range(N):
            u[t, n] = np.random.normal()  # Generate one value at a time

    # Generate Y
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]
        y_nn += z[(T * nn):(T * (nn + 1)), :] @ gamma
        
        Y[:, nn] = y_nn    
    return(Y, z, x)

Y, z, x = generate_data_numba(alpha, mu, sigma, gamma, beta, N, T, M, p, q)

In [None]:
z

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [None]:
x

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    else:
        x = np.zeros((N * T, 1), dtype=np.float64)
        
    if p != 0:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    else:
        z = np.zeros((N * T, 1), dtype=np.float64)
        
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    # Generate u array (workaround for np.random.normal with size)
    u = np.empty((T, N))
    for t in range(T):
        for n in range(N):
            u[t, n] = np.random.normal()  # Generate one value at a time

    # Generate Y
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]
        y_nn += z[(T * nn):(T * (nn + 1)), :] @ gamma
        
        Y[:, nn] = y_nn    
    return(Y, z, x)

# Y, z, x = generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q)

In [None]:
Data = [generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q) for _ in range(nrep)]

In [None]:
data_nopar = [data[0] for data in Data]

In [None]:
data_nopar

[array([[ 1.57747827e-01,  1.59030702e+00, -6.51700648e-01,
         -7.71793801e-01,  7.69132329e-01, -1.83889442e+00,
         -2.41177259e+00,  3.13320204e+00,  5.08823785e-01,
          1.16059898e+00,  9.63970807e-01,  4.81164292e-02,
         -5.97828771e-01, -3.53543026e-01, -2.72543020e-01,
          2.72837835e+00, -6.83572907e-02,  1.42497830e+00,
          3.83534180e+00, -1.63488545e+00,  1.94866390e+00,
          2.75405949e+00, -1.15171348e+00,  6.75581145e-01,
         -1.62493933e+00, -1.75122874e+00, -7.47743077e-01,
          8.39849777e-01,  3.39641762e-01, -5.82215764e-01,
          7.82485161e-01, -2.17602851e+00, -2.57666738e-01,
         -6.27662252e-01, -1.13859913e+00,  6.37447876e-02,
         -6.21545957e-01,  1.33630001e-01, -3.24165447e-01,
         -1.21864336e+00, -4.42746849e-01,  1.09148224e+00,
         -1.24615272e+00, -8.53063557e-01,  6.31307736e-01,
         -1.72626739e+00, -2.67735013e-01,  9.72304051e-01,
          4.18827152e-01, -8.05595050e-0

In [None]:
result_rk_each = NonParTestParallel(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test)

NameError: name 'NonParTestParallel' is not defined

In [None]:
import numpy as np
from numba import njit, prange
from numba.typed import Dict, List
from numba.core import types

In [None]:
# Functions for Numba
# ----------------------------------------------------------
@njit
def invert_matrix(mat, epsilon=1e-8):
    """
    Numba-compatible function to compute the inverse of a square matrix.
    If the determinant is close to zero, the matrix is regularized by adding
    epsilon to the diagonal before inversion.

    Parameters:
        mat (ndarray): Input square matrix.
        epsilon (float): Small value added to the diagonal for regularization.

    Returns:
        ndarray: Inverse of the matrix.
    """
    # Ensure the matrix is square
    if mat.shape[0] != mat.shape[1]:
        # Numba cannot raise exceptions, so we return an empty array for invalid input
        return np.zeros_like(mat)
    
    # Compute the determinant
    det_val = np.linalg.det(mat)
    
    # Regularize the matrix if the determinant is close to zero
    if abs(det_val) < epsilon:
        mat = mat + np.eye(mat.shape[0]) * epsilon
    
    # Compute and return the inverse
    return np.linalg.inv(mat)


@njit
def min_along_axis_0(r):
    # Get the shape of the array
    rows, cols = r.shape
    
    # Initialize an array to store the minimum values for each column
    min_vals = np.empty(cols)
    
    # Iterate through each column
    for j in range(cols):
        # Initialize the minimum value for the current column
        min_val = r[0, j]
        
        # Iterate through each row in the current column
        for i in range(1, rows):
            if r[i, j] < min_val:
                min_val = r[i, j]
        
        # Store the minimum value for the column
        min_vals[j] = min_val
    
    return min_vals


@njit
def max_along_axis_1(matrix):
    """
    Compute the maximum along axis 1 for a 2D array.
    This replaces np.max(axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    max_values = np.empty(n_rows)  # Array to store max values for each row
    for i in range(n_rows):
        max_values[i] = -np.inf  # Initialize with negative infinity
        for j in range(n_cols):
            if matrix[i, j] > max_values[i]:
                max_values[i] = matrix[i, j]
    return max_values

@njit
def mean_along_axis_1(matrix):
    """
    Compute the mean along axis 1 for a 2D array.
    This replaces np.mean(array, axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    mean_values = np.empty(n_rows)  # Array to store mean values for each row
    for i in range(n_rows):
        row_sum = 0.0
        for j in range(n_cols):
            row_sum += matrix[i, j]
        mean_values[i] = row_sum / n_cols  # Compute mean for the row
    return mean_values


@njit
def compute_quantile(data, q):
    """
    Compute the q-th quantile manually.
    This replaces np.quantile for Numba compatibility.
    """
    sorted_data = np.sort(data)  # Sort the data
    idx = int(q * (len(sorted_data) - 1))  # Find the index for the quantile
    return sorted_data[idx]



@njit
def solve_linear_system_safe(A, b):
    """
    Safely solve the linear system Ax = b.
    If A is singular or nearly singular, return a default solution (e.g., zeros).
    """
    # Check if the matrix is singular
    det = np.linalg.det(A)
    if abs(det) < 1e-12:  # Threshold for singularity
        # Handle singular matrix (e.g., return zeros or raise an error)
        return np.zeros_like(b)  # Return a vector of zeros
    else:
        # Solve the system using np.linalg.solve
        return np.linalg.solve(A, b)
    
@njit
def solve_least_squares(A, b):
    """Solve the least squares problem Ax = b using the normal equation."""
    AtA = A.T @ A  # Compute A^T * A
    Atb = A.T @ b  # Compute A^T * b
    return solve_linear_system_safe(AtA, Atb)


@njit
def generate_random_uniform(low, high, size):
    """Generate random uniform samples using Numba."""
    out = np.empty(size)
    for i in range(size[0]):
        for j in range(size[1]):
            out[i, j] = low + (high - low) * np.random.random()
    return out

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    else:
        x = np.zeros((N * T, 1), dtype=np.float64)
        
    if p != 0:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    else:
        z = np.zeros((N * T, 1), dtype=np.float64)
        
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    # Generate u array (workaround for np.random.normal with size)
    u = np.empty((T, N))
    for t in range(T):
        for n in range(N):
            u[t, n] = np.random.normal()  # Generate one value at a time

    # Generate Y
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]
        y_nn += z[(T * nn):(T * (nn + 1)), :] @ gamma
        
        Y[:, nn] = y_nn    
    return(Y, z, x)

In [None]:
def generate_data(alpha, mu, sigma, gam, beta, N, T, M, p, q, x=None, z=None):
    # print(f"N = {N}")
    # print(f"T = {T}")

    R = np.zeros((N, M))
    if sum(alpha) != 1:
        
        alpha = np.array(alpha) / sum(alpha)

    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    prior = np.random.uniform(size=N)
    alpha_cum = np.cumsum([0] + list(alpha))
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            R[:, m] = ((prior > lb) & (prior <= ub)).astype(int)
    else:
        R = np.ones((N, M))

    Y = np.zeros((T, N))
    
    if q != 0 and x is None:
        x = np.random.normal(size=(N * T, q))
    if p != 0 and z is None:
        z = np.random.normal(size=(N * T, p))
    
    mu_R = np.dot(R, mu)
    sigma_R = np.dot(R, sigma)
    u = np.random.normal(size=(T, N))
    
    for nn in range(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        if q > 1:
            beta_R = np.dot(R, beta)
            y_nn += np.dot(x[(T * nn):(T * (nn + 1)), :], beta_R[nn, :])
        elif q == 1:
            beta_R = np.dot(R, np.ravel(beta))
            y_nn += x[(T * nn):(T * (nn + 1)), 0] * beta_R[nn]
        
        if p > 1:
            y_nn += np.dot(z[(T * nn):(T * (nn + 1)), :], gam)
        elif p == 1:
            y_nn += z[(T * nn):(T * (nn + 1)), 0] * gam
        
        Y[:, nn] = y_nn
    
    if p == 0:
        z = None
    if q == 0:
        x = None

    data_dict = {}
    data_dict['Y'] = Y
    data_dict['Z'] = z
    data_dict['X'] = x
    return data_dict

In [None]:
# Nonparametric test
# ----------------------------------------------------------

@njit
def create_indicator_list(data_c, T, N, n_bins):
    """
    Create a list of indicator matrices based on quantiles for each time period.
    """
    indicator_list = List()
    for t in range(T):
        # Calculate quantiles manually
        quantiles = np.empty(n_bins + 1)
        sorted_data = np.sort(data_c[t, :])
        for i in range(n_bins + 1):
            if i == 0:
                quantiles[i] = -np.inf
            elif i == n_bins:
                quantiles[i] = np.inf
            else:
                quantiles[i] = sorted_data[int(i * N / n_bins)]

        # Create indicator matrix
        indicator_matrix = np.zeros((N, n_bins))
        for n in range(N):
            for b in range(n_bins):
                if quantiles[b] <= data_c[t, n] < quantiles[b + 1]:
                    indicator_matrix[n, b] = 1
                    break
        indicator_list.append(indicator_matrix)
    return indicator_list

from numba import njit
from numba.typed import List
import numpy as np


@njit
def calculate_P_matrix(data_c, weights, n_grid=3, n_bins=2):
    """
    Calculate P matrices and Sigma matrices for triplets in a Numba-compatible way.
    """
    T = data_c.shape[0]
    N = data_c.shape[1]
    
    # Create `indicator_list_Y` with 2 bins
    indicator_list_Y = create_indicator_list(data_c, T, N, n_bins=n_bins)
    
    # Create `indicator_list_Y_ngrid` with `n_grid` bins
    indicator_list_Y_ngrid = create_indicator_list(data_c, T, N, n_bins=n_grid)
    
    # Initialize the result lists
    P_k_list = List()
    Sigma_P_k_list = List()
    
    # Iterate over the t periods
    for k in range(T):
        # Compute the Kronecker product for each row manually
        result_matrix = np.zeros((N, (n_bins ** (T - 1))))
        for n in range(N):
            # Manually compute Kronecker product for the row
            kron_result = np.array([1.0])  # Start with scalar 1.0
            for t in range(T):
                if t != k:
                    kron_result = np.kron(kron_result, indicator_list_Y[t][n, :])
            result_matrix[n, :] = kron_result
        
        # Compute P_k
        P_k = (weights * indicator_list_Y_ngrid[k].T) @ result_matrix
        P_k_list.append(P_k)
        
        # Compute Sigma_P_k
        P_k_vec = P_k.T.flatten()
        W_P_s = np.diag(P_k_vec) - np.outer(P_k_vec, P_k_vec)
        Sigma_P_k_list.append(W_P_s)

    
    return {
        "P_k_list": P_k_list,
        "Sigma_P_k_list": Sigma_P_k_list
    }



@njit
def compute_matrix_sqrt(U, S, VT):
    # Compute the square root of singular values
    sqrt_singular_values = np.sqrt(S)
    
    # Reconstruct the square root matrix
    sqrt_mat = U @ np.diag(sqrt_singular_values) @ VT
    return sqrt_mat

# Wrapper function to handle SVD outside Numba
def matrix_sqrt_svd(mat):
    if not isinstance(mat, np.ndarray):
        raise ValueError("Input must be a matrix (NumPy array).")
    
    if mat.shape[0] != mat.shape[1]:
        raise ValueError("Input must be a square matrix.")
    
    # Perform SVD decomposition (outside Numba)
    U, S, VT = np.linalg.svd(mat)
    
    # Compute the square root matrix (inside Numba)
    sqrt_mat = compute_matrix_sqrt(U, S, VT)
    return sqrt_mat


@njit
def matrix_sqrt(A):
    """Compute the square root of a matrix using eigen-decomposition."""
    # Eigen-decomposition of the matrix
    vals, vecs = np.linalg.eigh(A)
    # Compute the square root of eigenvalues
    sqrt_vals = np.sqrt(vals)
    # Reconstruct the matrix square root
    sqrt_A = vecs @ np.diag(sqrt_vals) @ vecs.T
    return sqrt_A



@njit
def compute_A_q_o(U_22, U_12):
    """Compute A_q_o."""
    sqrt_U22 = matrix_sqrt(U_22 @ U_22.T)
    inv_U22_T = invert_matrix(U_22.T)
    A_q_o = np.transpose(sqrt_U22 @ inv_U22_T @ np.hstack((U_12.T, U_22.T)))
    return A_q_o

@njit
def compute_B_q_o(V_22, V_12):
    """Compute B_q_o."""
    sqrt_V22 = matrix_sqrt(V_22 @ V_22.T)
    inv_V22_T = invert_matrix(V_22.T)
    B_q_o = sqrt_V22 @ inv_V22_T @ np.hstack((V_12.T, V_22.T))
    return B_q_o

@njit
def compute_kron_BA_o(B_q_o, A_q_o):
    """Compute the Kronecker product of B_q_o and A_q_o.T."""
    return np.kron(B_q_o, A_q_o.T)

@njit
def matrix_svd_decomposition(P, m):
    """
    Perform SVD decomposition and compute A_q_o, B_q_o, and Kronecker product.
    """
    # Perform SVD outside the Numba function
    U, S, VT = np.linalg.svd(P, full_matrices=True)
    V = VT.T
    
    # Submatrices of U and V
    U_12 = U[:m, m:]
    V_12 = V[:m, m:]
    U_22 = U[m:, m:]
    V_22 = V[m:, m:]
    
    # Compute A_q_o and B_q_o using Numba-compiled functions
    A_q_o = compute_A_q_o(U_22, U_12)
    B_q_o = compute_B_q_o(V_22, V_12)
    
    # Compute the Kronecker product
    kron_BA_o = compute_kron_BA_o(B_q_o, A_q_o)
    
    # Ensure all arrays are 2D
    S = S.reshape(-1, 1)  # Convert S to a 2D column vector
    U = np.atleast_2d(U)
    V = np.atleast_2d(V)
    U_12 = np.atleast_2d(U_12)
    V_12 = np.atleast_2d(V_12)
    U_22 = np.atleast_2d(U_22)
    V_22 = np.atleast_2d(V_22)
    A_q_o = np.atleast_2d(A_q_o)
    B_q_o = np.atleast_2d(B_q_o)
    kron_BA_o = np.atleast_2d(kron_BA_o)

    # Create a Numba-compatible dictionary
    numba_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add key-value pairs
    numba_dict["D"] = S
    numba_dict["U"] = U
    numba_dict["V"] = V
    numba_dict["U_12"] = U_12
    numba_dict["V_12"] = V_12
    numba_dict["U_22"] = U_22
    numba_dict["V_22"] = V_22
    numba_dict["A_q_o"] = A_q_o
    numba_dict["B_q_o"] = B_q_o
    numba_dict["kron_BA_o"] = kron_BA_o

    return numba_dict


    
@njit
def compute_rk_stat_given_P(P, Sigma_P, P_svd, m, n_size, lambda_c):
    """
    Compute statistical metrics for the "P" or "Q" transform case.
    """
    # Extract SVD components
    A_q_o = P_svd["A_q_o"]
    B_q_o = P_svd["B_q_o"]
    kron_BA_o = P_svd["kron_BA_o"]

    # Compute lambda_q
    lambda_q = A_q_o.T @ P @ B_q_o.T - lambda_c

    # Compute Omega_q
    Omega_q = kron_BA_o @ Sigma_P @ kron_BA_o.T

    # Compute rk_c
    lambda_q_flat = lambda_q.flatten()
    Omega_q_inv = invert_matrix(Omega_q)
    rk_c = n_size * (lambda_q_flat @ Omega_q_inv @ lambda_q_flat) 

    return lambda_q, Omega_q, rk_c

@njit
def compute_rk_information_criteria(rk_c, r, n_size):
    """
    Compute AIC, BIC, and HQ criteria.
    """
    AIC_c = rk_c - 2 * r
    BIC_c = rk_c - np.log(n_size) * r
    HQ_c = rk_c - 2 * np.log(np.log(n_size)) * r
    return AIC_c, BIC_c, HQ_c

@njit
def construct_stat_KP(P, Sigma_P, m, n_size, lambda_c=0):
    """
    Construct statistical metrics for the Kronecker Product and return 
    a Numba-compatible typed dictionary.
    """
    # Perform SVD decomposition
    P_svd = matrix_svd_decomposition(P, m)

    # Compute stats using Numba
    lambda_q, Omega_q, rk_c = compute_rk_stat_given_P(P, Sigma_P, P_svd, m, n_size, lambda_c)

    # Compute the rank (r)
    r = Omega_q.shape[0]

    # Compute AIC, BIC, and HQ using Numba
    AIC_c, BIC_c, HQ_c = compute_rk_information_criteria(rk_c, r, n_size)

    # Create a Numba-compatible dictionary to store results
    result_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add results to the dictionary
    result_dict["rk_c"] = np.array([[rk_c]])  # Scalars must be converted to 2D arrays
    result_dict["lambda_c"] = lambda_q
    result_dict["Omega_q"] = Omega_q
    result_dict["AIC_c"] = np.array([[AIC_c]])
    result_dict["BIC_c"] = np.array([[BIC_c]])
    result_dict["HQ_c"] = np.array([[HQ_c]])

    return result_dict


@njit
def NonParTestParallel(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test):
    # Result array
    result_rk_each = np.zeros((nrep,2))
    for ii in range(nrep):
        # Generate synthetic data (replace with actual logic)
        data_c = data_nopar[ii]  # Example: Replace this with your `generate_data` logic
        # Initialize weights
        # Compute P and Sigma matrices
        data_P_W = calculate_P_matrix(data_c, weights_equal, n_grid=n_grid, n_bins=2)
        
        # Initialize results
        rk = np.zeros(T)
        lambda_c_list = List()
        omega_c = List()
        Sigma_P_list = List()
        P_k_list = List()
        
        # Loop through T periods to compute statistics
        for k in range(T):
            # Extract P_k and Sigma_P_k from the data_P_W object
            P_k = data_P_W["P_k_list"][k]
            Sigma_P_k = data_P_W["Sigma_P_k_list"][k]
            
            # Compute KP statistics for the k-th triplet
            stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)
            
            # Store results
            rk[k] = stat_KP["rk_c"][0,0]
            lambda_c_list.append(stat_KP["lambda_c"])
            omega_c.append(stat_KP["Omega_q"])
            Sigma_P_list.append(Sigma_P_k)
            P_k_list.append(P_k)
        # Initialize result matrix
        rk_b = np.zeros((BB, T))
        
        # Smoothed Nonparametric Bootstrap
        ru = np.random.exponential(scale=1, size=(BB, N))  # Exponential random variables
        row_sums = ru.sum(axis=1).reshape(-1, 1)  # Reshape to keep dimensions
        ru /= row_sums
        
        for i in range(BB):
            # Calculate bootstrapped P and Sigma_P matrices
            data_P_W_b = calculate_P_matrix(data_c, ru[i, :], n_grid=n_grid, n_bins=2)
            
            for k in range(T):
                P_k = data_P_W_b['P_k_list'][k]
                Sigma_P_k = data_P_W_b['Sigma_P_k_list'][k]
                # Compute KP statistics for the k-th triplet
                rk_b[i, k] = construct_stat_KP(P_k, Sigma_P_k, r_test, N, lambda_c_list[k])['rk_c'][0,0]
        # Compute max and mean values for rk and rk_b
        rk_b_max = max_along_axis_1(rk_b)  # Maximum of rk_b along axis 1
        rk_b_max_95 = compute_quantile(rk_b_max, 0.95)  # 95th quantile of rk_b_max

        
        # Store results
        result_rk_each[ii, 0] = 1 * (rk.max() > rk_b_max_95)
        rk_mean = np.mean(rk)  # Mean of rk (Numba supports this)
        rk_b_mean = mean_along_axis_1(rk_b)  # Mean of rk_b along axis 1
        rk_b_mean_95 = compute_quantile(rk_b_mean, 0.95)  # 95th quantile of rk_b_mean
        result_rk_each[ii, 1] = 1 * (rk_mean > rk_b_mean_95)
    return result_rk_each

In [None]:
# LR test functions
# ----------------------------------------------------------


SINGULAR_EPS = 1e-10  # Criteria for matrix singularity
M_LN_SQRT_2PI = 0.9189385332046727  # log(sqrt(2*pi))

@njit
def process_array_or_none(arr, nt):
    if arr is None:  # Check for None
        return np.zeros((nt,0),dtype=np.float64)  # Default behavior for None
    return arr  # Process the array if it's valid


@njit
def log_likelihood_normal(y, mu, sigma):
    """
    Calculate the log-likelihood of the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihood: float, the log-likelihood value
    """
    n = len(y)
    term1 = -n / 2 * np.log(2 * np.pi)  # Constant term
    term2 = -n * np.log(sigma)  # Log of the standard deviation
    term3 = -1 / (2 * sigma**2) * np.sum((y - mu) ** 2)  # Data fitting term
    return term1 + term2 + term3

@njit
def log_likelihood_array(y, mu, sigma):
    """
    Calculate the log-likelihood of each element in the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihoods: array, log-likelihood of each data point
    """
    # Precompute constants
    constant = -0.5 * np.log(2 * np.pi)
    variance = sigma ** 2

    # Initialize result array
    log_likelihoods = np.empty(len(y))

    # Compute log-likelihood for each data point
    for i in range(len(y)):
        log_likelihoods[i] = (
            constant 
            - np.log(sigma) 
            - ((y[i] - mu) ** 2) / (2 * variance)
        )
    
    return log_likelihoods


@njit
def compute_residual_normal_reg(m, n, t, sigma_jn, ytilde, mubeta_jn):
    """
    Compute residuals for the EM optimization loop in a Numba-compatible way.

    Parameters:
    - m: Number of components (int)
    - n: Number of groups (int)
    - t: Number of time points per group (int)
    - sigma_jn: Array of current sigma values (1D array of floats, shape (m,))
    - ytilde: Adjusted response variable (1D array of floats, shape (n * t,))
    - mubeta_jn: Array of current beta means (1D array of floats, shape (m,))

    Returns:
    - r: Residuals array (2D array of floats, shape (m, n))
    """
    # Initialize the residuals array
    r = np.zeros((m, n), dtype=np.float64)

    # Loop over each component (m)
    for j in range(m):
        
        # Loop over each group (n)
        for i in range(n):
            sum_r_t = 0.0

            # Loop over each time point within the group (t)
            for k in range(t):
                idx = i * t + k  # Compute the flattened index
                diff = ytilde[idx] - mubeta_jn[j]
                r_t = (1.0 / sigma_jn[j]) * diff
                sum_r_t += 0.5 * (r_t**2)

            # Compute residual for group i and component j
            r[j, i] = t * np.log(sigma_jn[j]) + sum_r_t
    return r


@njit
def EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=2000, tol=1e-8, tau = 0.5, epsilon=0.05):
    
    nt = len(y)
    n = nt // t

    ninits = alpha_draw.shape[1]
    # Handle x
    if q == 0:
        x1 = np.ones((nt, 1))
        q1 = 1
    else:
        x1 = np.zeros((nt, x.shape[1] + 1))
        x1[:, 0] = 1  # Add intercept
        x1[:, 1:] = x
        q1 = x1.shape[1]
    
    # Initialize variables
    lb = np.zeros(m)
    ub = np.zeros(m)
    l_j = np.zeros(m)
    w = np.zeros((m, nt))
    post = np.zeros((m * n, ninits))
    notcg = np.zeros(ninits)
    penloglikset = np.zeros(ninits)
    loglikset = np.zeros(ninits)
    
    
    for jn in range(ninits):
        alpha_jn = alpha_draw[:, jn]
        mubeta_jn = mubeta_draw[:, jn]
        sigma_jn = sigma_draw[:, jn]
        gamma_jn = gamma_draw[:, jn]  # Likely float64
    
        oldpenloglik = -np.inf
        emit = 0
        diff = 1.0
        sing = 0
        
        for iter_ii in range(maxit):
            ll = -nt * M_LN_SQRT_2PI
            
            if p > 0:
                ytilde = y - np.dot(z, gamma_jn)
            else:
                ytilde = y
            
            r = compute_residual_normal_reg(m, n, t, sigma_jn, ytilde, mubeta_jn)
            
            minr = min_along_axis_0(r)
            
            # Initialize arrays
            l_j = np.zeros((m,n))  # Same shape as `r`
            sum_l_j = np.zeros(n)   # Sum along axis 0
            w = np.zeros((m,n))    # Weights
            ll = 0.0                # Log-likelihood accumulator

            # Compute l_j = alpha_jn[:, None] * exp(minr - r)
            for i in range(n):
                for j in range(m):
                    l_j[j, i] = alpha_jn[j] * np.exp(minr[i] - r[j,i])
            
            # Compute sum_l_j = np.sum(l_j, axis=0)
            for j in range(m):
                for i in range(n):
                    sum_l_j[i] += l_j[j, i]
            
            # Compute w = l_j / sum_l_j
            for i in range(n):
                for j in range(m):
                    w[j, i] = l_j[j, i] / sum_l_j[i]
            
            # Compute ll += np.sum(np.log(sum_l_j) - minr)
            for i in range(n):
                ll += np.log(sum_l_j[i]) - minr[i]
            
            penloglik = ll + np.log(2.0) + min(np.log(tau), np.log(1 - tau))
            
            for j in range(m):
                s0j = sigma_0[j] / sigma_jn[j]
                penloglik += -an * (s0j**2 - 2.0 * np.log(s0j) - 1.0)
                penloglik += min(np.log(alpha_jn[j]), np.log(1 - alpha_jn[j]))
            diff = penloglik - oldpenloglik
            oldpenloglik = penloglik
            emit += 1
            
            # Update parameters
            mubeta_jn_mat = np.zeros((m,q1),dtype=np.float64)
            wtilde = np.zeros(nt)
            for j in range(m):
                alpha_jn[j] = np.mean(w[j, :])
                wtilde = w[j, :].T
                w_j = np.zeros(nt)
                for i in range(n):
                    w_j[i * t : (i + 1) * t] = wtilde[i]
                xtilde = np.zeros((nt, q1))
                for ii in range(q1):
                    xtilde[:, ii] = w_j * x1[:, ii]
                # design_matrix = xtilde.T @ x1
                # solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                # xtilde.T @ ytilde
                mubeta_jn_mat[j,:] = solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                ssr_j = np.sum(w_j * (ytilde - x1 @ mubeta_jn_mat[j,:])**2)
                sigma_jn[j] = np.sqrt((ssr_j + 2.0 * an * sigma_0[j]**2) / (np.sum(w_j) + 2.0 * an))
                sigma_jn[j] = max(sigma_jn[j], epsilon * sigma_0[j])
            
            # update alpha
            total_alpha = np.sum(alpha_jn)
            for j in range(m):
                alpha_jn[j] = max(0.01, alpha_jn[j] / total_alpha)
            
            # update gamma
            if p > 0:
                ztilde = np.zeros((nt, p), dtype=np.float64) 
                zz = np.zeros((p, p), dtype=np.float64) 
                ze = np.zeros((p, 1), dtype=np.float64) 
                for j in range(m):
                    wtilde = w[j, :]
                    w_j = np.zeros(nt)
                    for i in range(n):
                        w_j[i * t : (i + 1) * t] = wtilde[i]
                    for ii in range(p):
                        ztilde[:, ii] = w_j * z[:, ii]
                    zz += ztilde.T @ z / (sigma_jn[j]**2)
                    ze += ztilde.T @( y - x1 @ mubeta_jn_mat[j,:]) / (sigma_jn[j]**2)
                gamma_jn = solve_linear_system_safe(zz,ze).flatten()
            
        penloglikset[jn] = penloglik
        loglikset[jn] = ll
        post[:, jn] = w.T.flatten()
        alpha_draw[:, jn] = alpha_jn
        mubeta_draw[:, jn] = mubeta_jn_mat.T.flatten()
        sigma_draw[:, jn] = sigma_jn
        if p > 0:
            gamma_draw[:, jn] = gamma_jn
    return(alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset ,post)

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500): 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik)
    
        # return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)

In [None]:
@njit(parallel=True)
def compute_lr_stat(y_lr, x_lr, z_lr, m, p, q, nrep):
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(nrep, dtype=np.float64)

    for ii in prange(nrep):
        # Extract y, x, z from data_lr (passed as separate arrays in Numba)
        y = y_lr[ii]  # y for replication ii
        x = x_lr[ii]  # x for replication ii
        z = z_lr[ii]  # z for replication ii

        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1 - penloglik)
    return lr_stat

In [None]:
# Simulation
# ------------------------------------------
   
import time
# Input Parameters
Nset = [200, 400]
Tset = [3, 5, 8]
alphaset = [np.array([0.5, 0.5]), np.array([0.2, 0.8])]
muset = [np.array([-1.0, 1.0]), np.array([-0.5, 0.5])]
sigmaset = [np.array([0.8, 1.2])]
beta = np.array([[0.0],[0.0]])
gamma = np.array([0.0])

# Test panel mixture
N = 200
T = 3
M = 2
p = 0
q = 0
nrep = 100
BB = 199

alpha = alphaset[0]
mu = muset[0]
sigma = sigmaset[0]
n_grid=3
r_test=2
# Generate data
weights_equal = np.full(N, 1 / N)

In [None]:
result_rk_each = NonParTestParallel(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test)

  stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)
  stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)
  stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)
  stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)


In [None]:
# Loop over parameters
for alpha in alphaset:
    for mu in muset:       
        start_time = time.time()
        result_rk_each = np.zeros((nrep,2))
        Data = [generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q) for _ in range(nrep)]
        data_nopar = [data[0] for data in Data]
        # Nonparametric test
        result_rk_each = NonParTestParallel(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test)
        print("Excution time",time.time()- start_time)
        simulation_result_matrix[count,:] = result_rk_each.mean(axis=0)
        count+=1 
print(simulation_result_matrix)

KeyError: 0

In [None]:
beta

array([[0.],
       [0.]])

In [None]:
gamma

array([0.])

In [None]:
sigma

array([0.8, 1.2])

In [None]:
# Loop over parameters
for alpha in alphaset:
    for mu in muset:       
        start_time = time.time()
        result_rk_each = np.zeros((nrep,2))
        Data = [generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q) for _ in range(nrep)]
        data_nopar = [data[0] for data in Data]
        # Nonparametric test
        result_rk_each = NonParTestParallel(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test)
        print("Excution time",time.time()- start_time)
        simulation_result_matrix[count,:] = result_rk_each.mean(axis=0)
        count+=1 
print(simulation_result_matrix)

KeyError: 0

In [None]:
generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q) for _ in range(nrep)

SyntaxError: invalid syntax (<ipython-input-90-b07a44ec02a5>, line 1)

In [None]:
generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q)

{'Y': array([[ 0.55938868,  0.75295271, -1.59735568,  1.9377656 , -0.26925169,
          1.45164204, -1.38511682,  2.86013277,  2.38249853, -1.36494074,
          1.80088113, -0.64963326,  1.89716878,  0.97512372, -1.08778767,
          2.1484804 , -0.8344804 ,  1.44312751, -0.44556173,  1.50290104,
         -0.02704006, -1.1263802 ,  3.77951883,  1.07468073, -2.32383632,
          0.78133943, -1.27389909, -0.4438059 ,  0.73760353,  2.55225652,
         -0.8040908 ,  2.94431612,  0.44091451,  3.76015015, -0.36094213,
         -2.43693092, -1.07738881,  1.95037813,  1.35300752,  2.47293605,
          0.87419315, -0.35230297,  2.13151967, -0.0831498 ,  0.58029449,
         -0.71286602,  1.56839081,  1.13715637,  1.12284964, -2.07956649,
          1.43366781, -1.18372422,  1.23213235, -1.67264692, -1.68454921,
         -0.35216297,  0.51865705, -0.35101215,  2.97111626, -3.05063052,
          2.3941865 ,  0.19471007, -1.14280085,  0.1293662 , -1.83867184,
          0.07053212, -0.17230397

In [None]:
import numpy as np
from numba import njit, prange
from numba.typed import Dict, List
from numba.core import types

In [None]:
# Functions for Numba
# ----------------------------------------------------------
@njit
def invert_matrix(mat, epsilon=1e-8):
    """
    Numba-compatible function to compute the inverse of a square matrix.
    If the determinant is close to zero, the matrix is regularized by adding
    epsilon to the diagonal before inversion.

    Parameters:
        mat (ndarray): Input square matrix.
        epsilon (float): Small value added to the diagonal for regularization.

    Returns:
        ndarray: Inverse of the matrix.
    """
    # Ensure the matrix is square
    if mat.shape[0] != mat.shape[1]:
        # Numba cannot raise exceptions, so we return an empty array for invalid input
        return np.zeros_like(mat)
    
    # Compute the determinant
    det_val = np.linalg.det(mat)
    
    # Regularize the matrix if the determinant is close to zero
    if abs(det_val) < epsilon:
        mat = mat + np.eye(mat.shape[0]) * epsilon
    
    # Compute and return the inverse
    return np.linalg.inv(mat)


@njit
def min_along_axis_0(r):
    # Get the shape of the array
    rows, cols = r.shape
    
    # Initialize an array to store the minimum values for each column
    min_vals = np.empty(cols)
    
    # Iterate through each column
    for j in range(cols):
        # Initialize the minimum value for the current column
        min_val = r[0, j]
        
        # Iterate through each row in the current column
        for i in range(1, rows):
            if r[i, j] < min_val:
                min_val = r[i, j]
        
        # Store the minimum value for the column
        min_vals[j] = min_val
    
    return min_vals


@njit
def max_along_axis_1(matrix):
    """
    Compute the maximum along axis 1 for a 2D array.
    This replaces np.max(axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    max_values = np.empty(n_rows)  # Array to store max values for each row
    for i in range(n_rows):
        max_values[i] = -np.inf  # Initialize with negative infinity
        for j in range(n_cols):
            if matrix[i, j] > max_values[i]:
                max_values[i] = matrix[i, j]
    return max_values

@njit
def mean_along_axis_1(matrix):
    """
    Compute the mean along axis 1 for a 2D array.
    This replaces np.mean(array, axis=1) for Numba compatibility.
    """
    n_rows, n_cols = matrix.shape
    mean_values = np.empty(n_rows)  # Array to store mean values for each row
    for i in range(n_rows):
        row_sum = 0.0
        for j in range(n_cols):
            row_sum += matrix[i, j]
        mean_values[i] = row_sum / n_cols  # Compute mean for the row
    return mean_values


@njit
def compute_quantile(data, q):
    """
    Compute the q-th quantile manually.
    This replaces np.quantile for Numba compatibility.
    """
    sorted_data = np.sort(data)  # Sort the data
    idx = int(q * (len(sorted_data) - 1))  # Find the index for the quantile
    return sorted_data[idx]



@njit
def solve_linear_system_safe(A, b):
    """
    Safely solve the linear system Ax = b.
    If A is singular or nearly singular, return a default solution (e.g., zeros).
    """
    # Check if the matrix is singular
    det = np.linalg.det(A)
    if abs(det) < 1e-12:  # Threshold for singularity
        # Handle singular matrix (e.g., return zeros or raise an error)
        return np.zeros_like(b)  # Return a vector of zeros
    else:
        # Solve the system using np.linalg.solve
        return np.linalg.solve(A, b)
    
@njit
def solve_least_squares(A, b):
    """Solve the least squares problem Ax = b using the normal equation."""
    AtA = A.T @ A  # Compute A^T * A
    Atb = A.T @ b  # Compute A^T * b
    return solve_linear_system_safe(AtA, Atb)


@njit
def generate_random_uniform(low, high, size):
    """Generate random uniform samples using Numba."""
    out = np.empty(size)
    for i in range(size[0]):
        for j in range(size[1]):
            out[i, j] = low + (high - low) * np.random.random()
    return out

In [None]:
# Function to generate data
# ----------------------------------------------------------


@njit(parallel=True)
def generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q):
    R = np.zeros((N, M))

    
    # Normalize alpha if necessary
    alpha_sum = np.sum(alpha)
    if alpha_sum != 1:
        alpha = alpha / alpha_sum
    
    # Check input consistency - Numba doesn't support exceptions like Python
    if len(alpha) != M or len(mu) != M:
        raise ValueError("M must be the size of alpha and mu")
    
    # Generate prior and initialize R
    prior = np.random.random(size=N)
    alpha_cum = np.zeros(M + 1)
    for m in range(M):
        alpha_cum[m + 1] = alpha_cum[m] + alpha[m]
    
    if M > 1:
        for m in range(M):
            lb = alpha_cum[m]
            ub = alpha_cum[m + 1]
            for n in prange(N):
                R[n, m] = 1 if lb < prior[n] <= ub else 0
    else:
        R[:] = 1

    # Initialize output arrays
    Y = np.zeros((T, N))
    
    # Generate x and z if not provided
    if q != 0:
        x = np.empty((N * T, q))
        for i in range(N * T):
            for j in range(q):
                x[i, j] = np.random.normal()  # Generate one value at a time
    else:
        x = np.zeros((N * T, 1), dtype=np.float64)
        
    if p != 0:
        z = np.empty((N * T, p))
        for i in range(N * T):
            for j in range(p):
                z[i, j] = np.random.normal()  # Generate one value at a time
    else:
        z = np.zeros((N * T, 1), dtype=np.float64)
        
    # Precompute dot products
    mu_R = np.dot(R, mu)  # Use np.dot for matrix multiplication
    sigma_R = np.dot(R, sigma)  # Use np.dot for matrix multiplication
    beta_R = np.dot(R, beta) 
   
    # Generate u array (workaround for np.random.normal with size)
    u = np.empty((T, N))
    for t in range(T):
        for n in range(N):
            u[t, n] = np.random.normal()  # Generate one value at a time

    # Generate Y
    for nn in prange(N):
        y_nn = np.zeros(T)
        y_nn = mu_R[nn] + sigma_R[nn] * u[:, nn]
        
        y_nn += x[(T * nn):(T * (nn + 1)), :] @ beta_R[nn, :]
        y_nn += z[(T * nn):(T * (nn + 1)), :] @ gamma
        
        Y[:, nn] = y_nn    
    return(Y, z, x)

In [None]:
# Nonparametric test
# ----------------------------------------------------------

@njit
def create_indicator_list(data_c, T, N, n_bins):
    """
    Create a list of indicator matrices based on quantiles for each time period.
    """
    indicator_list = List()
    for t in range(T):
        # Calculate quantiles manually
        quantiles = np.empty(n_bins + 1)
        sorted_data = np.sort(data_c[t, :])
        for i in range(n_bins + 1):
            if i == 0:
                quantiles[i] = -np.inf
            elif i == n_bins:
                quantiles[i] = np.inf
            else:
                quantiles[i] = sorted_data[int(i * N / n_bins)]

        # Create indicator matrix
        indicator_matrix = np.zeros((N, n_bins))
        for n in range(N):
            for b in range(n_bins):
                if quantiles[b] <= data_c[t, n] < quantiles[b + 1]:
                    indicator_matrix[n, b] = 1
                    break
        indicator_list.append(indicator_matrix)
    return indicator_list

from numba import njit
from numba.typed import List
import numpy as np


@njit
def calculate_P_matrix(data_c, weights, n_grid=3, n_bins=2):
    """
    Calculate P matrices and Sigma matrices for triplets in a Numba-compatible way.
    """
    T = data_c.shape[0]
    N = data_c.shape[1]
    
    # Create `indicator_list_Y` with 2 bins
    indicator_list_Y = create_indicator_list(data_c, T, N, n_bins=n_bins)
    
    # Create `indicator_list_Y_ngrid` with `n_grid` bins
    indicator_list_Y_ngrid = create_indicator_list(data_c, T, N, n_bins=n_grid)
    
    # Initialize the result lists
    P_k_list = List()
    Sigma_P_k_list = List()
    
    # Iterate over the t periods
    for k in range(T):
        # Compute the Kronecker product for each row manually
        result_matrix = np.zeros((N, (n_bins ** (T - 1))))
        for n in range(N):
            # Manually compute Kronecker product for the row
            kron_result = np.array([1.0])  # Start with scalar 1.0
            for t in range(T):
                if t != k:
                    kron_result = np.kron(kron_result, indicator_list_Y[t][n, :])
            result_matrix[n, :] = kron_result
        
        # Compute P_k
        P_k = (weights * indicator_list_Y_ngrid[k].T) @ result_matrix
        P_k_list.append(P_k)
        
        # Compute Sigma_P_k
        P_k_vec = P_k.T.flatten()
        W_P_s = np.diag(P_k_vec) - np.outer(P_k_vec, P_k_vec)
        Sigma_P_k_list.append(W_P_s)

    
    return {
        "P_k_list": P_k_list,
        "Sigma_P_k_list": Sigma_P_k_list
    }



@njit
def compute_matrix_sqrt(U, S, VT):
    # Compute the square root of singular values
    sqrt_singular_values = np.sqrt(S)
    
    # Reconstruct the square root matrix
    sqrt_mat = U @ np.diag(sqrt_singular_values) @ VT
    return sqrt_mat

# Wrapper function to handle SVD outside Numba
def matrix_sqrt_svd(mat):
    if not isinstance(mat, np.ndarray):
        raise ValueError("Input must be a matrix (NumPy array).")
    
    if mat.shape[0] != mat.shape[1]:
        raise ValueError("Input must be a square matrix.")
    
    # Perform SVD decomposition (outside Numba)
    U, S, VT = np.linalg.svd(mat)
    
    # Compute the square root matrix (inside Numba)
    sqrt_mat = compute_matrix_sqrt(U, S, VT)
    return sqrt_mat


@njit
def matrix_sqrt(A):
    """Compute the square root of a matrix using eigen-decomposition."""
    # Eigen-decomposition of the matrix
    vals, vecs = np.linalg.eigh(A)
    # Compute the square root of eigenvalues
    sqrt_vals = np.sqrt(vals)
    # Reconstruct the matrix square root
    sqrt_A = vecs @ np.diag(sqrt_vals) @ vecs.T
    return sqrt_A



@njit
def compute_A_q_o(U_22, U_12):
    """Compute A_q_o."""
    sqrt_U22 = matrix_sqrt(U_22 @ U_22.T)
    inv_U22_T = invert_matrix(U_22.T)
    A_q_o = np.transpose(sqrt_U22 @ inv_U22_T @ np.hstack((U_12.T, U_22.T)))
    return A_q_o

@njit
def compute_B_q_o(V_22, V_12):
    """Compute B_q_o."""
    sqrt_V22 = matrix_sqrt(V_22 @ V_22.T)
    inv_V22_T = invert_matrix(V_22.T)
    B_q_o = sqrt_V22 @ inv_V22_T @ np.hstack((V_12.T, V_22.T))
    return B_q_o

@njit
def compute_kron_BA_o(B_q_o, A_q_o):
    """Compute the Kronecker product of B_q_o and A_q_o.T."""
    return np.kron(B_q_o, A_q_o.T)

@njit
def matrix_svd_decomposition(P, m):
    """
    Perform SVD decomposition and compute A_q_o, B_q_o, and Kronecker product.
    """
    # Perform SVD outside the Numba function
    U, S, VT = np.linalg.svd(P, full_matrices=True)
    V = VT.T
    
    # Submatrices of U and V
    U_12 = U[:m, m:]
    V_12 = V[:m, m:]
    U_22 = U[m:, m:]
    V_22 = V[m:, m:]
    
    # Compute A_q_o and B_q_o using Numba-compiled functions
    A_q_o = compute_A_q_o(U_22, U_12)
    B_q_o = compute_B_q_o(V_22, V_12)
    
    # Compute the Kronecker product
    kron_BA_o = compute_kron_BA_o(B_q_o, A_q_o)
    
    # Ensure all arrays are 2D
    S = S.reshape(-1, 1)  # Convert S to a 2D column vector
    U = np.atleast_2d(U)
    V = np.atleast_2d(V)
    U_12 = np.atleast_2d(U_12)
    V_12 = np.atleast_2d(V_12)
    U_22 = np.atleast_2d(U_22)
    V_22 = np.atleast_2d(V_22)
    A_q_o = np.atleast_2d(A_q_o)
    B_q_o = np.atleast_2d(B_q_o)
    kron_BA_o = np.atleast_2d(kron_BA_o)

    # Create a Numba-compatible dictionary
    numba_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add key-value pairs
    numba_dict["D"] = S
    numba_dict["U"] = U
    numba_dict["V"] = V
    numba_dict["U_12"] = U_12
    numba_dict["V_12"] = V_12
    numba_dict["U_22"] = U_22
    numba_dict["V_22"] = V_22
    numba_dict["A_q_o"] = A_q_o
    numba_dict["B_q_o"] = B_q_o
    numba_dict["kron_BA_o"] = kron_BA_o

    return numba_dict


    
@njit
def compute_rk_stat_given_P(P, Sigma_P, P_svd, m, n_size, lambda_c):
    """
    Compute statistical metrics for the "P" or "Q" transform case.
    """
    # Extract SVD components
    A_q_o = P_svd["A_q_o"]
    B_q_o = P_svd["B_q_o"]
    kron_BA_o = P_svd["kron_BA_o"]

    # Compute lambda_q
    lambda_q = A_q_o.T @ P @ B_q_o.T - lambda_c

    # Compute Omega_q
    Omega_q = kron_BA_o @ Sigma_P @ kron_BA_o.T

    # Compute rk_c
    lambda_q_flat = lambda_q.flatten()
    Omega_q_inv = invert_matrix(Omega_q)
    rk_c = n_size * (lambda_q_flat @ Omega_q_inv @ lambda_q_flat) 

    return lambda_q, Omega_q, rk_c

@njit
def compute_rk_information_criteria(rk_c, r, n_size):
    """
    Compute AIC, BIC, and HQ criteria.
    """
    AIC_c = rk_c - 2 * r
    BIC_c = rk_c - np.log(n_size) * r
    HQ_c = rk_c - 2 * np.log(np.log(n_size)) * r
    return AIC_c, BIC_c, HQ_c

@njit
def construct_stat_KP(P, Sigma_P, m, n_size, lambda_c=0):
    """
    Construct statistical metrics for the Kronecker Product and return 
    a Numba-compatible typed dictionary.
    """
    # Perform SVD decomposition
    P_svd = matrix_svd_decomposition(P, m)

    # Compute stats using Numba
    lambda_q, Omega_q, rk_c = compute_rk_stat_given_P(P, Sigma_P, P_svd, m, n_size, lambda_c)

    # Compute the rank (r)
    r = Omega_q.shape[0]

    # Compute AIC, BIC, and HQ using Numba
    AIC_c, BIC_c, HQ_c = compute_rk_information_criteria(rk_c, r, n_size)

    # Create a Numba-compatible dictionary to store results
    result_dict = Dict.empty(
        key_type=types.unicode_type,  # Keys are strings
        value_type=types.float64[:, :],  # Values are 2D arrays
    )

    # Add results to the dictionary
    result_dict["rk_c"] = np.array([[rk_c]])  # Scalars must be converted to 2D arrays
    result_dict["lambda_c"] = lambda_q
    result_dict["Omega_q"] = Omega_q
    result_dict["AIC_c"] = np.array([[AIC_c]])
    result_dict["BIC_c"] = np.array([[BIC_c]])
    result_dict["HQ_c"] = np.array([[HQ_c]])

    return result_dict


@njit
def NonParTestParallel(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test):
    # Result array
    result_rk_each = np.zeros((nrep,2))
    for ii in range(nrep):
        # Generate synthetic data (replace with actual logic)
        data_c = data_nopar[ii]  # Example: Replace this with your `generate_data` logic
        # Initialize weights
        # Compute P and Sigma matrices
        data_P_W = calculate_P_matrix(data_c, weights_equal, n_grid=n_grid, n_bins=2)
        
        # Initialize results
        rk = np.zeros(T)
        lambda_c_list = List()
        omega_c = List()
        Sigma_P_list = List()
        P_k_list = List()
        
        # Loop through T periods to compute statistics
        for k in range(T):
            # Extract P_k and Sigma_P_k from the data_P_W object
            P_k = data_P_W["P_k_list"][k]
            Sigma_P_k = data_P_W["Sigma_P_k_list"][k]
            
            # Compute KP statistics for the k-th triplet
            stat_KP = construct_stat_KP(P_k, Sigma_P_k, r_test, N)
            
            # Store results
            rk[k] = stat_KP["rk_c"][0,0]
            lambda_c_list.append(stat_KP["lambda_c"])
            omega_c.append(stat_KP["Omega_q"])
            Sigma_P_list.append(Sigma_P_k)
            P_k_list.append(P_k)
        # Initialize result matrix
        rk_b = np.zeros((BB, T))
        
        # Smoothed Nonparametric Bootstrap
        ru = np.random.exponential(scale=1, size=(BB, N))  # Exponential random variables
        row_sums = ru.sum(axis=1).reshape(-1, 1)  # Reshape to keep dimensions
        ru /= row_sums
        
        for i in range(BB):
            # Calculate bootstrapped P and Sigma_P matrices
            data_P_W_b = calculate_P_matrix(data_c, ru[i, :], n_grid=n_grid, n_bins=2)
            
            for k in range(T):
                P_k = data_P_W_b['P_k_list'][k]
                Sigma_P_k = data_P_W_b['Sigma_P_k_list'][k]
                # Compute KP statistics for the k-th triplet
                rk_b[i, k] = construct_stat_KP(P_k, Sigma_P_k, r_test, N, lambda_c_list[k])['rk_c'][0,0]
        # Compute max and mean values for rk and rk_b
        rk_b_max = max_along_axis_1(rk_b)  # Maximum of rk_b along axis 1
        rk_b_max_95 = compute_quantile(rk_b_max, 0.95)  # 95th quantile of rk_b_max

        
        # Store results
        result_rk_each[ii, 0] = 1 * (rk.max() > rk_b_max_95)
        rk_mean = np.mean(rk)  # Mean of rk (Numba supports this)
        rk_b_mean = mean_along_axis_1(rk_b)  # Mean of rk_b along axis 1
        rk_b_mean_95 = compute_quantile(rk_b_mean, 0.95)  # 95th quantile of rk_b_mean
        result_rk_each[ii, 1] = 1 * (rk_mean > rk_b_mean_95)
    return result_rk_each

In [None]:
# LR test functions
# ----------------------------------------------------------


SINGULAR_EPS = 1e-10  # Criteria for matrix singularity
M_LN_SQRT_2PI = 0.9189385332046727  # log(sqrt(2*pi))

@njit
def process_array_or_none(arr, nt):
    if arr is None:  # Check for None
        return np.zeros((nt,0),dtype=np.float64)  # Default behavior for None
    return arr  # Process the array if it's valid


@njit
def log_likelihood_normal(y, mu, sigma):
    """
    Calculate the log-likelihood of the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihood: float, the log-likelihood value
    """
    n = len(y)
    term1 = -n / 2 * np.log(2 * np.pi)  # Constant term
    term2 = -n * np.log(sigma)  # Log of the standard deviation
    term3 = -1 / (2 * sigma**2) * np.sum((y - mu) ** 2)  # Data fitting term
    return term1 + term2 + term3

@njit
def log_likelihood_array(y, mu, sigma):
    """
    Calculate the log-likelihood of each element in the data under a normal distribution.

    Parameters:
    - y: array-like, the observed data points
    - mu: float, the mean of the normal distribution
    - sigma: float, the standard deviation of the normal distribution

    Returns:
    - log_likelihoods: array, log-likelihood of each data point
    """
    # Precompute constants
    constant = -0.5 * np.log(2 * np.pi)
    variance = sigma ** 2

    # Initialize result array
    log_likelihoods = np.empty(len(y))

    # Compute log-likelihood for each data point
    for i in range(len(y)):
        log_likelihoods[i] = (
            constant 
            - np.log(sigma) 
            - ((y[i] - mu) ** 2) / (2 * variance)
        )
    
    return log_likelihoods


@njit
def compute_residual_normal_reg(m, n, t, sigma_jn, ytilde, mubeta_jn):
    """
    Compute residuals for the EM optimization loop in a Numba-compatible way.

    Parameters:
    - m: Number of components (int)
    - n: Number of groups (int)
    - t: Number of time points per group (int)
    - sigma_jn: Array of current sigma values (1D array of floats, shape (m,))
    - ytilde: Adjusted response variable (1D array of floats, shape (n * t,))
    - mubeta_jn: Array of current beta means (1D array of floats, shape (m,))

    Returns:
    - r: Residuals array (2D array of floats, shape (m, n))
    """
    # Initialize the residuals array
    r = np.zeros((m, n), dtype=np.float64)

    # Loop over each component (m)
    for j in range(m):
        
        # Loop over each group (n)
        for i in range(n):
            sum_r_t = 0.0

            # Loop over each time point within the group (t)
            for k in range(t):
                idx = i * t + k  # Compute the flattened index
                diff = ytilde[idx] - mubeta_jn[j]
                r_t = (1.0 / sigma_jn[j]) * diff
                sum_r_t += 0.5 * (r_t**2)

            # Compute residual for group i and component j
            r[j, i] = t * np.log(sigma_jn[j]) + sum_r_t
    return r


@njit
def EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=2000, tol=1e-8, tau = 0.5, epsilon=0.05):
    
    nt = len(y)
    n = nt // t

    ninits = alpha_draw.shape[1]
    # Handle x
    if q == 0:
        x1 = np.ones((nt, 1))
        q1 = 1
    else:
        x1 = np.zeros((nt, x.shape[1] + 1))
        x1[:, 0] = 1  # Add intercept
        x1[:, 1:] = x
        q1 = x1.shape[1]
    
    # Initialize variables
    lb = np.zeros(m)
    ub = np.zeros(m)
    l_j = np.zeros(m)
    w = np.zeros((m, nt))
    post = np.zeros((m * n, ninits))
    notcg = np.zeros(ninits)
    penloglikset = np.zeros(ninits)
    loglikset = np.zeros(ninits)
    
    
    for jn in range(ninits):
        alpha_jn = alpha_draw[:, jn]
        mubeta_jn = mubeta_draw[:, jn]
        sigma_jn = sigma_draw[:, jn]
        gamma_jn = gamma_draw[:, jn]  # Likely float64
    
        oldpenloglik = -np.inf
        emit = 0
        diff = 1.0
        sing = 0
        
        for iter_ii in range(maxit):
            ll = -nt * M_LN_SQRT_2PI
            
            if p > 0:
                ytilde = y - np.dot(z, gamma_jn)
            else:
                ytilde = y
            
            r = compute_residual_normal_reg(m, n, t, sigma_jn, ytilde, mubeta_jn)
            
            minr = min_along_axis_0(r)
            
            # Initialize arrays
            l_j = np.zeros((m,n))  # Same shape as `r`
            sum_l_j = np.zeros(n)   # Sum along axis 0
            w = np.zeros((m,n))    # Weights
            ll = 0.0                # Log-likelihood accumulator

            # Compute l_j = alpha_jn[:, None] * exp(minr - r)
            for i in range(n):
                for j in range(m):
                    l_j[j, i] = alpha_jn[j] * np.exp(minr[i] - r[j,i])
            
            # Compute sum_l_j = np.sum(l_j, axis=0)
            for j in range(m):
                for i in range(n):
                    sum_l_j[i] += l_j[j, i]
            
            # Compute w = l_j / sum_l_j
            for i in range(n):
                for j in range(m):
                    w[j, i] = l_j[j, i] / sum_l_j[i]
            
            # Compute ll += np.sum(np.log(sum_l_j) - minr)
            for i in range(n):
                ll += np.log(sum_l_j[i]) - minr[i]
            
            penloglik = ll + np.log(2.0) + min(np.log(tau), np.log(1 - tau))
            
            for j in range(m):
                s0j = sigma_0[j] / sigma_jn[j]
                penloglik += -an * (s0j**2 - 2.0 * np.log(s0j) - 1.0)
                penloglik += min(np.log(alpha_jn[j]), np.log(1 - alpha_jn[j]))
            diff = penloglik - oldpenloglik
            oldpenloglik = penloglik
            emit += 1
            
            # Update parameters
            mubeta_jn_mat = np.zeros((m,q1),dtype=np.float64)
            wtilde = np.zeros(nt)
            for j in range(m):
                alpha_jn[j] = np.mean(w[j, :])
                wtilde = w[j, :].T
                w_j = np.zeros(nt)
                for i in range(n):
                    w_j[i * t : (i + 1) * t] = wtilde[i]
                xtilde = np.zeros((nt, q1))
                for ii in range(q1):
                    xtilde[:, ii] = w_j * x1[:, ii]
                # design_matrix = xtilde.T @ x1
                # solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                # xtilde.T @ ytilde
                mubeta_jn_mat[j,:] = solve_linear_system_safe(xtilde.T @ x1, xtilde.T @ ytilde)
                ssr_j = np.sum(w_j * (ytilde - x1 @ mubeta_jn_mat[j,:])**2)
                sigma_jn[j] = np.sqrt((ssr_j + 2.0 * an * sigma_0[j]**2) / (np.sum(w_j) + 2.0 * an))
                sigma_jn[j] = max(sigma_jn[j], epsilon * sigma_0[j])
            
            # update alpha
            total_alpha = np.sum(alpha_jn)
            for j in range(m):
                alpha_jn[j] = max(0.01, alpha_jn[j] / total_alpha)
            
            # update gamma
            if p > 0:
                ztilde = np.zeros((nt, p), dtype=np.float64) 
                zz = np.zeros((p, p), dtype=np.float64) 
                ze = np.zeros((p, 1), dtype=np.float64) 
                for j in range(m):
                    wtilde = w[j, :]
                    w_j = np.zeros(nt)
                    for i in range(n):
                        w_j[i * t : (i + 1) * t] = wtilde[i]
                    for ii in range(p):
                        ztilde[:, ii] = w_j * z[:, ii]
                    zz += ztilde.T @ z / (sigma_jn[j]**2)
                    ze += ztilde.T @( y - x1 @ mubeta_jn_mat[j,:]) / (sigma_jn[j]**2)
                gamma_jn = solve_linear_system_safe(zz,ze).flatten()
            
        penloglikset[jn] = penloglik
        loglikset[jn] = ll
        post[:, jn] = w.T.flatten()
        alpha_draw[:, jn] = alpha_jn
        mubeta_draw[:, jn] = mubeta_jn_mat.T.flatten()
        sigma_draw[:, jn] = sigma_jn
        if p > 0:
            gamma_draw[:, jn] = gamma_jn
    return(alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset ,post)

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500): 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik)
    
        # return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)

In [None]:
@njit(parallel=True)
def compute_lr_stat(y_lr, x_lr, z_lr, m, p, q, nrep):
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(nrep, dtype=np.float64)

    for ii in prange(nrep):
        # Extract y, x, z from data_lr (passed as separate arrays in Numba)
        y = y_lr[ii]  # y for replication ii
        x = x_lr[ii]  # x for replication ii
        z = z_lr[ii]  # z for replication ii

        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1 - penloglik)
    return lr_stat

In [None]:
# Simulation
# ------------------------------------------
   
import time
# Input Parameters
Nset = [200, 400]
Tset = [3, 5, 8]
alphaset = [np.array([0.5, 0.5]), np.array([0.2, 0.8])]
muset = [np.array([-1.0, 1.0]), np.array([-0.5, 0.5])]
sigmaset = [np.array([0.8, 1.2])]
beta = np.array([[0.0],[0.0]])
gamma = np.array([0.0])

# Test panel mixture
N = 200
T = 3
M = 2
p = 0
q = 0
nrep = 100
BB = 199

alpha = alphaset[0]
mu = muset[0]
sigma = sigmaset[0]
n_grid=3
r_test=2
# Generate data
weights_equal = np.full(N, 1 / N)

In [None]:
 # Determine the total number of parameter combinations
total_combinations = len(alphaset) * len(muset)

# Initialize the result matrix to store mean results for each parameter combination
simulation_result_matrix = np.zeros((total_combinations, 2))

# Optional: Track parameter combinations (for debugging or analysis)
parameter_combinations = []
count = 0
# Loop over parameters
for alpha in alphaset:
    for mu in muset:       
        start_time = time.time()
        result_rk_each = np.zeros((nrep,2))
        Data = [generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q) for _ in range(nrep)]
        data_nopar = [data[0] for data in Data]
        # Nonparametric test
        result_rk_each = NonParTestParallel(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test)
         # Print execution time for this parameter combination
        print(f"Execution time for alpha={alpha}, mu={mu}: {time.time() - start_time:.2f} seconds")

        # Increment the counter
        count += 1

# Print the final result matrix
print("Simulation Result Matrix:")
print(simulation_result_matrix)

Execution time for alpha=[0.5 0.5], mu=[-1.  1.]: 16.28 seconds
Execution time for alpha=[0.5 0.5], mu=[-0.5  0.5]: 18.04 seconds
Execution time for alpha=[0.2 0.8], mu=[-1.  1.]: 17.28 seconds
Execution time for alpha=[0.2 0.8], mu=[-0.5  0.5]: 15.65 seconds
Simulation Result Matrix:
[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [None]:
 # Determine the total number of parameter combinations
total_combinations = len(alphaset) * len(muset)

# Initialize the result matrix to store mean results for each parameter combination
simulation_result_matrix = np.zeros((total_combinations, 2))

# Optional: Track parameter combinations (for debugging or analysis)
parameter_combinations = []
count = 0
# Loop over parameters
for alpha in alphaset:
    for mu in muset:       
        start_time = time.time()
        result_rk_each = np.zeros((nrep,2))
        Data = [generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q) for _ in range(nrep)]
        data_nopar = [data[0] for data in Data]
        # Nonparametric test
        result_rk_each = NonParTestParallel(data_nopar, N, T, M, p, q, nrep, n_grid, BB, r_test)
        
        # Compute the mean results across replications and store them
        simulation_result_matrix[count, :] = result_rk_each.mean(axis=0)
        
         # Print execution time for this parameter combination
        print(f"Execution time for alpha={alpha}, mu={mu}: {time.time() - start_time:.2f} seconds")

        # Increment the counter
        count += 1

# Print the final result matrix
print("Simulation Result Matrix:")
print(simulation_result_matrix)

Execution time for alpha=[0.5 0.5], mu=[-1.  1.]: 16.52 seconds
Execution time for alpha=[0.5 0.5], mu=[-0.5  0.5]: 15.64 seconds
Execution time for alpha=[0.2 0.8], mu=[-1.  1.]: 15.91 seconds
Execution time for alpha=[0.2 0.8], mu=[-0.5  0.5]: 17.21 seconds
Simulation Result Matrix:
[[0.03 0.05]
 [0.   0.  ]
 [0.07 0.08]
 [0.   0.  ]]


In [None]:
y = data[0]
x = data[1]
z = data[2]

NameError: name 'data' is not defined

In [None]:
out_h0 = regpanelmixPMLE(y,x,z, p, q, 2)

NameError: name 'y' is not defined

In [None]:
data = Data[0]
y = data[0]
x = data[1]
z = data[2]

In [None]:
out_h0 = regpanelmixPMLE(y,x,z, p, q, 2)

In [None]:
out_h0

-427.34312266440514

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500): 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)

In [None]:
out_h0 = regpanelmixPMLE(y,x,z, p, q, 2)

In [None]:
out_h0

(-429.142613002301,
 -426.5807904715436,
 863.1615809430872,
 879.6531677758273,
 array([0.72209759, 0.27790241]),
 array([ 0.65113783, -0.26472958]),
 array([1.25154764, 0.9550746 ]),
 array([0.]),
 array([1.72480025e-01, 8.27519975e-01, 4.33508100e-01, 5.66491900e-01,
        9.41921351e-01, 5.80786493e-02, 9.68094801e-01, 3.19051993e-02,
        3.15221563e-01, 6.84778437e-01, 9.96611238e-01, 3.38876190e-03,
        6.15092753e-01, 3.84907247e-01, 2.57956348e-01, 7.42043652e-01,
        4.12076749e-01, 5.87923251e-01, 9.99198863e-01, 8.01136969e-04,
        3.73624339e-01, 6.26375661e-01, 6.30789234e-01, 3.69210766e-01,
        9.77834402e-01, 2.21655978e-02, 9.99219948e-01, 7.80052154e-04,
        5.55615680e-01, 4.44384320e-01, 9.34658684e-01, 6.53413162e-02,
        2.76412764e-01, 7.23587236e-01, 9.59082159e-01, 4.09178415e-02,
        7.51423281e-01, 2.48576719e-01, 7.26572260e-01, 2.73427740e-01,
        9.88373877e-01, 1.16261231e-02, 8.76833297e-01, 1.23166703e-01,
        9

In [None]:
alpha_hat  = out_h0[4]
mubeta_hat = out_h0[5]
sigma_hat  = out_h0[6]
gamma_hat  = out_h0[7]

In [None]:
gamma_hat

array([0.])

In [None]:
sigma_hat

array([1.25154764, 0.9550746 ])

In [None]:
mubeta_hat

array([ 0.65113783, -0.26472958])

In [None]:
alpha_hat

array([0.72209759, 0.27790241])

In [None]:
@njit(parallel=True)
def compute_lr_BB(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T,  m, p, q, BB):
    
    Data = [generate_data(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T, m, p, q) for _ in range(BB)]
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(BB, dtype=np.float64)

    for ii in prange(BB):
        # Extract y, x, z from data_lr (passed as separate arrays in Numba)
        y = y_lr[ii]  # y for replication ii
        x = x_lr[ii]  # x for replication ii
        z = z_lr[ii]  # z for replication ii

        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1[0] - penloglik[0])
    return lr_stat

In [None]:
compute_lr_BB(alpha, mu, sigma, gamma, beta, N, T,  m, p, q, 10)

TypingError: Failed in nopython mode pipeline (step: native parfor lowering)
[1mFailed in full_parfor_gufunc mode pipeline (step: ensure features that are in use are in a valid form)
[1mThe use of a reflected list(array(float64, 2d, C))<iv=None> type, assigned to variable 'y_lr' in globals, is not supported as globals are considered compile-time constants and there is no known way to compile a reflected list(array(float64, 2d, C))<iv=None> type as a constant.
[1m
File "<ipython-input-117-8b76c1cc9a65>", line 13:[0m
[1mdef compute_lr_BB(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T,  m, p, q, BB):
    <source elided>
        # Extract y, x, z from data_lr (passed as separate arrays in Numba)
[1m        y = y_lr[ii]  # y for replication ii
[0m        [1m^[0m[0m
[0m
[0m[1mDuring: lowering "id=95[LoopNest(index_variable = parfor_index.3928, range = (0, BB, 1))]{188: <ir.Block at <ipython-input-117-8b76c1cc9a65> (11)>}Var(parfor_index.3928, <ipython-input-117-8b76c1cc9a65>:11)" at <ipython-input-117-8b76c1cc9a65> (11)[0m

In [None]:
Data = [generate_data(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T, m, p, q) for _ in range(BB)]

NameError: name 'mu_hat' is not defined

In [None]:
@njit(parallel=True)
def compute_lr_BB(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T,  m, p, q, BB):
    
    Data = [generate_data(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T, m, p, q) for _ in range(BB)]
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(BB, dtype=np.float64)

    for ii in prange(BB):
        data = Data[ii]
        y = data[0]
        x = data[1]
        z = data[2]
        
        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1[0] - penloglik[0])
    return lr_stat

In [None]:
compute_lr_BB(alpha, mu, sigma, gamma, beta, N, T,  m, p, q, 10)

AttributeError: 'NoneType' object has no attribute 'args'

In [None]:
BB = 10

In [None]:
penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)

In [None]:
Data = [generate_data(alpha, mu, sigma, gamma, beta, N, T, M, p, q) for _ in range(nrep)]

In [None]:
data = Data[0]

In [None]:
y = data[0]
x = data[1]
z = data[2]

In [None]:
y

array([[-1.15641116e+00,  1.55332160e-01,  1.07325920e+00,
         9.73436515e-01, -5.72416150e-01,  1.93812215e+00,
        -9.22175448e-01, -1.10968215e+00,  1.62301422e+00,
        -2.19768106e-01, -7.62987934e-01,  7.43256771e-01,
        -7.14584648e-01,  9.81750701e-01,  5.28894501e-01,
        -7.02082053e-01,  2.11839456e+00, -7.97685763e-01,
         2.06661645e+00,  1.23859976e+00,  6.30054264e-01,
         7.64205111e-01,  9.53553992e-01, -4.21535519e-01,
         5.97582123e-01, -2.07683741e-01, -1.30560339e+00,
         2.20017367e+00, -2.25908852e+00,  1.06781069e+00,
        -1.10849091e+00,  6.36001530e-01, -2.01822553e-02,
         1.19780104e+00, -9.61079964e-01,  2.85600511e-01,
         2.06829511e-01, -7.81212704e-01,  3.14646199e+00,
        -4.23761596e-01,  9.31277020e-01,  6.24245627e-01,
         2.50801561e-01,  6.70558350e-01, -1.25263109e+00,
        -2.86622034e-01,  4.83947273e-01,  1.19667324e+00,
         1.86933149e+00,  1.75304571e-01,  5.73722960e-0

In [None]:
x

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [None]:
z

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500)  : 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500)  : 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik)
        # return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)

In [None]:
@njit(parallel=True)
def compute_lr_BB(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T,  m, p, q, BB):
    
    Data = [generate_data(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T, m, p, q) for _ in range(BB)]
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(BB, dtype=np.float64)

    for ii in prange(BB):
        data = Data[ii]
        y = data[0]
        x = data[1]
        z = data[2]
        
        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1[0] - penloglik[0])
    return lr_stat

In [None]:
compute_lr_BB(alpha, mu, sigma, gamma, beta, N, T,  m, p, q, 10)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in function getitem>) found for signature:
 
 >>> getitem(OptionalType(float64), Literal[int](0))
 
There are 22 candidate implementations:
[1m      - Of which 22 did not match due to:
      Overload of function 'getitem': File: <numerous>: Line N/A.
        With argument(s): '(OptionalType(float64), int64)':[0m
[1m       No match.[0m
[0m
[0m[1mDuring: typing of intrinsic-call at <ipython-input-132-8add45523890> (24)[0m
[0m[1mDuring: typing of static-get-item at <ipython-input-132-8add45523890> (24)[0m
[1m
File "<ipython-input-132-8add45523890>", line 24:[0m
[1mdef compute_lr_BB(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T,  m, p, q, BB):
    <source elided>
        # Compute likelihood ratio statistic
[1m        lr_stat[ii] = -2 * (penloglik_m1[0] - penloglik[0])
[0m        [1m^[0m[0m


In [None]:
@njit(parallel=True)
def compute_lr_BB(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T,  m, p, q, BB):
    
    Data = [generate_data(alpha_hat, mu_hat, sigma_hat, gamma_hat, beta_hat, N, T, m, p, q) for _ in range(BB)]
    
    # Preallocate lr_stat as a 1D array (Numba-compatible)
    lr_stat = np.zeros(BB, dtype=np.float64)

    for ii in prange(BB):
        data = Data[ii]
        y = data[0]
        x = data[1]
        z = data[2]
        
        # Call regpanelmixPMLE for m components
        penloglik = regpanelmixPMLE(y,x,z, p, q, m, ninits=1)
        
        # Call regpanelmixPMLE for m+1 components
        penloglik_m1 = regpanelmixPMLE(y, x, z, p, q, m + 1, ninits=1)

        # Compute likelihood ratio statistic
        lr_stat[ii] = -2 * (penloglik_m1 - penloglik)
    return lr_stat

compute_lr_BB(alpha, mu, sigma, gamma, beta, N, T,  m, p, q, 10)

ValueError: M must be the size of alpha and mu

In [None]:
m

1

In [None]:
compute_lr_BB(alpha, mu, sigma, gamma, beta, N, T,  2, p, q, 10)

array([  1.81950356,  12.8162745 , -28.08831621,   3.3724856 ,
        -1.53633514, -14.75101788,  -0.94035488,  11.48404303,
        13.29599242,  -3.64148545])

In [None]:
mu_hat

NameError: name 'mu_hat' is not defined

In [None]:
mu


array([-0.5,  0.5])

In [None]:
beta

array([[0.],
       [0.]])

In [None]:
mubeta_hat

array([ 0.65113783, -0.26472958])

In [None]:
@njit
def regpanelmixPMLE(y,x,z, p, q, m, ninits=10, epsilon=1e-8, maxit=2000, epsilon_short=1e-2, maxit_short=500)  : 
    
    t,n = y.shape
    nt = n * t
    y = y.T.flatten()
    
    # y.reshape((n,t)).T - data_lr[0][0] # check equivalence
    # Handle x
    
    x1 = np.hstack((np.ones((nt, 1)), x))
    q1 = q + 1
    

    xz = np.hstack((x1, z))
    
    out_coef = solve_least_squares(xz, y)  # Replace np.linalg.lstsq
    residuals = y - xz @ out_coef
    stdR = np.std(residuals)
    npar = m - 1 + (q1 + 1) * m + p
    ninits_short = ninits * 10 * (q1 + p) * m
    
    if (m == 1) :
        mubeta = out_coef[:q1]
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
        else:
            gamma = np.array([0.0])
        res = y - xz @ out_coef
        sigma = np.sqrt(np.mean(res**2))
        loglik = log_likelihood_normal(res,0,sigma)

        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        penloglik = loglik
        alpha = np.array([1])
        postprobs = np.ones(n)
    else: 
        # First draw random start point
        if p > 0:
            gamma = out_coef[q1:(q1 + p)]
            # Perform least squares regression with both x and z
            gamma_draw = generate_random_uniform(0.5, 1.5, (p, ninits_short)) * gamma
            mubeta_hat = out_coef[:q1]
            y = y - z @ gamma
        else:
            # Perform least squares regression with x only
            
            gamma = np.array([0.0])
            mubeta_hat = out_coef
            gamma_draw = np.zeros((1,ninits_short), dtype=np.float64)

        # Initialize alpha
        alpha_draw = generate_random_uniform(0, 1, (m, ninits_short))
        alpha_draw = (alpha_draw / np.sum(alpha_draw, axis=0))

        # Initialize mubeta
        if q > 0:
            minMU = np.min(y - x @ mubeta_hat[1:])
            maxMU = np.max(y - x @ mubeta_hat[1:])
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
                for i in range(1, q1):
                    mubeta_draw[q1 * j + i, :] = mubeta_hat[i] * np.random.uniform(-2, 2, size=ninits_short)
        else:
            minMU = np.min(y)
            maxMU = np.max(y)
            mubeta_draw = np.zeros((q1 * m, ninits_short))
            for j in range(m):
                mubeta_draw[q1 * j, :] = np.random.uniform(minMU, maxMU, size=ninits_short)
        
        an = 1 / n    
        sigma_0 = np.full(m, stdR)
    
        # Initialize sigma
        sigma_draw = generate_random_uniform(0.01, 1, (m, ninits_short)) * stdR
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit_short, tol=epsilon_short)
        
        # 
        components = np.argsort(penloglikset)[::-1][:ninits]
        alpha_draw = alpha_draw[:,components]
        mubeta_draw = mubeta_draw[:,components]
        sigma_draw = sigma_draw[:,components]
        gamma_draw = gamma_draw[:,components]
        
        
        alpha_draw,mubeta_draw,sigma_draw,gamma_draw,penloglikset, loglikset, post = EM_optimization(y, x, z, p, q, sigma_0, alpha_draw, mubeta_draw, sigma_draw, gamma_draw, m, t, an, maxit=maxit, tol=epsilon)
        
        index = np.argmax(penloglikset)
        alpha_hat = alpha_draw[:,index]
        mubeta_hat = mubeta_draw[:,index]
        sigma_hat = sigma_draw[:,index]
        gamma_hat = gamma_draw[:,index]
        post = post[:, index]
        penloglik = penloglikset[index]
        loglik = loglikset[index]
        aic = -2 * loglik + 2 * npar
        bic = -2 * loglik + np.log(n) * npar
        
        return(penloglik, loglik, aic, bic, alpha_hat, mubeta_hat, sigma_hat, gamma_hat, post)