In [1]:
# Task 1
import numpy as np
from sklearn.decomposition import FastICA

def perform_ica(X, n_components):
    ica = FastICA(n_components=n_components)
    S_ = ica.fit_transform(X)  # Reconstruct signals
    A_ = ica.mixing_  # Get estimated mixing matrix
    return S_, A_


In [2]:
# Task 2
def estimate_lingam(W, threshold=0.1):
    W_sparse = np.where(np.abs(W) < threshold, 0, W)
    B_est = np.linalg.inv(W_sparse)  # Estimating B as the inverse of sparse W
    return B_est

In [3]:
# Task 3
def generate_data(B, n_samples=1000):
    n = B.shape[0]
    E = np.random.uniform(-1, 1, (n_samples, n))
    X = np.zeros_like(E)
    for i in range(n_samples):
        X[i, :] = np.linalg.solve(np.eye(n) - B, E[i, :])
    return X

# Strictly lower triangular B matrix for simulation
B_true = np.tril(np.random.uniform(-1, 1, (4, 4)), -1)
X = generate_data(B_true)

In [4]:
# Task 4
# Step 1: Perform ICA
S_, A_ = perform_ica(X, n_components=4)

# Step 2: Estimate LiNGAM model
W_ = np.linalg.pinv(A_)  # Compute the pseudo-inverse of mixing matrix A
B_est = estimate_lingam(W_)

# Verify if B_est is close to B_true
print("Estimated B matrix:\n", B_est)
print("True B matrix:\n", B_true)


Estimated B matrix:
 [[-0.         -0.5757595  -0.         -0.        ]
 [ 0.59902853 -0.24759146  0.          0.        ]
 [ 0.41526836 -0.41061637 -0.         -0.58536489]
 [ 0.07595626 -0.30118101 -0.57604845  0.55584645]]
True B matrix:
 [[ 0.          0.          0.          0.        ]
 [ 0.4126085   0.          0.          0.        ]
 [ 0.43800415  0.69491042  0.          0.        ]
 [ 0.84950708  0.85054274 -0.98028084  0.        ]]


In [5]:
# Task 5
import pandas as pd
import numpy as np
from sklearn.decomposition import FastICA

# Load the data
data = pd.read_csv('./Galton_processed.txt', delimiter='\t')

# Select the relevant columns
data = data[['father', 'mother', 'Gender', 'Height']]

# Preprocess: converting 'Gender' to numeric if needed (assuming 0 for female, 1 for male)
data['Gender'] = data['Gender'].replace({'M': 1, 'F': 0})


In [7]:
S_, A_ = perform_ica(X, n_components=4)

# Step 2: Estimate LiNGAM model
W_ = np.linalg.pinv(A_)  # Compute the pseudo-inverse of mixing matrix A
B_est = estimate_lingam(W_)

# Verify if B_est is close to B_true
print("Estimated B matrix:\n", B_est)
print("True B matrix:\n", B_true)


Estimated B matrix:
 [[-0.         -0.         -0.         -0.57571111]
 [-0.         -0.         -0.59885447 -0.24753691]
 [ 0.58532435  0.         -0.41490124 -0.41067408]
 [-0.55577038  0.57604566 -0.07618199 -0.30087739]]
True B matrix:
 [[ 0.          0.          0.          0.        ]
 [ 0.4126085   0.          0.          0.        ]
 [ 0.43800415  0.69491042  0.          0.        ]
 [ 0.84950708  0.85054274 -0.98028084  0.        ]]
