In [37]:
import os
import numpy as np
import pandas as pd
from mne import create_info
from mne.io import RawArray
from mne.preprocessing import ICA

In [38]:
P1K_X = pd.read_csv('../Data/P1K_X.csv', index_col=0)

In [39]:
import pandas as pd
import os
from mne.preprocessing import ICA
from mne import create_info
from mne.io import RawArray

# Assume your DataFrame is named P1K_X
# P1K_X has 4257 genes as row indices and 1035 columns as samples

# 1. Convert DataFrame to a numpy array (no need to transpose)
data = P1K_X.values  # Shape should be (4257 genes, 1035 samples)

# 2. Create MNE Info structure
sfreq = 1.0  # 1 Hz sampling frequency, modify if needed
info = create_info(ch_names=list(P1K_X.index), sfreq=sfreq, ch_types='eeg')

# 3. Create Raw object
raw = RawArray(data, info)

# 4. Initialize ICA with Infomax method
n_components = 250  # Number of independent components
ica = ICA(n_components=n_components, method='infomax', random_state=97)

# 5. Fit ICA to the raw data
ica.fit(raw)

# 6. Extract the sources matrix S (4257 genes, 200 components)
S = ica.get_components().T  # The components (genes x components)

# 7. Calculate the mixing matrix A (200 components, 1035 samples)
A = ica.get_sources(raw).get_data()  # Sources (components x samples)

# 8. Convert S and A to pandas DataFrames
S_df = pd.DataFrame(S)  # S with genes as rows
A_df = pd.DataFrame(A)  # A with samples as columns

# 9. Create directory for results if it doesn't exist
results_dir = "Results/InfoMax_ICA"
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

# 10. Save the DataFrames to CSV files
S_df.to_csv(f"{results_dir}/proc_S.csv", index=False)  # Save index (gene names)
A_df.to_csv(f"{results_dir}/proc_A.csv", index=False)  # Save without index

print(f"Mixing matrix A saved to {results_dir}/proc_A.csv with shape {A_df.shape}")
print(f"Sources matrix S saved to {results_dir}/proc_S.csv with shape {S_df.shape}")

Creating RawArray with float64 data, n_channels=4257, n_times=1035
    Range : 0 ... 1034 =      0.000 ...  1034.000 secs
Ready.
Fitting ICA to data using 4257 channels (please be patient, this may take a while)


  ica.fit(raw)


Selecting by number: 250 components
Computing Infomax ICA
Fitting ICA took 5.6s.
Mixing matrix A saved to Results/InfoMax_ICA/proc_A.csv with shape (250, 1035)
Sources matrix S saved to Results/InfoMax_ICA/proc_S.csv with shape (250, 4257)


In [40]:
A.shape

(250, 1035)