<a href="https://colab.research.google.com/github/AyeshaAnzerBCIT/Multisource/blob/main/Fusion_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install h5py

/bin/bash: /home/ayesha/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [None]:
import os
import shutil
import tarfile

import h5py
import matplotlib.pyplot as plt
import numpy as np
import scipy.io
import scipy.signal as signal

In [None]:
def extract_tar_files(root_dir):
    """Extract every ``.tar.gz`` archive found under ``root_dir``.

    An archive ``<name>.tar.gz`` is unpacked into a sibling directory
    ``<name>``. If that directory already exists the archive is skipped.

    Extraction is best-effort: a failure on one archive is printed and the
    walk continues with the next one. Each archive is first unpacked into a
    temporary ``<name>.partial`` directory that is renamed to ``<name>`` only
    after the whole archive has been extracted, so an interrupted or failed
    extraction is never mistaken for a finished one on the next run.

    Args:
        root_dir: Directory tree to search for ``.tar.gz`` files.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    suffix = ".tar.gz"
    for subdir, _, files in os.walk(root_dir):
        for file in files:
            if not file.endswith(suffix):
                continue

            tar_path = os.path.join(subdir, file)
            # Strip only the trailing suffix (str.replace would remove every occurrence).
            extract_path = os.path.join(subdir, file[: -len(suffix)])

            if os.path.exists(extract_path):
                print(f"Already extracted: {extract_path}")
                continue

            print(f"Extracting {tar_path}...")
            staging_path = extract_path + ".partial"
            try:
                # Start from a clean staging directory in case an earlier run was interrupted.
                shutil.rmtree(staging_path, ignore_errors=True)
                os.makedirs(staging_path)
                with tarfile.open(tar_path, "r:gz") as tar:
                    if hasattr(tarfile, "data_filter"):
                        # Reject members that would escape the target directory.
                        tar.extractall(staging_path, filter="data")
                    else:
                        # Older Python without extraction filters.
                        tar.extractall(staging_path)
                os.rename(staging_path, extract_path)
                print(f"Extraction completed: {extract_path}")
            except Exception as e:
                # Remove the half-written output so the archive is retried next time.
                shutil.rmtree(staging_path, ignore_errors=True)
                print(f"Error extracting {tar_path}: {e}")

In [None]:
def find_mat_files(root_dir, modality="EEG"):
    """Collect the paths of ``.mat`` files stored for one modality.

    Walks ``root_dir`` and keeps files ending in ``.mat`` whose directory path
    contains both ``modality`` and ``"mat_format"`` as substrings (the match is
    made against the whole directory path, ``root_dir`` included).

    Args:
        root_dir: Directory tree to search.
        modality: Substring the directory path must contain.

    Returns:
        A list of file paths, in ``os.walk`` order.
    """
    return [
        os.path.join(folder, name)
        for folder, _, names in os.walk(root_dir)
        if modality in folder and "mat_format" in folder
        for name in names
        if name.endswith(".mat")
    ]

In [None]:
def load_mat_file(file_path):
    """Read a ``.mat`` file, whether it is HDF5-based (MATLAB v7.3) or not.

    HDF5 files are opened with ``h5py`` and each top-level key is converted
    with ``np.array``; any other file is passed to ``scipy.io.loadmat``.

    Args:
        file_path: Path of the ``.mat`` file to read.

    Returns:
        A dict of the file's contents, or ``None`` if anything went wrong
        (the error is printed rather than raised).
    """
    try:
        if not h5py.is_hdf5(file_path):
            # Not an HDF5 container: let SciPy parse it.
            contents = scipy.io.loadmat(file_path)
            print(f"Loaded: {file_path}")
            return contents

        with h5py.File(file_path, 'r') as h5:
            arrays = {}
            for name in h5.keys():
                arrays[name] = np.array(h5[name])  # materialise before the file closes
            print(f"Loaded (HDF5): {file_path}")
            return arrays
    except Exception as err:
        print(f"Error loading {file_path}: {err}")
        return None

In [None]:
def process_eeg_data(root_dir):
    """Run the full EEG loading pipeline over ``root_dir``.

    Unpacks any archives, locates the ``.mat`` files, loads each one and files
    the result under a subject ID taken from the path (the third component
    from the end). Files that fail to load, or load as an empty result, are
    left out. When several files map to the same subject ID, the one loaded
    last replaces the earlier ones.

    Args:
        root_dir: Root directory of the dataset.

    Returns:
        A dict mapping subject ID to the loaded file contents.
    """
    extract_tar_files(root_dir)

    by_subject = {}
    for path in find_mat_files(root_dir):
        path_parts = path.split(os.sep)
        subject_id = path_parts[-3]  # third-from-last path component
        contents = load_mat_file(path)
        if not contents:
            continue
        by_subject[subject_id] = contents

    return by_subject


In [None]:

# Example usage: unpack the archives, then load every EEG .mat file,
# keeping the results keyed by file path.
root_dataset_path = "/data/ayesha/PhD/dataset/EEG"
extract_tar_files(root_dataset_path)
mat_files = find_mat_files(root_dataset_path)
loaded_data = dict(
    (mat_path, load_mat_file(mat_path))
    for mat_path in mat_files
)

Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00054122
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00054387
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00054023
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00054039
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00054207
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00056640
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00063117
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00058775
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00057092
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Female/A00063051
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Male/A00062919
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Male/A00053990
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/Male/A00056604
Already extracted: /data/ayesha/PhD/dataset/EEG/age1824/M