# MNE Data Analysis Pipeline with Metadata

This notebook provides tools for:
- Loading FIF files and their corresponding CSV metadata
- Concatenating multiple sessions from a subject
- Filtering and preprocessing

## 1. Imports

In [1]:
import h5py
import numpy as np
import mne
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
from scipy import stats as scipy_stats
import glob
from matplotlib.backends.backend_pdf import PdfPages

# Set plotting style: seaborn "whitegrid" theme and a default figure size of 12 x 8 inches
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 8)

# For interactive plotting: switch matplotlib to the Qt5Agg backend.
# NOTE(review): this presumably needs a Qt binding and a display to be available —
# confirm before running the notebook headless.
matplotlib.use("Qt5Agg")

Channels marked as bad:
none


## 2. Metadata Loading Functions

In [2]:
def load_session_metadata(csv_path):
    """
    Read one session's trial metadata from a CSV file and print a short summary.

    Parameters
    ----------
    csv_path : str
        Path to CSV metadata file

    Returns
    -------
    metadata : pd.DataFrame
        The CSV contents. Whichever of the columns trial_number, set_size,
        match, correct, response and response_time are present are converted
        to numeric dtype (entries that cannot be parsed become NaN); all other
        columns are returned as read.
    """
    numeric_names = ('trial_number', 'set_size', 'match', 'correct', 'response', 'response_time')

    table = pd.read_csv(csv_path)

    # Coerce only those known numeric columns that this file actually contains
    for name in [candidate for candidate in numeric_names if candidate in table.columns]:
        table[name] = pd.to_numeric(table[name], errors='coerce')

    print(f"Loaded metadata: {len(table)} trials")
    print(f"  Columns: {list(table.columns)}")

    # Summary lines; each one is printed only when its column exists
    available = set(table.columns)
    if 'correct' in available:
        print(f"  Accuracy: {table['correct'].mean() * 100:.1f}%")
    if 'response_time' in available:
        print(f"  Mean RT: {table['response_time'].mean():.3f}s")
    if 'set_size' in available:
        sizes = sorted(table['set_size'].dropna().unique())
        print(f"  Set sizes: {sizes}")

    return table


def load_subject_metadata(subject_dir, subject_id=None, pattern='*metadata*.csv'):
    """
    Load every metadata CSV of one subject and stack them into a single table.

    Files matching ``pattern`` inside ``subject_dir`` are processed in sorted
    path order; the position in that order (starting at 1) becomes the
    session number. A per-session and a combined summary are printed.

    Parameters
    ----------
    subject_dir : str
        Directory containing subject's session files
    subject_id : str, optional
        Subject identifier; when given it is written to a 'subject' column
    pattern : str
        Glob pattern to match metadata CSV files (default: '*metadata*.csv')

    Returns
    -------
    metadata_all : pd.DataFrame
        Row-wise concatenation of all sessions (index reset) with the added
        columns 'session' and 'session_file', plus 'subject' when
        ``subject_id`` is given.

    Raises
    ------
    ValueError
        If no file in ``subject_dir`` matches ``pattern``.
    """
    numeric_names = ('trial_number', 'set_size', 'match', 'correct', 'response', 'response_time')
    rule = "=" * 80

    paths = glob.glob(os.path.join(subject_dir, pattern))
    paths.sort()
    if not paths:
        raise ValueError(f"No metadata CSV files found in {subject_dir}")

    file_names = [os.path.basename(path) for path in paths]
    print(f"Found {len(paths)} metadata files:")
    for file_name in file_names:
        print(f"  {file_name}")

    print("\n" + rule)

    frames = []
    for number, (path, file_name) in enumerate(zip(paths, file_names), start=1):
        print(f"\nLoading session {number}: {file_name}")

        frame = pd.read_csv(path)

        # Provenance columns: running session number and the source file name
        frame['session'] = number
        frame['session_file'] = file_name
        if subject_id is not None:
            frame['subject'] = subject_id

        # Known numeric columns are coerced; unparseable entries become NaN
        for column in numeric_names:
            if column in frame.columns:
                frame[column] = pd.to_numeric(frame[column], errors='coerce')

        print(f"  Trials: {len(frame)}")
        if 'correct' in frame.columns:
            print(f"  Accuracy: {frame['correct'].mean() * 100:.1f}%")

        frames.append(frame)

    metadata_all = pd.concat(frames, ignore_index=True)

    print("\n" + rule)
    print(f"\nCombined metadata:")
    print(f"  Total trials: {len(metadata_all)}")
    print(f"  Sessions: {metadata_all['session'].nunique()}")

    if 'correct' in metadata_all.columns:
        print(f"  Overall accuracy: {metadata_all['correct'].mean() * 100:.1f}%")

        print(f"\n  Per-session accuracy:")
        for session in sorted(metadata_all['session'].unique()):
            in_session = metadata_all['session'] == session
            share = metadata_all.loc[in_session, 'correct'].mean() * 100
            print(f"    Session {session}: {share:.1f}%")

    if 'response_time' in metadata_all.columns:
        print(f"\n  Overall mean RT: {metadata_all['response_time'].mean():.3f}s")

    return metadata_all

## 3. Multi-Session Concatenation Functions

In [3]:
def load_and_concatenate_subject(
    subject_dir: str,
    use_common_channels: bool = True,
    preload: bool = True,
    verbose: bool = False
) -> Tuple[mne.io.Raw, pd.DataFrame]:
    """
    Load every FIF recording and every CSV metadata file of one subject.

    All ``.fif`` and ``.csv`` files directly inside ``subject_dir`` are used.
    Each group is sorted by file name; recordings are concatenated in that
    order and CSV files receive session numbers 1, 2, ... in that order.
    No name-based pairing is performed: the only consistency check is a
    printed comparison of the number of FIF files with the number of
    sessions present in the metadata.

    Parameters
    ----------
    subject_dir : str
        Directory containing subject's session files
    use_common_channels : bool
        If True (and more than one recording is found), keep only the
        channels present in every recording, in sorted name order
    preload : bool
        Passed to ``mne.concatenate_raws``
    verbose : bool
        Print per-file details and pass ``verbose`` on to the MNE calls

    Returns
    -------
    raw_concat : mne.io.Raw
        Concatenated Raw object from all sessions
    metadata_all : pd.DataFrame
        Concatenated metadata from all sessions with 'session' and
        'session_file' columns

    Raises
    ------
    ValueError
        If the directory contains no ``.fif`` or no ``.csv`` file.

    Examples
    --------
    >>> raw, metadata = load_and_concatenate_subject('data/Subject_01/')
    """
    banner = "=" * 80
    numeric_names = ('trial_number', 'set_size', 'match', 'correct',
                     'response', 'response_time')

    print(banner)
    print("LOADING SUBJECT DATA")
    print(banner)
    print(f"\nDirectory: {subject_dir}")

    # One directory listing, split by extension; each group sorted by name
    entries = os.listdir(subject_dir)
    fif_files = sorted(name for name in entries if name.endswith('.fif'))
    csv_files = sorted(name for name in entries if name.endswith('.csv'))

    if not fif_files:
        raise ValueError(f"No .fif files found in {subject_dir}")
    if not csv_files:
        raise ValueError(f"No .csv files found in {subject_dir}")

    print(f"\n[1/2] Loading FIF files...")
    print(f"Found {len(fif_files)} FIF files:")
    for name in fif_files:
        print(f"  {name}")

    # Open each recording without loading its samples yet
    raw_list = []
    for name in fif_files:
        session_raw = mne.io.read_raw_fif(
            os.path.join(subject_dir, name), preload=False, verbose=verbose
        )
        raw_list.append(session_raw)

        if verbose:
            print(f"  Loaded: {name} ({len(session_raw.ch_names)} channels)")

    # Restrict every recording to the channels they all share
    if use_common_channels and len(raw_list) > 1:
        shared = set.intersection(*(set(r.ch_names) for r in raw_list))
        common_channels = sorted(shared)
        print(f"\nUsing {len(common_channels)} common channels")

        raw_list = [
            r.copy().pick_channels(common_channels, ordered=True) for r in raw_list
        ]

    print("\nConcatenating FIF files...")
    raw_concat = mne.concatenate_raws(raw_list, preload=preload, verbose=verbose)

    print(f"✓ Neural data concatenated:")
    print(f"  Duration: {raw_concat.times[-1]:.2f}s")
    print(f"  Channels: {len(raw_concat.ch_names)}")
    print(f"  Sampling rate: {raw_concat.info['sfreq']} Hz")

    print("\n" + banner)
    print("[2/2] Loading metadata...")
    print(f"Found {len(csv_files)} CSV files:")
    for name in csv_files:
        print(f"  {name}")

    frames = []
    for number, name in enumerate(csv_files, start=1):
        if verbose:
            print(f"\n  Loading session {number}: {name}")

        frame = pd.read_csv(os.path.join(subject_dir, name))

        # Provenance columns: running session number and the source file name
        frame['session'] = number
        frame['session_file'] = name

        # Known numeric columns are coerced; unparseable entries become NaN
        for column in numeric_names:
            if column in frame.columns:
                frame[column] = pd.to_numeric(frame[column], errors='coerce')

        if verbose:
            print(f"    Trials: {len(frame)}")
            if 'correct' in frame.columns:
                print(f"    Accuracy: {frame['correct'].mean() * 100:.1f}%")

        frames.append(frame)

    metadata_all = pd.concat(frames, ignore_index=True)

    print(f"\n✓ Metadata concatenated:")
    print(f"  Total trials: {len(metadata_all)}")
    print(f"  Sessions: {metadata_all['session'].nunique()}")

    if 'correct' in metadata_all.columns:
        print(f"  Overall accuracy: {metadata_all['correct'].mean() * 100:.1f}%")

    if 'response_time' in metadata_all.columns:
        print(f"  Overall mean RT: {metadata_all['response_time'].mean():.3f}s")

    print("\n" + banner)
    print("VERIFICATION")
    print(banner)

    # Only the counts are compared; a mismatch is reported, not raised
    n_sessions_fif = len(fif_files)
    n_sessions_meta = metadata_all['session'].nunique()

    if n_sessions_fif != n_sessions_meta:
        print(f"⚠ WARNING: Session count mismatch!")
        print(f"  FIF files: {n_sessions_fif}")
        print(f"  CSV files: {n_sessions_meta}")
    else:
        print(f"✓ Session count matches: {n_sessions_fif} sessions")

    print("\n✓ Loading complete!")

    return raw_concat, metadata_all


def _pair_fif_with_csv(fif_files, csv_files):
    """Pair each FIF file name with at most one CSV file name by shared base name."""

    def _strip_end(name, ending):
        # Remove the extension only where it is an actual suffix of the name
        return name[:-len(ending)] if name.endswith(ending) else name

    fif_bases = {
        f: _strip_end(f, '.fif').replace('_raw', '').replace('_eeg', '')
        for f in fif_files
    }
    csv_bases = {
        c: _strip_end(c, '.csv').replace('_metadata', '')
        for c in csv_files
    }

    assigned = {}
    unused = list(csv_files)

    # Pass 1: exact base-name matches win, so e.g. "Session_1" is never
    # captured by "Session_10" merely because that CSV sorts first.
    for fif in fif_files:
        for csv in unused:
            if csv_bases[csv] == fif_bases[fif]:
                assigned[fif] = csv
                unused.remove(csv)
                break

    # Pass 2: substring fallback, restricted to CSV files not yet taken so
    # that one CSV can never be attached to two recordings.
    for fif in fif_files:
        if fif in assigned:
            continue
        base = fif_bases[fif]
        if not base:
            # An empty base would be a substring of every name
            continue
        for csv in unused:
            csv_base = csv_bases[csv]
            if csv_base and (base in csv_base or csv_base in base):
                assigned[fif] = csv
                unused.remove(csv)
                break

    return [(fif, assigned.get(fif)) for fif in fif_files]


def load_and_concatenate_subject_paired(
    subject_dir: str,
    fif_prefix: Optional[str] = None,
    csv_suffix: Optional[str] = None,
    use_common_channels: bool = True,
    preload: bool = True,
    verbose: bool = False
) -> Tuple[mne.io.Raw, pd.DataFrame]:
    """
    Load and concatenate with smart FIF-CSV pairing.

    Pairs files based on shared naming (e.g., Session_01_raw.fif with
    Session_01_metadata.csv). Base names are obtained by dropping the
    extension and the markers '_raw' / '_eeg' (FIF) or '_metadata' (CSV).
    Exact base-name matches are made first; remaining recordings fall back
    to a substring match. Each CSV file is paired with at most one
    recording. Recordings without a CSV are still concatenated but
    contribute no metadata rows.

    Parameters
    ----------
    subject_dir : str
        Directory containing subject's session files
    fif_prefix : str, optional
        Only load FIF files starting with this prefix
    csv_suffix : str, optional
        Only load CSV files whose name contains this text, compared
        case-insensitively (e.g., 'metadata')
    use_common_channels : bool
        If True (and more than one recording is found), keep only the
        channels present in every recording, in sorted name order
    preload : bool
        Passed to ``mne.concatenate_raws``
    verbose : bool
        Print pairing details and pass ``verbose`` on to the MNE calls

    Returns
    -------
    raw_concat : mne.io.Raw
        Concatenated Raw object
    metadata_all : pd.DataFrame
        Concatenated metadata with added 'session', 'session_file' and
        'fif_file' columns; 'session' is the 1-based position of the paired
        recording in sorted FIF order

    Raises
    ------
    ValueError
        If no FIF file or no CSV file passes the filters, or if no
        recording could be paired with a CSV.

    Examples
    --------
    >>> # Load only files with specific naming
    >>> raw, meta = load_and_concatenate_subject_paired(
    ...     'data/Subject_01/',
    ...     csv_suffix='metadata'
    ... )
    """

    print("="*80)
    print("LOADING SUBJECT DATA (PAIRED MODE)")
    print("="*80)
    print(f"\nDirectory: {subject_dir}")

    # Get all files
    all_files = os.listdir(subject_dir)

    # Find FIF files
    fif_files = [f for f in all_files if f.endswith('.fif')]
    if fif_prefix:
        fif_files = [f for f in fif_files if f.startswith(fif_prefix)]
    fif_files = sorted(fif_files)

    # Find CSV files; both sides are lowercased so that a mixed-case
    # csv_suffix (e.g. 'Metadata') still matches
    csv_files = [f for f in all_files if f.endswith('.csv')]
    if csv_suffix:
        wanted = csv_suffix.lower()
        csv_files = [f for f in csv_files if wanted in f.lower()]
    csv_files = sorted(csv_files)

    # Verify
    if len(fif_files) == 0:
        raise ValueError(f"No FIF files found in {subject_dir}")
    if len(csv_files) == 0:
        raise ValueError(f"No CSV files found in {subject_dir}")

    print(f"\n[1/2] Loading {len(fif_files)} FIF files...")
    for f in fif_files:
        print(f"  {f}")

    # Pair FIF with CSV files and report the outcome in FIF order
    paired_files = _pair_fif_with_csv(fif_files, csv_files)
    for fif_file, matching_csv in paired_files:
        if matching_csv:
            if verbose:
                print(f"  Paired: {fif_file} ↔ {matching_csv}")
        else:
            # No match found, still use this FIF but warn
            print(f"  ⚠ No matching CSV for: {fif_file}")

    # Open each recording without loading its samples yet
    raw_list = []
    for fif_file, _ in paired_files:
        full_path = os.path.join(subject_dir, fif_file)
        raw = mne.io.read_raw_fif(full_path, preload=False, verbose=verbose)
        raw_list.append(raw)

    # Common channels
    if use_common_channels and len(raw_list) > 1:
        common_channels = set(raw_list[0].ch_names)
        for raw in raw_list[1:]:
            common_channels &= set(raw.ch_names)
        common_channels = sorted(common_channels)
        print(f"\nUsing {len(common_channels)} common channels")

        for i, raw in enumerate(raw_list):
            raw_list[i] = raw.copy().pick_channels(common_channels, ordered=True)

    # Concatenate
    raw_concat = mne.concatenate_raws(raw_list, preload=preload, verbose=verbose)
    print(f"\n✓ Neural data: {raw_concat.times[-1]:.2f}s, {len(raw_concat.ch_names)} channels")

    # Load metadata
    print("\n[2/2] Loading metadata...")
    metadata_list = []

    for session_idx, (fif_file, csv_file) in enumerate(paired_files, start=1):
        if csv_file is None:
            print(f"  ⚠ Skipping session {session_idx} (no CSV)")
            continue

        full_path = os.path.join(subject_dir, csv_file)
        metadata = pd.read_csv(full_path)
        metadata['session'] = session_idx
        metadata['session_file'] = csv_file
        metadata['fif_file'] = fif_file

        # Known numeric columns are coerced; unparseable entries become NaN
        numeric_cols = ['trial_number', 'set_size', 'match', 'correct',
                       'response', 'response_time']
        for col in numeric_cols:
            if col in metadata.columns:
                metadata[col] = pd.to_numeric(metadata[col], errors='coerce')

        metadata_list.append(metadata)

    if len(metadata_list) == 0:
        raise ValueError("No metadata files could be loaded")

    metadata_all = pd.concat(metadata_list, ignore_index=True)

    print(f"\n✓ Metadata: {len(metadata_all)} trials from {len(metadata_list)} sessions")

    return raw_concat, metadata_all


## 5. ICA Summary PDF Output

In [4]:
def ICA_summary_pdf(data, ica, pdf_filename='ica_components_report.pdf'):
    """
    Write a multi-page PDF report describing every component of a fitted ICA.

    One page is produced per component (topography, first 10 s and first 2 s
    of the source time course, Welch power spectrum, variance bar chart and a
    statistics box), followed by a summary page ranking all components.
    A ranked variance table is also printed to the console.

    All source time courses are derived from ``data``; no notebook-level
    variable is read.

    Parameters
    ----------
    data : mne.io.Raw
        Recording from which the ICA sources are computed. The code indexes
        the sources as (component, time).
        NOTE(review): Epochs input is not handled — confirm it is not needed.
    ica : mne.preprocessing.ICA
        Fitted ICA object
    pdf_filename : str
        Path of the PDF file to write (default: 'ica_components_report.pdf')

    Returns
    -------
    None

    Notes
    -----
    The "explained variance" reported here is each component's share of the
    summed variance of the source time courses (the shares add up to 100%).
    """
    # Function-scope import, done once instead of on every loop iteration
    from mne.time_frequency import psd_array_welch

    n_components = ica.n_components_
    sfreq = data.info['sfreq']

    # Source time courses are computed once, from the instance passed in
    sources = ica.get_sources(data)
    source_data = sources.get_data()
    source_times = sources.times
    n_times = source_data.shape[1]

    # Per-component variance, normalised to proportions of the total
    component_var = np.var(source_data, axis=1)
    total_var = np.sum(component_var)
    if np.isfinite(total_var) and total_var > 0:
        explained_var_array = component_var / total_var
    else:
        # Fallback: equal shares when the variances cannot be normalised
        print("Warning: could not normalise source variances; "
              "using equal shares for all components")
        explained_var_array = np.ones(n_components) / n_components

    print(f"Explained variance shape: {explained_var_array.shape}")
    print(f"Number of components: {n_components}")
    print(f"Explained variance values:\n{explained_var_array}")
    print(f"Total variance: {np.sum(explained_var_array)*100:.2f}%")

    # Ranking by variance (largest first), computed once for all pages
    sorted_indices = np.argsort(explained_var_array)[::-1]
    rank_of = np.empty(len(sorted_indices), dtype=int)
    rank_of[sorted_indices] = np.arange(1, len(sorted_indices) + 1)

    # Sample counts for the two time-course panels
    n_samples = min(int(10 * sfreq), n_times)
    n_samples_detail = min(int(2 * sfreq), n_times)

    # Welch segment length, limited by the amount of data available
    n_fft = min(2048, n_times)
    n_per_seg = n_fft

    with PdfPages(pdf_filename) as pdf:
        for comp_idx in range(n_components):
            # Row slice keeps the (1, n_times) shape used for plotting and PSD
            comp_data = source_data[comp_idx:comp_idx + 1]

            fig = plt.figure(figsize=(11.69, 8.27))  # A4 landscape
            try:
                # Title with component number and explained variance
                var_text = f'{explained_var_array[comp_idx]*100:.2f}%'
                fig.suptitle(f'ICA{comp_idx:03d} - Explained Variance: {var_text}',
                             fontsize=16, fontweight='bold')

                # 1. Topography
                ax1 = fig.add_subplot(2, 3, 1)
                ica.plot_components(picks=comp_idx, axes=ax1, show=False, colorbar=True)

                # 2. Time course (using first 10 seconds of data)
                ax2 = fig.add_subplot(2, 3, 2)
                ax2.plot(source_times[:n_samples], comp_data[:, :n_samples].T,
                         'k', linewidth=0.5)
                ax2.set_xlabel('Time (s)')
                ax2.set_ylabel('AU')
                ax2.set_title('Time Course (first 10s)')
                ax2.grid(True, alpha=0.3)

                # 3. Power spectrum over all available data
                ax3 = fig.add_subplot(2, 3, 3)
                psds, freqs = psd_array_welch(
                    comp_data,
                    sfreq=sfreq,
                    fmin=0.5,
                    fmax=50,
                    n_fft=n_fft,
                    n_per_seg=n_per_seg
                )
                ax3.semilogy(freqs, psds.T, 'k', linewidth=1)
                ax3.set_xlabel('Frequency (Hz)')
                # Sources are plotted in arbitrary units (see the 'AU' time
                # course axes), so the PSD is labelled AU²/Hz rather than µV²/Hz
                ax3.set_ylabel('Power Spectral Density (AU²/Hz)')
                ax3.set_title('Power Spectrum')
                ax3.grid(True, alpha=0.3)
                ax3.set_xlim([0, 50])

                # 4. Time course detail (first 2 seconds)
                ax4 = fig.add_subplot(2, 3, 4)
                ax4.plot(source_times[:n_samples_detail],
                         comp_data[:, :n_samples_detail].T, 'b', linewidth=0.8)
                ax4.set_xlabel('Time (s)')
                ax4.set_ylabel('AU')
                ax4.set_title('Time Course Detail (first 2s)')
                ax4.grid(True, alpha=0.3)

                # 5. Variance bar chart with this component highlighted
                ax5 = fig.add_subplot(2, 3, 5)
                colors = ['red' if i == comp_idx else 'gray' for i in range(n_components)]
                ax5.bar(range(n_components), explained_var_array * 100,
                        color=colors, alpha=0.6)
                ax5.set_xlabel('Component')
                ax5.set_ylabel('Explained Variance (%)')
                ax5.set_title('Variance Explained by All Components')
                ax5.axhline(y=5, color='r', linestyle='--', alpha=0.5, label='5% threshold')
                ax5.legend()
                ax5.grid(True, alpha=0.3, axis='y')

                # 6. Statistics box
                ax6 = fig.add_subplot(2, 3, 6)
                ax6.axis('off')

                values = comp_data.ravel()
                comp_mean = np.mean(values)
                comp_std = np.std(values)
                # Non-excess kurtosis (fourth central moment / std**4);
                # undefined for a constant signal
                if comp_std > 0:
                    kurt = np.mean((values - comp_mean) ** 4) / comp_std ** 4
                else:
                    kurt = float('nan')

                stats_text = "\n".join([
                    "Component Statistics:",
                    "",
                    f"    Mean: {comp_mean:.3f}",
                    f"    Std: {comp_std:.3f}",
                    f"    Min: {np.min(values):.3f}",
                    f"    Max: {np.max(values):.3f}",
                    f"    Kurtosis: {kurt:.3f}",
                    "",
                    f"    Explained Variance: {explained_var_array[comp_idx]*100:.2f}%",
                    f"    Rank by Variance: #{rank_of[comp_idx]}",
                    "",
                ])

                ax6.text(0.1, 0.9, stats_text,
                         transform=ax6.transAxes,
                         fontsize=10,
                         verticalalignment='top',
                         fontfamily='monospace',
                         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

                fig.tight_layout()
                pdf.savefig(fig, bbox_inches='tight')
            finally:
                # Release the figure even if a plotting step failed
                plt.close(fig)

            print(f"Processed component {comp_idx+1}/{n_components}")

        # Summary page at the end
        fig_summary = plt.figure(figsize=(11.69, 8.27))
        try:
            fig_summary.suptitle('ICA Components Summary', fontsize=16, fontweight='bold')

            # Table of components ranked by variance
            ax_table = fig_summary.add_subplot(1, 2, 1)
            ax_table.axis('tight')
            ax_table.axis('off')

            table_data = [[f'ICA{i:03d}', f'{explained_var_array[i]*100:.2f}%', f'#{rank+1}']
                          for rank, i in enumerate(sorted_indices)]

            table = ax_table.table(cellText=table_data,
                                   colLabels=['Component', 'Variance (%)', 'Rank'],
                                   cellLoc='center',
                                   loc='center')
            table.auto_set_font_size(False)
            table.set_fontsize(8)
            table.scale(1, 1.5)
            ax_table.set_title('Components Ranked by Variance', fontsize=12, pad=20)

            # Overall variance plot
            ax_var = fig_summary.add_subplot(1, 2, 2)
            ax_var.bar(range(n_components), explained_var_array * 100,
                       color='steelblue', alpha=0.7)
            ax_var.set_xlabel('Component Index')
            ax_var.set_ylabel('Explained Variance (%)')
            ax_var.set_title('Variance Explained by Each Component')
            ax_var.axhline(y=5, color='r', linestyle='--', alpha=0.5, label='5% threshold')
            ax_var.legend()
            ax_var.grid(True, alpha=0.3)

            # Text summary
            total_pct = np.sum(explained_var_array) * 100
            ax_var.text(0.02, 0.98, f'Total variance: {total_pct:.2f}%',
                        transform=ax_var.transAxes,
                        verticalalignment='top',
                        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

            fig_summary.tight_layout()
            pdf.savefig(fig_summary, bbox_inches='tight')
        finally:
            plt.close(fig_summary)

    print(f'\nPDF report saved to: {pdf_filename}')

    # Print summary to console
    print('\nExplained Variance Summary (sorted):')
    print('-' * 50)
    for rank, i in enumerate(sorted_indices):
        print(f'#{rank+1:2d} - ICA{i:03d}: {explained_var_array[i]*100:6.2f}%')
    print('-' * 50)
    print(f'Total: {np.sum(explained_var_array)*100:6.2f}%')

## 6. Usage Example: Load Subject Data

In [5]:
if __name__ == '__main__':
    # Example 1: Simple loading (all .fif and .csv files)
    # The path is built with os.path.join: the former literal
    # 'Data_converted_MetaData\Subject_08' contained the invalid escape
    # sequence "\S" and only worked with a Windows path separator.
    subject_dir = os.path.join('Data_converted_MetaData', 'Subject_08')

    try:
        raw, metadata = load_and_concatenate_subject(
            subject_dir=subject_dir,
            use_common_channels=True,
            preload=True,
            verbose=True
        )

        print("\n" + "="*80)
        print("SUCCESS!")
        print("="*80)
        print(f"Loaded {len(raw.ch_names)} channels, {raw.times[-1]:.1f}s")
        print(f"Loaded {len(metadata)} trials")

    except FileNotFoundError as e:
        print(f"\nDirectory not found: {e}")
        print("Please update subject_dir to point to your data")
        # Re-raise: the statement after this block needs `raw`, so continuing
        # would only replace this error with an unrelated NameError.
        raise
    except ValueError as e:
        print(f"\nError: {e}")
        raise

    print("\n" + "="*80)

# Keep only the first 1600 s of the concatenated recording; later cells work
# on this cropped copy and `raw` itself is left unmodified.
raw_cropped = raw.copy().crop(tmax=1600)

LOADING SUBJECT DATA

Directory: Data_converted_MetaData\Subject_08

[1/2] Loading FIF files...
Found 5 FIF files:
  Data_Subject_08_Session_01.h5_seeg_raw.fif
  Data_Subject_08_Session_02.h5_seeg_raw.fif
  Data_Subject_08_Session_03.h5_seeg_raw.fif
  Data_Subject_08_Session_04.h5_seeg_raw.fif
  Data_Subject_08_Session_05.h5_seeg_raw.fif
Opening raw data file Data_converted_MetaData\Subject_08\Data_Subject_08_Session_01.h5_seeg_raw.fif...
Isotrak not found
    Range : 0 ... 79999 =      0.000 ...   399.995 secs
Ready.


  subject_dir = 'Data_converted_MetaData\Subject_08'


  Loaded: Data_Subject_08_Session_01.h5_seeg_raw.fif (20 channels)
Opening raw data file Data_converted_MetaData\Subject_08\Data_Subject_08_Session_02.h5_seeg_raw.fif...
Isotrak not found
    Range : 0 ... 79999 =      0.000 ...   399.995 secs
Ready.
  Loaded: Data_Subject_08_Session_02.h5_seeg_raw.fif (20 channels)
Opening raw data file Data_converted_MetaData\Subject_08\Data_Subject_08_Session_03.h5_seeg_raw.fif...
Isotrak not found
    Range : 0 ... 79999 =      0.000 ...   399.995 secs
Ready.
  Loaded: Data_Subject_08_Session_03.h5_seeg_raw.fif (20 channels)
Opening raw data file Data_converted_MetaData\Subject_08\Data_Subject_08_Session_04.h5_seeg_raw.fif...
Isotrak not found
    Range : 0 ... 79999 =      0.000 ...   399.995 secs
Ready.
  Loaded: Data_Subject_08_Session_04.h5_seeg_raw.fif (20 channels)
Opening raw data file Data_converted_MetaData\Subject_08\Data_Subject_08_Session_05.h5_seeg_raw.fif...
Isotrak not found
    Range : 0 ... 78399 =      0.000 ...   391.995 secs
Rea

## 7. Band-pass Filtering

In [6]:
# Two band-passed copies of the cropped recording (raw_cropped itself is left unmodified):
# - raw_filtered:     0.1-40 Hz
# - raw_filtered_ica: 1-40 Hz; this is the copy the ICA is fitted on later in the notebook
raw_filtered = raw_cropped.copy().filter(l_freq=0.1, h_freq=40.0, verbose=True)
raw_filtered_ica = raw_cropped.copy().filter(l_freq=1, h_freq = 40, verbose=True)

Filtering raw data in 5 contiguous segments
Setting up band-pass filter from 0.1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 6601 samples (33.005 s)

Filtering raw data in 5 contiguous segments
Setting up band-pass filter from 1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband e

## 8. ICA Declaration & Fitting

In [7]:
# Fixed number of ICA components.
# NOTE(review): the earlier inline note suggested a float such as 0.999 instead
# (in MNE a float in (0, 1) selects components by cumulative explained variance) — confirm which is intended.
n_components = 19
method = 'picard'
# Upper bound on solver iterations.
# NOTE(review): the earlier inline note asked for "higher, like 500 or even 1000", which the
# current value already meets at its lower end; the recorded fit stopped after 69 iterations.
max_iter = 500

# Fixed seed for the ICA so repeated runs use the same value
random_state = 1

ica = mne.preprocessing.ICA(n_components=n_components,
                            method=method,
                            max_iter=max_iter,
                            random_state=random_state)
# Fit on the 1-40 Hz filtered copy
ica.fit(raw_filtered_ica)

Fitting ICA to data using 19 channels (please be patient, this may take a while)
Selecting by number: 19 components
Fitting ICA took 13.0s.


0,1
Method,picard
Fit parameters,max_iter=500
Fit,69 iterations on raw data (320001 samples)
ICA components,19
Available PCA components,19
Channel types,eeg
ICA components marked for exclusion,—


## 9. Visualisation & Saving as PDF

In [None]:
# Write the per-component PDF report for the fitted ICA, then plot the component topographies
ICA_summary_pdf(raw_filtered_ica, ica)
ica.plot_components(inst = raw_filtered_ica)


<MNEFigure size 975x967 with 19 Axes>

## 10. Excluding components & visualisation

In [18]:
# Mark components 0-2 for removal and apply the ICA to a copy, so raw_filtered_ica stays unchanged
ica.exclude = [0, 1, 2]
reconst_raw_filtered_ica = raw_filtered_ica.copy()
ica.apply(reconst_raw_filtered_ica)
reconst_raw_filtered_ica.plot()
# NOTE(review): the overlay below excludes [0, 1, 2, 16] while ica.exclude above is [0, 1, 2]
# (the recorded output zeroes 3 components, then 4), so the overlay does not show the same
# cleaning that was applied to reconst_raw_filtered_ica — confirm which list is intended.
ica.plot_overlay(raw_filtered_ica, exclude=[0,1,2,16])

Applying ICA to Raw instance
    Transforming to ICA space (19 components)
    Zeroing out 3 ICA components
    Projecting back using 19 PCA components
Applying ICA to Raw instance
    Transforming to ICA space (19 components)
    Zeroing out 4 ICA components
    Projecting back using 19 PCA components
Channels marked as bad:
none


<Figure size 1200x800 with 3 Axes>