# MNE Data Cleaning Pipeline with Metadata

This notebook provides tools for:
- Loading FIF files and their corresponding CSV metadata
- Concatenating multiple sessions from a subject
- Filtering and preprocessing

## 1. Imports

In [1]:
import h5py
import numpy as np
import mne
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
from scipy import stats as scipy_stats
import glob
from matplotlib.backends.backend_pdf import PdfPages

# Set plotting style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 8)

# For interactive plotting
matplotlib.use("Qt5Agg")

## 2. MetaData Loading Function

In [2]:
def load_session_metadata(csv_path):
    """
    Load metadata from a single CSV file.
    
    Parameters
    ----------
    csv_path : str
        Path to CSV metadata file
    
    Returns
    -------
    metadata : pd.DataFrame
        Trial metadata with columns: trial_number, set_size, match, correct, 
        response, response_time, probe_letter
    """
    metadata = pd.read_csv(csv_path)
    
    # Convert to appropriate types
    numeric_cols = ['trial_number', 'set_size', 'match', 'correct', 'response', 'response_time']
    for col in numeric_cols:
        if col in metadata.columns:
            metadata[col] = pd.to_numeric(metadata[col], errors='coerce')
    
    print(f"Loaded metadata: {len(metadata)} trials")
    print(f"  Columns: {list(metadata.columns)}")
    
    # Show summary
    if 'correct' in metadata.columns:
        acc = metadata['correct'].mean() * 100
        print(f"  Accuracy: {acc:.1f}%")
    if 'response_time' in metadata.columns:
        rt = metadata['response_time'].mean()
        print(f"  Mean RT: {rt:.3f}s")
    if 'set_size' in metadata.columns:
        sizes = sorted(metadata['set_size'].dropna().unique())
        print(f"  Set sizes: {sizes}")
    
    return metadata


def load_subject_metadata(subject_dir, subject_id=None, pattern='*metadata*.csv'):
    """
    Load and concatenate metadata from all sessions for a subject.
    
    Parameters
    ----------
    subject_dir : str
        Directory containing subject's session files
    subject_id : str, optional
        Subject identifier to add as a column
    pattern : str
        Glob pattern to match metadata CSV files (default: '*metadata*.csv')
    
    Returns
    -------
    metadata_all : pd.DataFrame
        Concatenated metadata from all sessions with added 'session' column
    """
    csv_files = sorted(glob.glob(os.path.join(subject_dir, pattern)))
    
    if len(csv_files) == 0:
        raise ValueError(f"No metadata CSV files found in {subject_dir}")
    
    print(f"Found {len(csv_files)} metadata files:")
    for f in csv_files:
        print(f"  {os.path.basename(f)}")
    
    print("\n" + "="*80)
    
    metadata_list = []
    
    for session_idx, csv_file in enumerate(csv_files, start=1):
        print(f"\nLoading session {session_idx}: {os.path.basename(csv_file)}")
        
        metadata = pd.read_csv(csv_file)
        
        # Add session identifier
        metadata['session'] = session_idx
        metadata['session_file'] = os.path.basename(csv_file)
        
        if subject_id is not None:
            metadata['subject'] = subject_id
        
        # Convert types
        numeric_cols = ['trial_number', 'set_size', 'match', 'correct', 'response', 'response_time']
        for col in numeric_cols:
            if col in metadata.columns:
                metadata[col] = pd.to_numeric(metadata[col], errors='coerce')
        
        print(f"  Trials: {len(metadata)}")
        if 'correct' in metadata.columns:
            acc = metadata['correct'].mean() * 100
            print(f"  Accuracy: {acc:.1f}%")
        
        metadata_list.append(metadata)
    
    # Concatenate all sessions
    metadata_all = pd.concat(metadata_list, ignore_index=True)
    
    print("\n" + "="*80)
    print(f"\nCombined metadata:")
    print(f"  Total trials: {len(metadata_all)}")
    print(f"  Sessions: {metadata_all['session'].nunique()}")
    
    if 'correct' in metadata_all.columns:
        overall_acc = metadata_all['correct'].mean() * 100
        print(f"  Overall accuracy: {overall_acc:.1f}%")
        
        # Per-session accuracy
        print(f"\n  Per-session accuracy:")
        for session in sorted(metadata_all['session'].unique()):
            session_data = metadata_all[metadata_all['session'] == session]
            acc = session_data['correct'].mean() * 100
            print(f"    Session {session}: {acc:.1f}%")
    
    if 'response_time' in metadata_all.columns:
        overall_rt = metadata_all['response_time'].mean()
        print(f"\n  Overall mean RT: {overall_rt:.3f}s")
    
    return metadata_all

## 3. Multi-Session Concatenation Functions

In [3]:
def load_and_concatenate_subject(
    subject_dir: str,
    use_common_channels: bool = True,
    preload: bool = True,
    verbose: bool = False
) -> Tuple[mne.io.Raw, pd.DataFrame]:
    """
    Load and concatenate all FIF files and metadata for a subject.
    
    Automatically finds all .fif and .csv files in the directory.
    Matches them by sorting alphabetically.
    
    Parameters
    ----------
    subject_dir : str
        Directory containing subject's session files
    use_common_channels : bool
        If True, only keep channels common to all files
    preload : bool
        Whether to load data into memory
    verbose : bool
        Verbose output
    
    Returns
    -------
    raw_concat : mne.io.Raw
        Concatenated Raw object from all sessions
    metadata_all : pd.DataFrame
        Concatenated metadata from all sessions with 'session' column
    
    Examples
    --------
    >>> raw, metadata = load_and_concatenate_subject('data/Subject_01/')
    """
    
    print("="*80)
    print("LOADING SUBJECT DATA")
    print("="*80)
    print(f"\nDirectory: {subject_dir}")
    
    # Get all files in directory
    all_files = os.listdir(subject_dir)
    
    # Find FIF files
    fif_files = sorted([f for f in all_files if f.endswith('.fif')])
    
    # Find CSV files (look for files with .csv extension)
    csv_files = sorted([f for f in all_files if f.endswith('.csv')])
    
    # Verify we found files
    if len(fif_files) == 0:
        raise ValueError(f"No .fif files found in {subject_dir}")
    
    if len(csv_files) == 0:
        raise ValueError(f"No .csv files found in {subject_dir}")
    
    print(f"\n[1/2] Loading FIF files...")
    print(f"Found {len(fif_files)} FIF files:")
    for f in fif_files:
        print(f"  {f}")
    
    # Load FIF files
    raw_list = []
    
    for fif_file in fif_files:
        full_path = os.path.join(subject_dir, fif_file)
        raw = mne.io.read_raw_fif(full_path, preload=False, verbose=verbose)
        raw_list.append(raw)
        
        if verbose:
            print(f"  Loaded: {fif_file} ({len(raw.ch_names)} channels)")
    
    # Find and use common channels
    if use_common_channels and len(raw_list) > 1:
        common_channels = set(raw_list[0].ch_names)
        for raw in raw_list[1:]:
            common_channels &= set(raw.ch_names)
        
        common_channels = sorted(list(common_channels))
        print(f"\nUsing {len(common_channels)} common channels")
        
        # Pick common channels from all files
        for i, raw in enumerate(raw_list):
            raw_list[i] = raw.copy().pick_channels(common_channels, ordered=True)
    
    # Concatenate FIF files
    print("\nConcatenating FIF files...")
    raw_concat = mne.concatenate_raws(raw_list, preload=preload, verbose=verbose)
    
    print(f"✓ Neural data concatenated:")
    print(f"  Duration: {raw_concat.times[-1]:.2f}s")
    print(f"  Channels: {len(raw_concat.ch_names)}")
    print(f"  Sampling rate: {raw_concat.info['sfreq']} Hz")
    
    # Load metadata
    print("\n" + "="*80)
    print("[2/2] Loading metadata...")
    print(f"Found {len(csv_files)} CSV files:")
    for f in csv_files:
        print(f"  {f}")
    
    metadata_list = []
    
    for session_idx, csv_file in enumerate(csv_files, start=1):
        full_path = os.path.join(subject_dir, csv_file)
        
        if verbose:
            print(f"\n  Loading session {session_idx}: {csv_file}")
        
        metadata = pd.read_csv(full_path)
        
        # Add session identifier
        metadata['session'] = session_idx
        metadata['session_file'] = csv_file
        
        # Convert to numeric types
        numeric_cols = ['trial_number', 'set_size', 'match', 'correct', 
                       'response', 'response_time']
        for col in numeric_cols:
            if col in metadata.columns:
                metadata[col] = pd.to_numeric(metadata[col], errors='coerce')
        
        if verbose:
            print(f"    Trials: {len(metadata)}")
            if 'correct' in metadata.columns:
                acc = metadata['correct'].mean() * 100
                print(f"    Accuracy: {acc:.1f}%")
        
        metadata_list.append(metadata)
    
    # Concatenate all metadata
    metadata_all = pd.concat(metadata_list, ignore_index=True)
    
    print(f"\n✓ Metadata concatenated:")
    print(f"  Total trials: {len(metadata_all)}")
    print(f"  Sessions: {metadata_all['session'].nunique()}")
    
    if 'correct' in metadata_all.columns:
        overall_acc = metadata_all['correct'].mean() * 100
        print(f"  Overall accuracy: {overall_acc:.1f}%")
    
    if 'response_time' in metadata_all.columns:
        overall_rt = metadata_all['response_time'].mean()
        print(f"  Overall mean RT: {overall_rt:.3f}s")
    
    # Verify alignment
    print("\n" + "="*80)
    print("VERIFICATION")
    print("="*80)
    
    n_sessions_fif = len(fif_files)
    n_sessions_meta = metadata_all['session'].nunique()
    
    if n_sessions_fif == n_sessions_meta:
        print(f"✓ Session count matches: {n_sessions_fif} sessions")
    else:
        print(f"⚠ WARNING: Session count mismatch!")
        print(f"  FIF files: {n_sessions_fif}")
        print(f"  CSV files: {n_sessions_meta}")
    
    print("\n✓ Loading complete!")
    
    return raw_concat, metadata_all


def load_and_concatenate_subject_paired(
    subject_dir: str,
    fif_prefix: Optional[str] = None,
    csv_suffix: Optional[str] = None,
    use_common_channels: bool = True,
    preload: bool = True,
    verbose: bool = False
) -> Tuple[mne.io.Raw, pd.DataFrame]:
    """
    Load and concatenate with smart FIF-CSV pairing.
    
    Pairs files based on shared naming (e.g., Session_01_raw.fif with Session_01_metadata.csv).
    
    Parameters
    ----------
    subject_dir : str
        Directory containing subject's session files
    fif_prefix : str, optional
        Only load FIF files starting with this prefix
    csv_suffix : str, optional
        Only load CSV files with this suffix (e.g., 'metadata')
    use_common_channels : bool
        If True, only keep channels common to all files
    preload : bool
        Whether to load data into memory
    verbose : bool
        Verbose output
    
    Returns
    -------
    raw_concat : mne.io.Raw
        Concatenated Raw object
    metadata_all : pd.DataFrame
        Concatenated metadata
    
    Examples
    --------
    >>> # Load only files with specific naming
    >>> raw, meta = load_and_concatenate_subject_paired(
    ...     'data/Subject_01/',
    ...     csv_suffix='metadata'
    ... )
    """
    
    print("="*80)
    print("LOADING SUBJECT DATA (PAIRED MODE)")
    print("="*80)
    print(f"\nDirectory: {subject_dir}")
    
    # Get all files
    all_files = os.listdir(subject_dir)
    
    # Find FIF files
    fif_files = [f for f in all_files if f.endswith('.fif')]
    if fif_prefix:
        fif_files = [f for f in fif_files if f.startswith(fif_prefix)]
    fif_files = sorted(fif_files)
    
    # Find CSV files
    csv_files = [f for f in all_files if f.endswith('.csv')]
    if csv_suffix:
        csv_files = [f for f in csv_files if csv_suffix in f.lower()]
    csv_files = sorted(csv_files)
    
    # Verify
    if len(fif_files) == 0:
        raise ValueError(f"No FIF files found in {subject_dir}")
    if len(csv_files) == 0:
        raise ValueError(f"No CSV files found in {subject_dir}")
    
    print(f"\n[1/2] Loading {len(fif_files)} FIF files...")
    for f in fif_files:
        print(f"  {f}")
    
    # Try to pair FIF with CSV files
    paired_files = []
    
    for fif_file in fif_files:
        # Extract base name (remove extension and common suffixes)
        base_name = fif_file.replace('.fif', '').replace('_raw', '').replace('_eeg', '')
        
        # Look for matching CSV
        matching_csv = None
        for csv_file in csv_files:
            csv_base = csv_file.replace('.csv', '').replace('_metadata', '')
            if base_name in csv_base or csv_base in base_name:
                matching_csv = csv_file
                break
        
        if matching_csv:
            paired_files.append((fif_file, matching_csv))
            if verbose:
                print(f"  Paired: {fif_file} ↔ {matching_csv}")
        else:
            # No match found, still use this FIF but warn
            paired_files.append((fif_file, None))
            print(f"  ⚠ No matching CSV for: {fif_file}")
    
    # Load FIF files
    raw_list = []
    for fif_file, _ in paired_files:
        full_path = os.path.join(subject_dir, fif_file)
        raw = mne.io.read_raw_fif(full_path, preload=False, verbose=verbose)
        raw_list.append(raw)
    
    # Common channels
    if use_common_channels and len(raw_list) > 1:
        common_channels = set(raw_list[0].ch_names)
        for raw in raw_list[1:]:
            common_channels &= set(raw.ch_names)
        common_channels = sorted(list(common_channels))
        print(f"\nUsing {len(common_channels)} common channels")
        
        for i, raw in enumerate(raw_list):
            raw_list[i] = raw.copy().pick_channels(common_channels, ordered=True)
    
    # Concatenate
    raw_concat = mne.concatenate_raws(raw_list, preload=preload, verbose=verbose)
    print(f"\n✓ Neural data: {raw_concat.times[-1]:.2f}s, {len(raw_concat.ch_names)} channels")
    
    # Load metadata
    print("\n[2/2] Loading metadata...")
    metadata_list = []
    
    for session_idx, (fif_file, csv_file) in enumerate(paired_files, start=1):
        if csv_file is None:
            print(f"  ⚠ Skipping session {session_idx} (no CSV)")
            continue
        
        full_path = os.path.join(subject_dir, csv_file)
        metadata = pd.read_csv(full_path)
        metadata['session'] = session_idx
        metadata['session_file'] = csv_file
        metadata['fif_file'] = fif_file
        
        # Convert types
        numeric_cols = ['trial_number', 'set_size', 'match', 'correct', 
                       'response', 'response_time']
        for col in numeric_cols:
            if col in metadata.columns:
                metadata[col] = pd.to_numeric(metadata[col], errors='coerce')
        
        metadata_list.append(metadata)
    
    if len(metadata_list) == 0:
        raise ValueError("No metadata files could be loaded")
    
    metadata_all = pd.concat(metadata_list, ignore_index=True)
    
    print(f"\n✓ Metadata: {len(metadata_all)} trials from {len(metadata_list)} sessions")
    
    return raw_concat, metadata_all


## 5. ICA summary pdf output

In [14]:
def ICA_summary_pdf(data, ica, pdf_filename):
    # Calculate explained variance properly
    # Method 1: Use the correct approach
    try:
        sources = ica.get_sources(data)
        # Calculate variance for each component
        explained_var_array = np.var(sources.get_data(), axis=1)
        # Normalize to get proportion
        explained_var_array = explained_var_array / np.sum(explained_var_array)
    except:
        # Fallback
        explained_var_array = np.ones(ica.n_components_) / ica.n_components_

    print(f"Explained variance shape: {explained_var_array.shape}")
    print(f"Number of components: {ica.n_components_}")
    print(f"Explained variance values:\n{explained_var_array}")
    print(f"Total variance: {np.sum(explained_var_array)*100:.2f}%")

    # Create PDF with all component properties

    with PdfPages(pdf_filename) as pdf:
        n_components = ica.n_components_
        
        for comp_idx in range(n_components):
            # Create figure with subplots for each component
            fig = plt.figure(figsize=(11.69, 8.27))  # A4 landscape
            
            # Add title with component number and explained variance
            var_text = f'{explained_var_array[comp_idx]*100:.2f}%'
            fig.suptitle(f'ICA{comp_idx:03d} - Explained Variance: {var_text}', 
                        fontsize=16, fontweight='bold')
            
            # 1. Topography
            ax1 = plt.subplot(2, 3, 1)
            ica.plot_components(picks=comp_idx, axes=ax1, show=False, colorbar=True)
            
            # Get source data for this component
            sources = ica.get_sources(raw)
            sfreq = raw.info['sfreq']
            
            # 2. Time course (using first 10 seconds of data)
            ax2 = plt.subplot(2, 3, 2)
            n_samples = min(int(10 * sfreq), sources.n_times)
            times = sources.times[:n_samples]
            data = sources.get_data(picks=comp_idx)[:, :n_samples]
            ax2.plot(times, data.T, 'k', linewidth=0.5)
            ax2.set_xlabel('Time (s)')
            ax2.set_ylabel('AU')
            ax2.set_title('Time Course (first 10s)')
            ax2.grid(True, alpha=0.3)
            
            # 3. Power Spectrum (fixed)
            ax3 = plt.subplot(2, 3, 3)
            from mne.time_frequency import psd_array_welch
            
            # Use all available data for PSD
            data_full = sources.get_data(picks=comp_idx)
            
            # Adjust n_fft based on available data
            n_fft = min(2048, data_full.shape[1])
            n_per_seg = min(n_fft, data_full.shape[1])
            
            psds, freqs = psd_array_welch(
                data_full, 
                sfreq=sfreq, 
                fmin=0.5, 
                fmax=50, 
                n_fft=n_fft,
                n_per_seg=n_per_seg
            )
            
            ax3.semilogy(freqs, psds.T, 'k', linewidth=1)
            ax3.set_xlabel('Frequency (Hz)')
            ax3.set_ylabel('Power Spectral Density (µV²/Hz)')
            ax3.set_title('Power Spectrum')
            ax3.grid(True, alpha=0.3)
            ax3.set_xlim([0, 50])
            
            # 4. Component properties (alternative visualization)
            ax4 = plt.subplot(2, 3, 4)
            # Plot first 2 seconds in more detail
            n_samples_detail = min(int(2 * sfreq), sources.n_times)
            times_detail = sources.times[:n_samples_detail]
            data_detail = sources.get_data(picks=comp_idx)[:, :n_samples_detail]
            ax4.plot(times_detail, data_detail.T, 'b', linewidth=0.8)
            ax4.set_xlabel('Time (s)')
            ax4.set_ylabel('AU')
            ax4.set_title('Time Course Detail (first 2s)')
            ax4.grid(True, alpha=0.3)
            
            # 5. Variance bar (showing this component in context)
            ax5 = plt.subplot(2, 3, 5)
            colors = ['red' if i == comp_idx else 'gray' for i in range(n_components)]
            ax5.bar(range(n_components), explained_var_array * 100, color=colors, alpha=0.6)
            ax5.set_xlabel('Component')
            ax5.set_ylabel('Explained Variance (%)')
            ax5.set_title('Variance Explained by All Components')
            ax5.axhline(y=5, color='r', linestyle='--', alpha=0.5, label='5% threshold')
            ax5.legend()
            ax5.grid(True, alpha=0.3, axis='y')
            
            # 6. Properties statistics
            ax6 = plt.subplot(2, 3, 6)
            ax6.axis('off')
            
            # Calculate some statistics
            data_stats = sources.get_data(picks=comp_idx).flatten()
            stats_text = f"""Component Statistics:
            
    Mean: {np.mean(data_stats):.3f}
    Std: {np.std(data_stats):.3f}
    Min: {np.min(data_stats):.3f}
    Max: {np.max(data_stats):.3f}
    Kurtosis: {np.mean((data_stats - np.mean(data_stats))**4) / (np.std(data_stats)**4):.3f}

    Explained Variance: {explained_var_array[comp_idx]*100:.2f}%
    Rank by Variance: #{np.where(np.argsort(explained_var_array)[::-1] == comp_idx)[0][0] + 1}
            """
            
            ax6.text(0.1, 0.9, stats_text, 
                    transform=ax6.transAxes,
                    fontsize=10,
                    verticalalignment='top',
                    fontfamily='monospace',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
            
            plt.tight_layout()
            pdf.savefig(fig, bbox_inches='tight')
            plt.close(fig)
            
            print(f"Processed component {comp_idx+1}/{n_components}")
        
        # Add summary page at the end
        fig_summary = plt.figure(figsize=(11.69, 8.27))
        fig_summary.suptitle('ICA Components Summary', fontsize=16, fontweight='bold')
        
        # Variance explained table
        ax_table = plt.subplot(1, 2, 1)
        ax_table.axis('tight')
        ax_table.axis('off')
        
        # Sort by variance for the table
        sorted_indices = np.argsort(explained_var_array)[::-1]
        table_data = [[f'ICA{i:03d}', f'{explained_var_array[i]*100:.2f}%', f'#{rank+1}'] 
                    for rank, i in enumerate(sorted_indices)]
        
        table = ax_table.table(cellText=table_data, 
                            colLabels=['Component', 'Variance (%)', 'Rank'],
                            cellLoc='center',
                            loc='center')
        table.auto_set_font_size(False)
        table.set_fontsize(8)
        table.scale(1, 1.5)
        ax_table.set_title('Components Ranked by Variance', fontsize=12, pad=20)
        
        # Overall variance plot
        ax_var = plt.subplot(1, 2, 2)
        ax_var.bar(range(n_components), explained_var_array * 100, color='steelblue', alpha=0.7)
        ax_var.set_xlabel('Component Index')
        ax_var.set_ylabel('Explained Variance (%)')
        ax_var.set_title('Variance Explained by Each Component')
        ax_var.axhline(y=5, color='r', linestyle='--', alpha=0.5, label='5% threshold')
        ax_var.legend()
        ax_var.grid(True, alpha=0.3)
        
        # Add text summary
        total_var = np.sum(explained_var_array) * 100
        ax_var.text(0.02, 0.98, f'Total variance: {total_var:.2f}%',
                    transform=ax_var.transAxes, 
                    verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
        
        plt.tight_layout()
        pdf.savefig(fig_summary, bbox_inches='tight')
        plt.close(fig_summary)

    print(f'\nPDF report saved to: {pdf_filename}')

    # Print summary to console
    print('\nExplained Variance Summary (sorted):')
    print('-' * 50)
    sorted_indices = np.argsort(explained_var_array)[::-1]
    for rank, i in enumerate(sorted_indices):
        print(f'#{rank+1:2d} - ICA{i:03d}: {explained_var_array[i]*100:6.2f}%')
    print('-' * 50)
    print(f'Total: {np.sum(explained_var_array)*100:6.2f}%')

## 6. Exclusion Report


In [41]:
def generate_ica_rejection_report(ica, raw, exclude_components, output_filename='ica_rejection_report.pdf'):
    """
    Generate a comprehensive PDF report for ICA component rejection.
    
    Parameters
    ----------
    ica : mne.preprocessing.ICA
        Fitted ICA object
    raw : mne.io.Raw
        Raw data (should be the same data or filtered version used for ICA)
    exclude_components : list
        List of component indices to exclude (e.g., [0, 1, 2])
    output_filename : str
        Path for output PDF file
    
    Returns
    -------
    dict
        Summary statistics dictionary
    """
    
    # Add .pdf extension if not present
    if not output_filename.endswith('.pdf'):
        output_filename += '.pdf'
    
    # Calculate variance explained by each component
    sources = ica.get_sources(raw)
    explained_var = np.var(sources.get_data(), axis=1)
    explained_var_ratio = explained_var / np.sum(explained_var)
    
    # Calculate statistics for excluded components
    total_var_removed = np.sum(explained_var_ratio[exclude_components]) * 100
    n_total_components = ica.n_components_
    n_excluded = len(exclude_components)
    n_kept = n_total_components - n_excluded
    
    # Calculate kurtosis for all components
    kurtosis_values = []
    for i in range(n_total_components):
        comp_data = sources.get_data(picks=i).flatten()
        kurt = np.mean((comp_data - np.mean(comp_data))**4) / (np.std(comp_data)**4)
        kurtosis_values.append(kurt)
    
    # Create summary dictionary
    summary = {
        'total_components': n_total_components,
        'excluded_components': exclude_components,
        'kept_components': [i for i in range(n_total_components) if i not in exclude_components],
        'n_excluded': n_excluded,
        'n_kept': n_kept,
        'total_variance_removed': total_var_removed,
        'variance_per_excluded': {f'ICA{i:03d}': explained_var_ratio[i]*100 for i in exclude_components},
        'kurtosis_per_excluded': {f'ICA{i:03d}': kurtosis_values[i] for i in exclude_components}
    }
    
    # Create PDF report
    with PdfPages(output_filename) as pdf:
        # Page 1: Summary Overview
        fig = plt.figure(figsize=(11, 8.5))
        fig.suptitle('ICA Component Rejection Report', fontsize=18, fontweight='bold')
        
        ax = fig.add_subplot(111)
        ax.axis('off')
        
        summary_text = f"""
REJECTION SUMMARY
{'='*80}

Total Components: {n_total_components}
Excluded: {n_excluded} components
Kept: {n_kept} components

EXCLUDED COMPONENTS: {', '.join([f'ICA{i:03d}' for i in exclude_components])}

VARIANCE ANALYSIS
{'-'*80}
Total variance removed: {total_var_removed:.2f}%

Variance per excluded component:
"""
        for comp_idx in exclude_components:
            var_pct = explained_var_ratio[comp_idx] * 100
            kurt = kurtosis_values[comp_idx]
            rank = np.where(np.argsort(explained_var_ratio)[::-1] == comp_idx)[0][0] + 1
            summary_text += f"  ICA{comp_idx:03d}: {var_pct:6.2f}% (Rank #{rank:2d}, Kurtosis: {kurt:7.2f})\n"
        
        summary_text += f"""
ARTIFACT SIGNATURES
{'-'*80}
"""
        for comp_idx in exclude_components:
            kurt = kurtosis_values[comp_idx]
            if kurt > 50:
                artifact_type = "Strong artifact (very high kurtosis)"
            elif kurt > 30:
                artifact_type = "Likely artifact (high kurtosis)"
            elif kurt > 10:
                artifact_type = "Possible artifact (moderate kurtosis)"
            else:
                artifact_type = "Review needed (low kurtosis for rejection)"
            summary_text += f"  ICA{comp_idx:03d}: {artifact_type}\n"
        
        ax.text(0.05, 0.95, summary_text, transform=ax.transAxes,
                fontsize=11, verticalalignment='top', fontfamily='monospace',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))
        
        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)
        
        # Page 2: Before/After EEG Comparison
        fig = plt.figure(figsize=(11, 8.5))
        fig.suptitle('EEG Data: Before and After ICA Artifact Rejection', fontsize=14, fontweight='bold')
        
        # Prepare data
        raw_display = raw.copy().filter(l_freq=None, h_freq=40)
        raw_clean = raw.copy().filter(l_freq=None, h_freq=40)
        ica.apply(raw_clean, exclude=exclude_components)
        
        # Select time window (first 10 seconds or less)
        duration = min(10, raw_display.times[-1])
        n_samples = int(duration * raw_display.info['sfreq'])
        times = raw_display.times[:n_samples]
        
        # Get channel names (limit to first 10 channels for clarity)
        n_channels_display = min(10, len(raw_display.ch_names))
        picks = range(n_channels_display)
        ch_names = [raw_display.ch_names[i] for i in picks]
        
        # Get data
        data_before = raw_display.get_data(picks=picks, start=0, stop=n_samples)
        data_after = raw_clean.get_data(picks=picks, start=0, stop=n_samples)
        
        # Plot before
        ax1 = plt.subplot(2, 1, 1)
        offset = np.arange(n_channels_display) * np.max(np.abs(data_before)) * 2
        for i, ch_data in enumerate(data_before):
            ax1.plot(times, ch_data + offset[i], 'k', linewidth=0.5, alpha=0.7)
        ax1.set_yticks(offset)
        ax1.set_yticklabels(ch_names)
        ax1.set_xlabel('Time (s)')
        ax1.set_title('Before ICA Rejection', fontsize=12, fontweight='bold')
        ax1.grid(True, alpha=0.3)
        ax1.set_xlim([0, duration])
        
        # Plot after
        ax2 = plt.subplot(2, 1, 2)
        offset = np.arange(n_channels_display) * np.max(np.abs(data_after)) * 2
        for i, ch_data in enumerate(data_after):
            ax2.plot(times, ch_data + offset[i], 'b', linewidth=0.5, alpha=0.7)
        ax2.set_yticks(offset)
        ax2.set_yticklabels(ch_names)
        ax2.set_xlabel('Time (s)')
        ax2.set_title('After ICA Rejection', fontsize=12, fontweight='bold', color='blue')
        ax2.grid(True, alpha=0.3)
        ax2.set_xlim([0, duration])
        
        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)
        
        # Page 3: Combined overlay plot for all excluded components
        fig = ica.plot_overlay(raw_display, exclude=exclude_components, picks='eeg', show=False)
        excluded_list = ', '.join([f'ICA{i:03d}' for i in exclude_components])
        fig.suptitle(f'Overlay: Effect of Removing All Excluded Components\n({excluded_list})', 
                    fontsize=14, fontweight='bold')
        pdf.savefig(fig)
        plt.close(fig)
        
        # Page 4: Overall RMS comparison
        fig = plt.figure(figsize=(11, 8.5))
        fig.suptitle('Signal Quality Metrics: Before vs After ICA', fontsize=14, fontweight='bold')
        
        # Calculate RMS for all channels
        rms_before = np.sqrt(np.mean(raw_display.get_data()**2, axis=1))
        rms_after = np.sqrt(np.mean(raw_clean.get_data()**2, axis=1))
        
        # RMS comparison
        ax1 = plt.subplot(2, 2, 1)
        x = np.arange(len(rms_before))
        width = 0.35
        ax1.bar(x - width/2, rms_before * 1e6, width, label='Before', alpha=0.7, color='red')
        ax1.bar(x + width/2, rms_after * 1e6, width, label='After', alpha=0.7, color='blue')
        ax1.set_xlabel('Channel Index')
        ax1.set_ylabel('RMS (µV)')
        ax1.set_title('RMS Amplitude by Channel')
        ax1.legend()
        ax1.grid(True, alpha=0.3, axis='y')
        
        # RMS reduction percentage
        ax2 = plt.subplot(2, 2, 2)
        rms_reduction = ((rms_before - rms_after) / rms_before) * 100
        colors = ['green' if r > 0 else 'red' for r in rms_reduction]
        ax2.bar(x, rms_reduction, color=colors, alpha=0.7)
        ax2.set_xlabel('Channel Index')
        ax2.set_ylabel('RMS Reduction (%)')
        ax2.set_title('Artifact Reduction by Channel')
        ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        ax2.grid(True, alpha=0.3, axis='y')
        
        # Overall variance before/after
        ax3 = plt.subplot(2, 2, 3)
        var_before = np.var(raw_display.get_data())
        var_after = np.var(raw_clean.get_data())
        ax3.bar(['Before ICA', 'After ICA'], [var_before * 1e12, var_after * 1e12], 
               color=['red', 'blue'], alpha=0.7)
        ax3.set_ylabel('Variance (µV²)')
        ax3.set_title('Overall Signal Variance')
        ax3.grid(True, alpha=0.3, axis='y')
        
        # Statistics table
        ax4 = plt.subplot(2, 2, 4)
        ax4.axis('off')
        
        stats_text = f"""SIGNAL QUALITY METRICS

Before ICA Rejection:
  Mean RMS: {np.mean(rms_before)*1e6:.2f} µV
  Std RMS:  {np.std(rms_before)*1e6:.2f} µV
  Variance: {var_before*1e12:.2f} µV²

After ICA Rejection:
  Mean RMS: {np.mean(rms_after)*1e6:.2f} µV
  Std RMS:  {np.std(rms_after)*1e6:.2f} µV
  Variance: {var_after*1e12:.2f} µV²

Overall Reduction:
  RMS reduction: {((np.mean(rms_before) - np.mean(rms_after))/np.mean(rms_before)*100):.2f}%
  Variance reduction: {((var_before - var_after)/var_before*100):.2f}%
"""
        
        ax4.text(0.1, 0.9, stats_text, transform=ax4.transAxes,
                fontsize=10, verticalalignment='top', fontfamily='monospace',
                bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))
        
        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)
        
        # Page 5: Variance Distribution
        fig = plt.figure(figsize=(11, 8.5))
        fig.suptitle('Variance Distribution Analysis', fontsize=14, fontweight='bold')
        
        # Variance bar plot
        ax1 = plt.subplot(2, 2, 1)
        colors = ['red' if i in exclude_components else 'steelblue' 
                 for i in range(n_total_components)]
        ax1.bar(range(n_total_components), explained_var_ratio * 100, 
               color=colors, alpha=0.7)
        ax1.set_xlabel('Component Index')
        ax1.set_ylabel('Explained Variance (%)')
        ax1.set_title('Variance by Component')
        ax1.axhline(y=5, color='orange', linestyle='--', alpha=0.5, label='5% threshold')
        ax1.legend()
        ax1.grid(True, alpha=0.3, axis='y')
        
        # Cumulative variance
        ax2 = plt.subplot(2, 2, 2)
        cumvar = np.cumsum(explained_var_ratio) * 100
        ax2.plot(range(n_total_components), cumvar, 'b-', linewidth=2)
        for idx in exclude_components:
            ax2.axvline(x=idx, color='red', linestyle='--', alpha=0.5)
        ax2.set_xlabel('Component Index')
        ax2.set_ylabel('Cumulative Variance (%)')
        ax2.set_title('Cumulative Variance Explained')
        ax2.grid(True, alpha=0.3)
        
        # Kurtosis distribution
        ax3 = plt.subplot(2, 2, 3)
        colors_kurt = ['red' if i in exclude_components else 'steelblue' 
                      for i in range(n_total_components)]
        ax3.bar(range(n_total_components), kurtosis_values, 
               color=colors_kurt, alpha=0.7)
        ax3.set_xlabel('Component Index')
        ax3.set_ylabel('Kurtosis')
        ax3.set_title('Kurtosis by Component')
        ax3.axhline(y=30, color='orange', linestyle='--', alpha=0.5, 
                   label='High kurtosis threshold')
        ax3.legend()
        ax3.grid(True, alpha=0.3, axis='y')
        ax3.set_yscale('log')
        
        # Pie chart of variance
        ax4 = plt.subplot(2, 2, 4)
        kept_var = np.sum(explained_var_ratio[[i for i in range(n_total_components) 
                                               if i not in exclude_components]]) * 100
        removed_var = total_var_removed
        
        ax4.pie([kept_var, removed_var], 
               labels=['Kept Components', 'Removed Artifacts'],
               colors=['steelblue', 'red'],
               autopct='%1.1f%%',
               startangle=90)
        ax4.set_title('Variance Distribution')
        
        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)
        
        # Page 6+: Individual component details for excluded components
        for comp_idx in exclude_components:
            fig = plt.figure(figsize=(11, 8.5))
            fig.suptitle(f'ICA{comp_idx:03d} - REJECTED COMPONENT', 
                        fontsize=14, fontweight='bold', color='red')
            
            # Topography
            ax1 = plt.subplot(2, 3, 1)
            ica.plot_components(picks=comp_idx, axes=ax1, show=False, colorbar=True)
            
            # Time course
            ax2 = plt.subplot(2, 3, 2)
            comp_data = sources.get_data(picks=comp_idx)
            times = sources.times[:min(int(10 * raw.info['sfreq']), len(sources.times))]
            data = comp_data[:, :len(times)]
            ax2.plot(times, data.T, 'k', linewidth=0.5)
            ax2.set_xlabel('Time (s)')
            ax2.set_ylabel('AU')
            ax2.set_title('Time Course (first 10s)')
            ax2.grid(True, alpha=0.3)
            
            # Power spectrum
            ax3 = plt.subplot(2, 3, 3)
            from mne.time_frequency import psd_array_welch
            n_fft = min(2048, comp_data.shape[1])
            psds, freqs = psd_array_welch(comp_data, sfreq=raw.info['sfreq'],
                                         fmin=0.5, fmax=50, n_fft=n_fft,
                                         n_per_seg=n_fft)
            ax3.semilogy(freqs, psds.T, 'k', linewidth=1)
            ax3.set_xlabel('Frequency (Hz)')
            ax3.set_ylabel('PSD (µV²/Hz)')
            ax3.set_title('Power Spectrum')
            ax3.grid(True, alpha=0.3)
            ax3.set_xlim([0, 50])
            
            # Time course detail (first 2s)
            ax4 = plt.subplot(2, 3, 4)
            times_detail = sources.times[:min(int(2 * raw.info['sfreq']), len(sources.times))]
            data_detail = comp_data[:, :len(times_detail)]
            ax4.plot(times_detail, data_detail.T, 'b', linewidth=0.8)
            ax4.set_xlabel('Time (s)')
            ax4.set_ylabel('AU')
            ax4.set_title('Time Course Detail (first 2s)')
            ax4.grid(True, alpha=0.3)
            
            # Statistics box
            ax5 = plt.subplot(2, 3, 5)
            ax5.axis('off')
            
            var_pct = explained_var_ratio[comp_idx] * 100
            kurt = kurtosis_values[comp_idx]
            rank = np.where(np.argsort(explained_var_ratio)[::-1] == comp_idx)[0][0] + 1
            comp_stats = sources.get_data(picks=comp_idx).flatten()
            
            stats_text = f"""Component Statistics:

Mean: {np.mean(comp_stats):.3f}
Std: {np.std(comp_stats):.3f}
Min: {np.min(comp_stats):.3f}
Max: {np.max(comp_stats):.3f}
Kurtosis: {kurt:.3f}

Explained Variance: {var_pct:.2f}%
Rank by Variance: #{rank}

REJECTION RATIONALE:
"""
            if kurt > 100:
                stats_text += "• EXTREME artifact signature\n"
                stats_text += "• Likely eye blinks/movements"
            elif kurt > 50:
                stats_text += "• Very strong artifact\n"
                stats_text += "• Eye or muscle artifact"
            elif kurt > 30:
                stats_text += "• Strong artifact signature\n"
                stats_text += "• Probable artifact"
            else:
                stats_text += "• Moderate signature\n"
                stats_text += "• Review recommended"
            
            ax5.text(0.1, 0.9, stats_text, transform=ax5.transAxes,
                    fontsize=9, verticalalignment='top', fontfamily='monospace',
                    bbox=dict(boxstyle='round', facecolor='salmon', alpha=0.3))
            
            # Component in context
            ax6 = plt.subplot(2, 3, 6)
            colors_context = ['red' if i == comp_idx else 'gray' 
                            for i in range(n_total_components)]
            ax6.bar(range(n_total_components), explained_var_ratio * 100, 
                   color=colors_context, alpha=0.6)
            ax6.set_xlabel('Component')
            ax6.set_ylabel('Explained Variance (%)')
            ax6.set_title('This Component in Context')
            ax6.axhline(y=5, color='r', linestyle='--', alpha=0.5)
            ax6.grid(True, alpha=0.3, axis='y')
            
            plt.tight_layout()
            pdf.savefig(fig)
            plt.close(fig)
    
    print(f"\n{'='*80}")
    print(f"ICA REJECTION REPORT GENERATED")
    print(f"{'='*80}")
    print(f"Report saved to: {output_filename}")
    print(f"\nSummary:")
    print(f"  Total components: {n_total_components}")
    print(f"  Rejected: {n_excluded}")
    print(f"  Kept: {n_kept}")
    print(f"  Variance removed: {total_var_removed:.2f}%")
    print(f"\nRejected components:")
    for comp_idx in exclude_components:
        var_pct = explained_var_ratio[comp_idx] * 100
        kurt = kurtosis_values[comp_idx]
        print(f"  ICA{comp_idx:03d}: {var_pct:6.2f}% variance, kurtosis={kurt:.2f}")
    print(f"{'='*80}\n")
    
    return summary

## 7. Usage Example: Load Subject Data

In [15]:
if __name__ == '__main__':
    # Example 1: Simple loading (all .fif and .csv files)
    subject_dir = 'Data_converted_MetaData\Subject_08'
    
    try:
        raw, metadata = load_and_concatenate_subject(
            subject_dir=subject_dir,
            use_common_channels=True,
            preload=True,
            verbose=True
        )
        
        print("\n" + "="*80)
        print("SUCCESS!")
        print("="*80)
        print(f"Loaded {len(raw.ch_names)} channels, {raw.times[-1]:.1f}s")
        print(f"Loaded {len(metadata)} trials")
        
    except FileNotFoundError as e:
        print(f"\nDirectory not found: {e}")
        print("Please update subject_dir to point to your data")
    except ValueError as e:
        print(f"\nError: {e}")
    
    print("\n" + "="*80)
raw_cropped = raw.copy().crop(tmax = 1600)

LOADING SUBJECT DATA

Directory: Data_converted_MetaData\Subject_08

[1/2] Loading FIF files...
Found 5 FIF files:
  Data_Subject_08_Session_01.h5_seeg_raw.fif
  Data_Subject_08_Session_02.h5_seeg_raw.fif
  Data_Subject_08_Session_03.h5_seeg_raw.fif
  Data_Subject_08_Session_04.h5_seeg_raw.fif
  Data_Subject_08_Session_05.h5_seeg_raw.fif
Opening raw data file Data_converted_MetaData\Subject_08\Data_Subject_08_Session_01.h5_seeg_raw.fif...
Isotrak not found
    Range : 0 ... 79999 =      0.000 ...   399.995 secs
Ready.
  Loaded: Data_Subject_08_Session_01.h5_seeg_raw.fif (20 channels)
Opening raw data file Data_converted_MetaData\Subject_08\Data_Subject_08_Session_02.h5_seeg_raw.fif...
Isotrak not found
    Range : 0 ... 79999 =      0.000 ...   399.995 secs
Ready.
  Loaded: Data_Subject_08_Session_02.h5_seeg_raw.fif (20 channels)
Opening raw data file Data_converted_MetaData\Subject_08\Data_Subject_08_Session_03.h5_seeg_raw.fif...
Isotrak not found
    Range : 0 ... 79999 =      0.000 

  subject_dir = 'Data_converted_MetaData\Subject_08'



Concatenating FIF files...
✓ Neural data concatenated:
  Duration: 1991.99s
  Channels: 20
  Sampling rate: 200.0 Hz

[2/2] Loading metadata...
Found 5 CSV files:
  Data_Subject_08_Session_01.h5_seeg_raw.csv
  Data_Subject_08_Session_02.h5_seeg_raw.csv
  Data_Subject_08_Session_03.h5_seeg_raw.csv
  Data_Subject_08_Session_04.h5_seeg_raw.csv
  Data_Subject_08_Session_05.h5_seeg_raw.csv

  Loading session 1: Data_Subject_08_Session_01.h5_seeg_raw.csv
    Trials: 50
    Accuracy: 88.0%

  Loading session 2: Data_Subject_08_Session_02.h5_seeg_raw.csv
    Trials: 50
    Accuracy: 94.0%

  Loading session 3: Data_Subject_08_Session_03.h5_seeg_raw.csv
    Trials: 50
    Accuracy: 86.0%

  Loading session 4: Data_Subject_08_Session_04.h5_seeg_raw.csv
    Trials: 50
    Accuracy: 88.0%

  Loading session 5: Data_Subject_08_Session_05.h5_seeg_raw.csv
    Trials: 49
    Accuracy: 93.9%

✓ Metadata concatenated:
  Total trials: 249
  Sessions: 5
  Overall accuracy: 90.0%
  Overall mean RT: 1.538s

## 8. Band-pass Filtering

In [22]:
raw_filtered_highpass = raw_cropped.copy().filter(l_freq=1, h_freq = None, verbose=True)
raw_filtered_both = raw_cropped.copy().filter(l_freq=1, h_freq = None, verbose=True)

Filtering raw data in 5 contiguous segments
Setting up high-pass filter at 1 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal highpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Filter length: 661 samples (3.305 s)

Filtering raw data in 5 contiguous segments
Setting up high-pass filter at 1 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal highpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Filter length: 661 samples (3.305 s)



## 9. ICA Decleration & Fitting

In [38]:
n_components = 19  # Should normally be higher, like 0.999!!
method = 'picard'
max_iter = 500  # Should normally be higher, like 500 or even 1000!!

random_state = 1

ica_highpass = mne.preprocessing.ICA(n_components=n_components,
                            method=method,
                            max_iter=max_iter,
                            random_state=random_state)
ica_highpass.fit(raw_filtered_highpass)

Fitting ICA to data using 19 channels (please be patient, this may take a while)
Selecting by number: 19 components
Fitting ICA took 8.7s.


0,1
Method,picard
Fit parameters,max_iter=500
Fit,52 iterations on raw data (320001 samples)
ICA components,19
Available PCA components,19
Channel types,eeg
ICA components marked for exclusion,—


In [39]:
n_components = 19  # Should normally be higher, like 0.999!!
method = 'picard'
max_iter = 500  # Should normally be higher, like 500 or even 1000!!

random_state = 1

ica_both = mne.preprocessing.ICA(n_components=n_components,
                            method=method,
                            max_iter=max_iter,
                            random_state=random_state)
ica_both.fit(raw_filtered_both)

Fitting ICA to data using 19 channels (please be patient, this may take a while)
Selecting by number: 19 components
Fitting ICA took 13.1s.


0,1
Method,picard
Fit parameters,max_iter=500
Fit,52 iterations on raw data (320001 samples)
ICA components,19
Available PCA components,19
Channel types,eeg
ICA components marked for exclusion,—


## 10. Visualisation & Saving as PDF

In [40]:
ICA_summary_pdf(raw_filtered_highpass, ica_highpass, "ICA_report_highpass.pdf")
ICA_summary_pdf(raw_filtered_both, ica_both, "ICA_report_both.pdf")


Explained variance shape: (19,)
Number of components: 19
Explained variance values:
[0.04947421 0.02753686 0.09322267 0.00229592 0.03495261 0.0461842
 0.04977761 0.00961155 0.06364997 0.04617864 0.0675893  0.08297366
 0.04590059 0.0433387  0.07543416 0.05668208 0.08496475 0.05459133
 0.06564119]
Total variance: 100.00%
Effective window size : 10.240 (s)
Processed component 1/19
Effective window size : 10.240 (s)
Processed component 2/19
Effective window size : 10.240 (s)
Processed component 3/19
Effective window size : 10.240 (s)
Processed component 4/19
Effective window size : 10.240 (s)
Processed component 5/19
Effective window size : 10.240 (s)
Processed component 6/19
Effective window size : 10.240 (s)
Processed component 7/19
Effective window size : 10.240 (s)
Processed component 8/19
Effective window size : 10.240 (s)
Processed component 9/19
Effective window size : 10.240 (s)
Processed component 10/19
Effective window size : 10.240 (s)
Processed component 11/19
Effective window 

## 11. Excluding components & visualisation

In [None]:
generate_ica_rejection_report(ica_highpass, raw = raw_filtered_highpass, exclude_components=[2], output_filename="Rejection_report_highpass_cons")
generate_ica_rejection_report(ica_highpass, raw = raw_filtered_highpass, exclude_components=[2,9,11], output_filename="Rejection_report_highpass_free")
generate_ica_rejection_report(ica_both, raw = raw_filtered_both, exclude_components=[2,9,11], output_filename="Rejection_report_both")

## 12. Saving the cleaned data

In [54]:
ica_highpass.exclude = [2]
reconst_raw_filtered_highpass = raw_filtered_highpass.copy().filter(l_freq = None, h_freq = 40)
ica_highpass.apply(reconst_raw_filtered_highpass)
reconst_raw_filtered_highpass.save(fname = "Data_cleaned/Cleaned_Highpass.fif", overwrite = True)
########
ica_both.exclude = [2,9,11]
reconst_raw_filtered_both = raw_filtered_both.copy()
ica_both.apply(reconst_raw_filtered_both)
reconst_raw_filtered_both.save(fname = "Data_cleaned/Cleaned_Both.fif",overwrite = True)
########
metadata.to_csv("Data_cleaned/Metadata.csv")

Filtering raw data in 5 contiguous segments
Setting up low-pass filter at 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal lowpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 67 samples (0.335 s)

Applying ICA to Raw instance
    Transforming to ICA space (19 components)
    Zeroing out 1 ICA component
    Projecting back using 19 PCA components
Overwriting existing file.
Writing d:\Documents\Studies\Neuroscience\Project\Mem-Proj\Data_cleaned\Cleaned_Highpass.fif
Overwriting existing file.
Closing d:\Documents\Studies\Neuroscience\Project\Mem-Proj\Data_cleaned\Cleaned_Highpass.fif
[done]
Applying ICA to Raw instance
    Transforming to ICA space (19 components)
    Zeroing out 3 ICA components
    Projecting back using 19 PCA componen

  reconst_raw_filtered_highpass.save(fname = "Data_cleaned/Cleaned_Highpass.fif", overwrite = True)
  reconst_raw_filtered_both.save(fname = "Data_cleaned/Cleaned_Both.fif",overwrite = True)


Closing d:\Documents\Studies\Neuroscience\Project\Mem-Proj\Data_cleaned\Cleaned_Both.fif
[done]
