In [5]:
import numpy as np
from dataclasses import dataclass
from typing import List, Dict, Optional
import mne  

@dataclass
class EEGTrial:
    """One EEG trial: the recorded signal together with its metadata.

    The first six fields are required and must be supplied in the order
    declared here; the last three are optional annotations.
    """

    # --- signal and labelling (required) ---
    # Signal samples; presumably laid out as (n_channels, n_samples), since it
    # is handed to MNE together with ``channel_names`` -- TODO confirm.
    raw_signal: np.ndarray
    command_label: str
    subject_id: str
    trial_number: int

    # --- recording set-up (required) ---
    channel_names: List[str]
    # Passed to MNE as the sampling frequency (presumably Hz).
    sampling_rate: int

    # --- optional annotations ---
    # ``None`` means "not annotated"; the time unit is not fixed by this class.
    movement_onset: Optional[float] = None
    movement_offset: Optional[float] = None
    # Defaults to False; nothing in this class updates it.
    artifacts_removed: bool = False
    
def preprocess_eeg_data(
    trial: EEGTrial,
    l_freq: float = 0.5,
    h_freq: float = 45.0,
    notch_freq: float = 50.0,
    random_state: Optional[int] = None,
) -> np.ndarray:
    """
    Band-pass, notch-filter and ICA-process one trial, returning the signal array.

    Args:
        trial: Trial whose ``raw_signal`` is wrapped in an MNE ``RawArray``
            using ``channel_names`` and ``sampling_rate``. All channels are
            declared as type ``'eeg'``.
        l_freq: Lower band-pass edge passed to ``raw.filter`` (default 0.5).
        h_freq: Upper band-pass edge passed to ``raw.filter`` (default 45).
        notch_freq: Frequency passed to ``raw.notch_filter`` (default 50,
            i.e. 50 Hz mains; use 60 where the mains frequency is 60 Hz).
        random_state: Seed forwarded to ``mne.preprocessing.ICA``. ``None``
            (the default) keeps the previous unseeded behaviour; pass an int
            for reproducible decompositions.

    Returns:
        The processed data as returned by ``raw.get_data()``.

    Notes:
        ``trial.raw_signal`` is no longer modified. Previously the RawArray
        could share the caller's buffer (float64 input is not copied with the
        default ``copy='auto'``), and the in-place filters then overwrote the
        trial's stored signal.
    """
    ch_types = ['eeg'] * len(trial.channel_names)
    info = mne.create_info(trial.channel_names, trial.sampling_rate, ch_types)

    # copy="both" forces a private copy of the samples so the in-place
    # filtering below cannot alter trial.raw_signal.
    raw = mne.io.RawArray(trial.raw_signal, info, copy="both")

    raw.filter(l_freq, h_freq)    # Bandpass filter
    raw.notch_filter(notch_freq)  # Remove power line noise

    # NOTE(review): no components are marked for exclusion (ica.exclude is
    # never set), so apply() presumably reconstructs the signal without
    # removing any artifact component -- confirm and select components
    # before relying on this step for artifact removal.
    ica = mne.preprocessing.ICA(random_state=random_state)
    ica.fit(raw)
    raw = ica.apply(raw)

    return raw.get_data()

def extract_features(
    processed_data: np.ndarray,
    window_size: int = 1000,
    sampling_rate: float = 256.0,
) -> Dict:
    """
    Extract relevant features from preprocessed EEG data

    Band power is estimated from a periodogram of the first ``window_size``
    samples: each channel is mean-centred, transformed with a real FFT and
    converted to a one-sided power spectral density. The value reported for a
    band is the mean PSD over all channels and all frequency bins inside that
    band. (The earlier implementation stored the same mean raw amplitude under
    every band name, so the four entries were always identical.)

    Args:
        processed_data: 2-D array indexed as ``[channel, sample]``.
        window_size: Number of leading samples used for the band-power
            estimate. Shorter recordings simply use all available samples.
        sampling_rate: Sampling frequency in Hz used to map FFT bins to
            frequencies. Defaults to 256; pass the real rate of the recording,
            otherwise the band edges will be wrong.

    Returns:
        Dict with two entries:
        ``'band_power'``: scalars for ``'delta'`` (0.5-4 Hz), ``'theta'``
        (4-8 Hz), ``'alpha'`` (8-13 Hz) and ``'beta'`` (13-30 Hz); ``nan``
        when the window is empty or contains no FFT bin inside the band.
        ``'temporal_features'``: per-channel ``'mean'``, ``'std'`` and
        ``'max'`` over the whole recording (not just the window).

    Raises:
        ValueError: If ``sampling_rate`` is not positive.
    """
    if sampling_rate <= 0:
        raise ValueError("sampling_rate must be positive")

    # Half-open intervals [low, high) so adjacent bands never share a bin.
    bands = {
        'delta': (0.5, 4.0),
        'theta': (4.0, 8.0),
        'alpha': (8.0, 13.0),
        'beta': (13.0, 30.0),
    }

    window = np.asarray(processed_data)[:, :window_size]
    n_channels, n_samples = window.shape

    if n_channels == 0 or n_samples == 0:
        # Nothing to transform; report "no estimate" instead of failing in the FFT.
        band_power = {name: np.nan for name in bands}
    else:
        # Remove each channel's DC offset so it cannot leak into the low bands.
        centered = window - window.mean(axis=1, keepdims=True)
        psd = np.abs(np.fft.rfft(centered, axis=1)) ** 2 / (sampling_rate * n_samples)
        # One-sided spectrum: double every bin except DC (and Nyquist when
        # the window length is even, where that bin has no mirror image).
        psd[:, 1:] *= 2.0
        if n_samples % 2 == 0:
            psd[:, -1] /= 2.0
        freqs = np.fft.rfftfreq(n_samples, d=1.0 / sampling_rate)

        band_power = {}
        for name, (low, high) in bands.items():
            in_band = (freqs >= low) & (freqs < high)
            band_power[name] = psd[:, in_band].mean() if in_band.any() else np.nan

    features = {
        'band_power': band_power,
        'temporal_features': {
            'mean': np.mean(processed_data, axis=1),
            'std': np.std(processed_data, axis=1),
            'max': np.max(processed_data, axis=1),
        }
    }
    return features

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from typing import Tuple

class EEGDataProcessor:
    """Turn a row-per-sample EEG CSV into scaled train/test feature tables.

    The CSV is expected to provide ``patient``, ``epoch`` and ``label``
    columns plus one column per EEG channel whose name starts with ``EEG-``.
    """

    def __init__(self, csv_file: str):
        """Read ``csv_file`` into ``self.df`` and record the EEG channel columns."""
        self.df = pd.read_csv(csv_file)
        # Stored for reference; no method of this class reads it.
        self.sampling_rate = 256
        self.eeg_columns = list(
            filter(lambda name: name.startswith('EEG-'), self.df.columns)
        )

    def prepare_ml_data(self, test_size: float = 0.2, random_state: int = 42, save_path: str = "preprocessed_data") -> None:
        """
        Build the feature matrix, split it, scale it and write it to disk.

        Steps:
        1. Group rows by (patient, epoch).
        2. Compute one feature vector per group; the group's label is taken
           from its first row.
        3. Make a stratified train/test split.
        4. Fit a StandardScaler on the training part only and apply it to both.
        5. Save both parts as ``<save_path>_train.csv`` / ``<save_path>_test.csv``.

        Args:
            test_size: Fraction of epochs placed in the test set.
            random_state: Seed handed to ``train_test_split``.
            save_path: Prefix of the two output CSV files.
        """
        epochs = [frame for _key, frame in self.df.groupby(['patient', 'epoch'])]

        # Channels become rows so that the statistics are taken over time.
        feature_rows = [
            self._extract_epoch_features(frame[self.eeg_columns].values.T)
            for frame in epochs
        ]
        epoch_labels = [frame['label'].iloc[0] for frame in epochs]

        X, y = np.array(feature_rows), np.array(epoch_labels)

        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=test_size,
            random_state=random_state,
            stratify=y,
        )

        # The scaler only ever sees training data when it is fitted.
        scaler = StandardScaler()
        train_scaled = scaler.fit_transform(X_train)
        test_scaled = scaler.transform(X_test)

        self._save_to_csv(train_scaled, y_train, test_scaled, y_test, save_path)

    def _extract_epoch_features(self, epoch_data: np.ndarray) -> np.ndarray:
        """Return per-row mean, std, max and min of ``epoch_data`` as one flat vector.

        The output lists all means first, then all standard deviations, then
        all maxima, then all minima (each reduced along axis 1).
        """
        reducers = (np.mean, np.std, np.max, np.min)
        return np.concatenate([reduce_fn(epoch_data, axis=1) for reduce_fn in reducers])

    def _save_to_csv(self, X_train: np.ndarray, y_train: np.ndarray, X_test: np.ndarray, y_test: np.ndarray, save_path: str):
        """Write the train and test sets to ``<save_path>_train.csv`` and ``<save_path>_test.csv``.

        Each file has columns ``Feature_0 .. Feature_{k-1}`` followed by
        ``Label``; the row index is not written. The two destinations are
        printed once both files have been saved.
        """

        def as_labelled_frame(features, labels):
            # One generically named column per feature, label column last.
            column_names = [f"Feature_{i}" for i in range(features.shape[1])]
            frame = pd.DataFrame(features, columns=column_names)
            frame['Label'] = labels
            return frame

        train_df = as_labelled_frame(X_train, y_train)
        test_df = as_labelled_frame(X_test, y_test)

        train_csv_path = f"{save_path}_train.csv"
        test_csv_path = f"{save_path}_test.csv"

        train_df.to_csv(train_csv_path, index=False)
        test_df.to_csv(test_csv_path, index=False)

        print(f"Train data saved to {train_csv_path}")
        print(f"Test data saved to {test_csv_path}")



# Entry point: load the EEG CSV and write the scaled train/test feature tables.
# NOTE(review): "your_eeg_data.csv" looks like a placeholder path -- confirm
# the real file name before running.
if __name__ == "__main__":
    processor = EEGDataProcessor(csv_file="your_eeg_data.csv")
    # Produces preprocessed_data_train.csv and preprocessed_data_test.csv.
    processor.prepare_ml_data(save_path="preprocessed_data")


Train data saved to preprocessed_data_train.csv
Test data saved to preprocessed_data_test.csv
