In [3]:
import pandas as pd
import numpy as np

def calculate_skew_kurt(window):
    """Compute population skewness and excess kurtosis along axis 0 (NumPy, CPU).

    The moments are accumulated in float64. Single-precision accumulation can
    leave rounding residue in the mean, which makes a constant column look as
    if it had a tiny non-zero spread and turns its standardized values into
    +/-1 artefacts. The results are cast back to float32 so the returned dtype
    is the same as with the float32 implementation.

    Args:
        window: NumPy array; the statistics are reduced over axis 0.

    Returns:
        Tuple ``(skew, kurt)`` of float32 values, one entry per position along
        the remaining axes. ``kurt`` is excess kurtosis (Fisher's definition,
        i.e. the fourth standardized moment minus 3). A column with zero
        spread yields ``skew == 0`` and ``kurt == -3``.
    """
    # Upcast once; every reduction below then runs in double precision.
    values = window.astype(np.float64)

    # Population moments (ddof=0).
    mean = np.mean(values, axis=0)
    std = np.std(values, axis=0, ddof=0)
    diffs = values - mean

    # A zero spread means every deviation is exactly 0; divide by a tiny
    # positive number instead so z becomes 0 rather than 0/0 = NaN.
    safe_std = np.where(std == 0, 1e-8, std)

    # Standardized values
    z = diffs / safe_std

    skew = np.mean(z**3, axis=0)
    kurt = np.mean(z**4, axis=0) - 3  # Fisher's definition

    return skew.astype(np.float32), kurt.astype(np.float32)

def extract_time_features(window, eeg_columns):
    """Build a flat dict of per-channel time-domain statistics for one window.

    Every statistic is reduced over axis 0 of ``window`` and the resulting
    values are paired positionally with ``eeg_columns``. Keys have the form
    ``'<column>_<suffix>'`` and are inserted statistic by statistic, in this
    order: mean, std, var, max, min, ptp, skew, kurt, zcross, energy, rms.

    Args:
        window: NumPy array reduced along axis 0 (the caller in this file
            passes a row slice of a 2-D samples-by-columns array).
        eeg_columns: Iterable of column names, one per reduced value.

    Returns:
        dict mapping feature name to a plain Python number.
    """
    skewness, kurtosis = calculate_skew_kurt(window)

    # Squared samples feed both the energy (sum) and the RMS (root of mean).
    squared = window**2

    # A crossing is any change of the "value >= 0" indicator between
    # consecutive rows.
    polarity = (window >= 0).astype(int)
    crossed = np.diff(polarity, axis=0) != 0

    # (suffix, per-column values). The order of this table fixes the key
    # order of the returned dict.
    per_column_stats = [
        ('mean', np.mean(window, axis=0)),
        ('std', np.std(window, axis=0)),
        ('var', np.var(window, axis=0)),
        ('max', np.max(window, axis=0)),
        ('min', np.min(window, axis=0)),
        ('ptp', np.ptp(window, axis=0)),
        ('skew', skewness),
        ('kurt', kurtosis),
        ('zcross', np.sum(crossed, axis=0)),
        ('energy', np.sum(squared, axis=0)),
        ('rms', np.sqrt(np.mean(squared, axis=0))),
    ]

    features = {}
    for suffix, stat_values in per_column_stats:
        for col, val in zip(eeg_columns, stat_values.tolist()):
            features[f'{col}_{suffix}'] = val

    return features

def sliding_window_features(eeg_data, outcomes, eeg_columns, window_size, step_size):
    """Slide a fixed-length window over the rows and collect features and labels.

    Windows start at 0, step_size, 2 * step_size, ... and only full-length
    windows are used; a trailing remainder shorter than ``window_size`` is
    dropped.

    Args:
        eeg_data: Array whose first axis is sliced into windows.
        outcomes: Sequence sliced with the same row ranges as ``eeg_data``.
        eeg_columns: Column names forwarded to ``extract_time_features``.
        window_size: Number of rows per window.
        step_size: Row offset between consecutive window starts.

    Returns:
        Tuple ``(features_list, targets)``: one feature dict and one 0/1
        label per window, in window order.
    """
    total_rows = eeg_data.shape[0]
    window_starts = range(0, total_rows - window_size + 1, step_size)

    features_list, targets = [], []
    for lo in window_starts:
        hi = lo + window_size
        signal_slice = eeg_data[lo:hi]
        outcome_slice = outcomes[lo:hi]

        features_list.append(extract_time_features(signal_slice, eeg_columns))

        # The window is labelled 1 as soon as any outcome value in it is
        # truthy, otherwise 0 (adjust this rule for other use cases).
        has_event = bool(np.any(outcome_slice))
        targets.append(int(has_event))

    return features_list, targets

# Main processing
if __name__ == "__main__":
    import os  # local import: only the script entry point needs it

    # Load the preprocessed recording. read_csv infers the column dtypes
    # here; the explicit float32 conversion happens on the arrays below.
    data = pd.read_csv('/Users/puchku-home/KIIT SEM/PROJECT/EEG/EEG Assets/chbmit_preprocessed_data.csv')
    # Every column except the label column is treated as a signal column.
    eeg_columns = [col for col in data.columns if col != 'Outcome']

    # Convert to NumPy arrays with float32 precision
    eeg_data = np.asarray(data[eeg_columns].values, dtype=np.float32)
    outcomes = np.asarray(data['Outcome'].values, dtype=np.float32)

    # Window parameters
    fs = 256  # Sampling frequency
    window_size = fs * 1  # 1-second windows
    step_size = window_size // 2  # 50% overlap

    # Process data on CPU
    features, targets = sliding_window_features(
        eeg_data, outcomes, eeg_columns, window_size, step_size
    )

    # Convert to DataFrame and save
    features_df = pd.DataFrame(features)
    features_df['target'] = targets

    # Create the destination folder first so a missing directory does not
    # throw away the finished extraction at the very last step.
    output_path = '/Users/puchku-home/Downloads/TIME FEATURES/timefeature_data.csv'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    features_df.to_csv(output_path, index=False)
    print("Feature extraction complete. Features saved to 'timefeature_data.csv'")


Feature extraction complete. Features saved to 'timefeature_data.csv'
