# LIBRARY IMPORT

In [1]:
import pandas as pd
import neurokit2 as nk
from warnings import warn
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kurtosis, skew
import time
import warnings 
from scipy.signal import savgol_filter

# DATA LOADING

In [2]:
# Read the recording from the local CSV export into a DataFrame.
csv_path = r"C:\Users\AmitDebnath\Downloads\Kaggle dataset and ground truth\p20_1280_rows_value.csv"
data = pd.read_csv(csv_path)

# Only the first column is used; it is pulled out as a 1-D array of samples.
first_column = data.iloc[:, 0]
raw_eda_full = first_column.values

# SET SAMPLING RATE AND END POINT

In [3]:
# --- 2. Prepare for the iterative analysis ---
# Sampling rate handed to NeuroKit2, and the (exclusive) end index shared by
# every analysis window.
sampling_rate, end_point = 128, 1260

# Accumulates one feature DataFrame per successfully processed window.
all_results = list()

# MAIN FUNCTION

In [None]:
# --- 3. Loop through the data with an incrementing start point ---
# For every start index, the window raw_eda_full[start_point:end_point] is run
# through NeuroKit2 and one row of features is appended to `all_results`.
# Windows that NeuroKit2 cannot process are reported and skipped (best effort).

# Scale factor applied to the MEAN/MAX/MIN/RANGE features.
# NOTE(review): the name suggests microsiemens -> siemens; confirm the unit of
# the input CSV.
US_TO_S_CONVERSION = 1_000_000.0

# Time between two consecutive samples (loop-invariant).
dt = 1 / sampling_rate

# `np.trapezoid` only exists in NumPy >= 2.0; older releases expose the same
# routine as `np.trapz`. Resolve it once so the loop works on both.
_trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz


def _nan_summary(values):
    """Return (mean, max, min, std) of `values`, ignoring NaNs.

    When there is nothing to summarise (empty input or only NaNs) a tuple of
    four NaNs is returned instead of raising, so a window without any SCR
    event still yields a feature row. The std uses ddof=0, like np.nanstd.
    """
    arr = np.asarray(values, dtype=float)
    arr = arr[~np.isnan(arr)]
    if arr.size == 0:
        return (np.nan, np.nan, np.nan, np.nan)
    return arr.mean(), arr.max(), arr.min(), arr.std()


for start_point in range(end_point):
    # Slice the raw EDA data for the current window.
    raw_eda_window = raw_eda_full[start_point:end_point]

    print(f"--- Processing slice: {start_point} to {end_point} ---")

    # --- 4. Process the sliced signal ---
    try:
        # RuntimeWarnings raised while NeuroKit2 processes a window without
        # peaks are expected for short windows and are silenced here only.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)

            signals, info = nk.eda_process(raw_eda_window, sampling_rate=sampling_rate, method="neurokit")
            analysis_df = nk.eda_intervalrelated(signals, sampling_rate=sampling_rate)

        # Add identifiers for the data slice
        analysis_df['start_index'] = start_point
        analysis_df['end_index'] = end_point

        # Signals the additional features are derived from.
        clean = signals["EDA_Clean"]
        eda_phasic = signals["EDA_Phasic"]

        # --- 1. Statistical features of the cleaned signal ---
        analysis_df['MEAN'] = np.nanmean(clean) / US_TO_S_CONVERSION
        analysis_df['MAX'] = np.nanmax(clean) / US_TO_S_CONVERSION
        analysis_df['MIN'] = np.nanmin(clean) / US_TO_S_CONVERSION
        # MAX and MIN are already scaled, so their difference must not be
        # divided by the conversion factor a second time.
        analysis_df['RANGE'] = analysis_df['MAX'] - analysis_df['MIN']
        analysis_df['SKEWNESS'] = pd.Series(clean).skew()
        analysis_df['KURTOSIS'] = pd.Series(clean).kurtosis()

        # --- 2. Derivative/gradient features of the phasic component ---
        # np.gradient is called without a spacing argument, so both
        # derivatives are expressed per sample, not per second.
        first_derivative = np.gradient(eda_phasic)
        second_derivative = np.gradient(first_derivative)

        analysis_df['MEAN_D1'] = np.mean(first_derivative)
        analysis_df['STD_D1'] = np.std(first_derivative)
        analysis_df['MEAN_D2'] = np.mean(second_derivative)
        analysis_df['STD_D2'] = np.std(second_derivative)

        # --- 3. ALSC: arc length of the phasic component over time ---
        t = np.arange(len(eda_phasic)) / sampling_rate  # time axis of the window
        analysis_df['ALSC'] = np.sum(np.sqrt(np.diff(t) ** 2 + np.diff(eda_phasic) ** 2))

        # --- 4. INSC: integral of the phasic component ---
        analysis_df['INSC'] = np.sum(eda_phasic) * dt  # simple Riemann sum
        analysis_df['INSC_trapz'] = _trapezoid(eda_phasic, dx=dt)  # trapezoidal rule

        # --- 5. APSC: average power, normalised by the peak power ---
        power = eda_phasic ** 2
        peak_power = np.max(power)
        analysis_df['APSC'] = np.mean(power) / peak_power if peak_power != 0 else 0

        # --- 6. RMSC: root mean square, normalised by the peak magnitude ---
        rms = np.sqrt(np.mean(power))
        max_abs = np.max(np.abs(eda_phasic))
        analysis_df['RMSC'] = rms / max_abs if max_abs != 0 else 0

        # --- 7. SCR onsets: cleaned-signal values at the flagged onset samples ---
        onsets = clean[signals["SCR_Onsets"] == 1]
        (analysis_df['MEAN_Onsets'],
         analysis_df['MAX_Onsets'],
         analysis_df['MIN_Onsets'],
         analysis_df['STD_Onsets']) = _nan_summary(onsets)

        # --- 8. SCR peaks: amplitudes at the flagged peak samples ---
        # SCR_Amplitude is a per-sample column, so it is restricted to the rows
        # flagged in SCR_Peaks (mirroring the onset selection above); otherwise
        # every non-peak sample would enter the statistics.
        peaks = signals["SCR_Amplitude"][signals["SCR_Peaks"] == 1]
        (analysis_df['MEAN_Peaks'],
         analysis_df['MAX_Peaks'],
         analysis_df['MIN_Peaks'],
         analysis_df['STD_Peaks']) = _nan_summary(peaks)

        # Append the results of this iteration
        all_results.append(analysis_df)

    except Exception as e:
        # Deliberate best effort: report the failing window and continue.
        print(f"An error occurred during processing at iteration {start_point}: {e}")

In [None]:
# --- 9. Combine all results and save to CSV ---
if all_results:
    # Single source of truth for the export location, so the confirmation
    # message below always names the file that was actually written.
    output_path = "C:/Users/AmitDebnath/Downloads/eda_analysis_results_p20-1280_updated_values_9th_thursday_for_derivative_features.csv"

    # Stack the per-window rows into one table.
    # Neurokit sometimes outputs NaN for mean amplitude when no peaks are found;
    # every NaN in the table is replaced with 0 to keep the export consistent.
    final_results_df = pd.concat(all_results, ignore_index=True).fillna(0)

    print("\n--- Final DataFrame with All Features from All Iterations ---")
    print(final_results_df)

    final_results_df.to_csv(output_path, index=False)
    print(f"\nResults have been successfully saved to '{output_path}'")
else:
    print("\nNo results were generated to save.")