In [None]:
import pandas as pd

# Path of the CSV file that holds the dataset
file_path = '/content/full_dataset.csv'

# Read the whole CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows (5 is the pandas default) to inspect the column layout
df.head(n=5)


In [None]:
from scipy.signal import resample
import numpy as np
import pandas as pd

def downsample_eeg_with_subject(data, original_fs, target_fs):
    """Resample the numeric columns of ``data`` from ``original_fs`` to ``target_fs``.

    Every numeric column -- including ``trigger`` -- is passed through
    ``scipy.signal.resample``. The resampled ``trigger`` values are then
    rounded to the nearest integer. ``participant_id`` is never resampled:
    its first value is repeated on every output row.

    NOTE(review): ``scipy.signal.resample`` is Fourier based, so an isolated
    trigger sample is spread over neighbouring samples before rounding.
    Markers can therefore disappear or be duplicated; compare the counts with
    ``verify_downsampling`` before relying on the ``trigger`` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a numeric ``trigger`` column and a ``participant_id``
        column. Other non-numeric columns are not carried over.
    original_fs : int or float
        Sampling rate of ``data`` (Hz).
    target_fs : int or float
        Desired sampling rate (Hz).

    Returns
    -------
    pandas.DataFrame
        ``int(len(data) * target_fs / original_fs)`` rows holding the
        resampled numeric columns (input order) followed by
        ``participant_id``.

    Raises
    ------
    KeyError
        If ``data`` has no numeric ``trigger`` column or no
        ``participant_id`` column.
    """
    non_numeric_cols = ['participant_id']

    # Identifier columns are excluded even when they are stored as numbers:
    # resampling an ID is meaningless and would leave two 'participant_id'
    # columns in the result.
    numeric_cols = [
        col for col in data.select_dtypes(include=[np.number]).columns
        if col not in non_numeric_cols
    ]
    if 'trigger' not in numeric_cols:
        raise KeyError('trigger')

    # Number of rows after resampling (truncated towards zero)
    num_samples = int(len(data) * target_fs / original_fs)

    # Resample each numeric column, triggers included
    resampled = {
        col: resample(data[col].to_numpy(), num_samples) for col in numeric_cols
    }

    # Round triggers to the nearest integer; int() also turns -0.0 into 0
    resampled['trigger'] = [int(value) for value in np.rint(resampled['trigger'])]

    # Identifier columns: repeat the first value on every output row
    for col in non_numeric_cols:
        resampled[col] = [data[col].iloc[0]] * num_samples

    # Dict insertion order gives: numeric columns, then identifier columns
    return pd.DataFrame(resampled)

# Checkup Function to verify timestamp preservation and counts
def verify_downsampling(original_data, downsampled_data):
    """Print a comparison of the trigger markers before and after downsampling.

    For both frames the function reports the index label of the first row
    whose ``trigger`` equals ``1`` and of the first row whose ``trigger``
    equals ``-1`` (``None`` when the value never occurs), followed by the
    total number of ``1`` and ``-1`` rows in each frame.

    Parameters
    ----------
    original_data : pandas.DataFrame
        Frame before downsampling; must contain a ``trigger`` column.
    downsampled_data : pandas.DataFrame
        Frame after downsampling; must contain a ``trigger`` column.

    Returns
    -------
    None
        The report is written to standard output.
    """
    def first_index(frame, value):
        # Index label of the first matching row, or None when nothing matches
        matches = frame.index[(frame['trigger'] == value).to_numpy()]
        return matches[0] if len(matches) > 0 else None

    def count(frame, value):
        # Number of rows whose trigger equals `value`
        return int((frame['trigger'] == value).sum())

    # Follow the first '1' and '-1' in the original data and compare in the downsampled data
    print(f"First '1' in original data at index: {first_index(original_data, 1)}")
    print(f"First '1' in downsampled data at index: {first_index(downsampled_data, 1)}")

    print(f"First '-1' in original data at index: {first_index(original_data, -1)}")
    print(f"First '-1' in downsampled data at index: {first_index(downsampled_data, -1)}")

    # Count the total number of '1s' and '-1s' in original and downsampled data
    print(f"Original data count of '1s': {count(original_data, 1)}, '-1s': {count(original_data, -1)}")
    print(f"Downsampled data count of '1s': {count(downsampled_data, 1)}, '-1s': {count(downsampled_data, -1)}")

# Sampling rate of the data in `df`, in Hz
original_fs = 250

# Downsample `df` from 250 Hz to 125 Hz
downsampled_125Hz = downsample_eeg_with_subject(df, original_fs, 125)

# Backward-compatible alias: the result used to be stored under this name,
# although the target rate passed above is 125 Hz, not 128 Hz.
downsampled_128Hz = downsampled_125Hz

# Compare trigger positions and counts before and after downsampling
verify_downsampling(df, downsampled_125Hz)


In [None]:
from scipy.signal import resample
import numpy as np
import pandas as pd

def downsample_eeg_with_subject(data, original_fs, target_fs):
    """Resample ``data`` to ``target_fs`` while keeping trigger markers intact.

    Numeric columns other than ``trigger`` are resampled with
    ``scipy.signal.resample``. The ``trigger`` column is not resampled:
    the input rows are split into exactly as many consecutive windows as
    there are output rows, and each window is collapsed to a single marker:

    * ``1``  if any sample in the window equals ``1``;
    * ``-1`` otherwise, if any sample in the window equals ``-1``;
    * ``0``  otherwise.

    A window that contains both markers therefore reports ``1``.
    ``participant_id`` is filled with its first value on every output row.

    Because the windows are derived from the output row count, all columns
    always have the same length, also when ``len(data)`` is not a multiple of
    the rate ratio or when ``original_fs / target_fs`` is not an integer.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``trigger`` and ``participant_id`` columns. Non-numeric
        columns other than ``participant_id`` are not carried over.
    original_fs : int or float
        Sampling rate of ``data`` (Hz).
    target_fs : int or float
        Desired sampling rate (Hz).

    Returns
    -------
    pandas.DataFrame
        ``int(len(data) * target_fs / original_fs)`` rows with the resampled
        numeric columns (sorted by name), then ``trigger``, then
        ``participant_id``.

    Raises
    ------
    ValueError
        If the requested rates would produce an empty result.
    KeyError
        If ``trigger`` or ``participant_id`` is missing from ``data``.
    """
    non_numeric_cols = ['participant_id']

    # Number of rows after resampling (truncated towards zero)
    n_in = len(data)
    num_samples = int(n_in * target_fs / original_fs)
    if num_samples < 1:
        raise ValueError(
            f"Cannot resample {n_in} samples from {original_fs} Hz to "
            f"{target_fs} Hz: the output would be empty."
        )

    # Signal columns only: the trigger is handled separately below, and
    # identifier columns must not be resampled even when stored as numbers
    # (that would also leave two 'participant_id' columns in the result).
    numeric_cols = data.select_dtypes(include=[np.number]).columns.difference(
        ['trigger', *non_numeric_cols]
    )
    resampled = {
        col: resample(data[col].to_numpy(), num_samples) for col in numeric_cols
    }

    # Map every input row i to the output row floor(i * num_samples / n_in).
    # This yields exactly `num_samples` windows that cover all input rows and
    # reduces to fixed windows of `original_fs // target_fs` rows when the
    # ratio is an integer that divides len(data).
    trigger = data['trigger'].to_numpy()
    bins = (np.arange(n_in, dtype=np.int64) * num_samples) // n_in

    downsampled_trigger = np.zeros(num_samples, dtype=np.int64)
    downsampled_trigger[bins[trigger == -1]] = -1
    downsampled_trigger[bins[trigger == 1]] = 1  # written last so that 1 wins over -1
    resampled['trigger'] = downsampled_trigger

    # Identifier columns: repeat the first value on every output row
    for col in non_numeric_cols:
        resampled[col] = [data[col].iloc[0]] * num_samples

    # Dict insertion order gives: numeric columns, trigger, identifier columns
    return pd.DataFrame(resampled)

# Sampling rate of the data in `df`, in Hz
original_fs = 250

# Downsample `df` from 250 Hz to 125 Hz and preserve triggers
downsampled_125Hz = downsample_eeg_with_subject(df, original_fs, 125)

# Backward-compatible alias: the result used to be stored under this name,
# although the target rate passed above is 125 Hz, not 128 Hz.
downsampled_128Hz = downsampled_125Hz

# Check the result
verify_downsampling(df, downsampled_125Hz)
