In [1]:
import pandas as pd
import pickle
import os
import numpy as np

# Path of the pickled recording to load, relative to the working directory
file_path = './deap_orig/data_preprocessed_python/s01.dat'

# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# point file_path at files obtained from a trusted source.
# encoding='latin1' is handed to pickle.load as-is; presumably the file was
# pickled by Python 2 -- adjust if the file was written differently.
with open(file_path, 'rb') as dat_file:
    database = pickle.load(dat_file, encoding='latin1')

# Keep the two entries of the loaded mapping that the following cells use
data, label = database['data'], database['labels']

In [2]:
# Show the layout of the loaded array before slicing it
# (the recorded run printed (40, 40, 8064)).
print(f"original data shape: {data.shape}")

# Index 0 is the first entry along the leading axis. The previous code used
# data[1], i.e. the second entry, which contradicted the "first video" naming
# used for the variable and in the printed message.
first_video = data[0]

# Transpose so that the long axis becomes the rows and each of the 40
# remaining series becomes one column (presumably one channel per column --
# confirm against the dataset description).
first_video_df = pd.DataFrame(first_video).T
print(f"first video final shape: {first_video_df.shape}")

original data shape: (40, 40, 8064)
first video final shape: (8064, 40)


In [3]:
def select_columns(df, columns_to_keep):
    """
    Return the columns of ``df`` located at the given positions.

    Selection is positional (``iloc``), so column labels are ignored and the
    result lists the columns in the order given by ``columns_to_keep``.

    Parameters:
    df (pd.DataFrame): Frame to take the columns from; it is not modified.
    columns_to_keep (list of int): Zero-based positions of the columns to keep.

    Returns:
    pd.DataFrame: All rows of ``df`` restricted to the requested columns.
    """
    every_row = slice(None)
    subset = df.iloc[every_row, columns_to_keep]
    return subset

In [4]:
# Zero-based positions of the 14 columns to retain from first_video_df.
# NOTE(review): if this list is meant to be the 14 Emotiv-style electrodes of
# the DEAP montage, double-check index 28 -- P8 is presumably index 29 --
# confirm against the dataset's channel list.
columns_to_keep = [
    1, 2, 3, 4, 7, 11, 13,
    17, 19, 20, 21, 25, 28, 31,
]
new_combined_df = select_columns(first_video_df, columns_to_keep)

# Every row is kept and 14 columns are selected; the recorded run printed (8064, 14).
print(new_combined_df.shape)

(8064, 14)


In [5]:

# Split every channel of new_combined_df into five "band" columns.
#
# The samples are walked in consecutive segments of SEGMENT_LENGTH samples.
# Inside each segment, a sample is copied into the band column whose offset
# range contains the sample's position within the segment; every other cell
# is left at 0.0.
#
# NOTE(review): the band boundaries below are applied to the *sample offset*
# inside a segment, not to frequency bins -- no FFT or filtering happens here.
# If per-band spectral content was intended, this needs a spectral transform;
# confirm the intent.
# NOTE(review): no offset range is assigned to 'Delta', so every Delta column
# is all zeros (offsets 0-3 and 46-63 are never copied anywhere).

# Band columns emitted for each channel, in output order
BAND_NAMES = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']
# Number of samples in one segment (the original comment relates this to a 128 Hz rate)
SEGMENT_LENGTH = 64
# Inclusive (first, last) offsets within a segment copied into each band column
BAND_OFFSETS = {
    'Theta': (4, 7),
    'Alpha': (8, 13),
    'Beta': (14, 30),
    'Gamma': (31, 45),
}

# One group of band columns per channel; derived from the input instead of a
# hard-coded 70 so the cell keeps working if the channel selection changes.
num_groups = new_combined_df.shape[1]
num_columns = num_groups * len(BAND_NAMES)

# 'Delta 1', 'Theta 1', ..., 'Gamma 1', 'Delta 2', ... (channel-major order)
column_names = [
    f'{band} {group + 1}'
    for group in range(num_groups)
    for band in BAND_NAMES
]

amount_of_samples = len(new_combined_df)
# Position of every sample inside its segment (0 .. SEGMENT_LENGTH - 1).
# A trailing partial segment is handled naturally, whereas the element-wise
# loop this replaces raised when the sample count was not a multiple of 64.
offset_in_segment = np.arange(amount_of_samples) % SEGMENT_LENGTH

# (samples, channels) array of the selected channels
channel_values = new_combined_df.to_numpy(dtype=float)

# (samples, channels, bands); bands without an offset range remain all zeros
band_values = np.zeros((amount_of_samples, num_groups, len(BAND_NAMES)))
for band_position, band in enumerate(BAND_NAMES):
    if band not in BAND_OFFSETS:
        continue
    first_offset, last_offset = BAND_OFFSETS[band]
    in_band = (offset_in_segment >= first_offset) & (offset_in_segment <= last_offset)
    # Keep the sample where its offset falls in the band, 0.0 elsewhere
    band_values[:, :, band_position] = np.where(in_band[:, np.newaxis], channel_values, 0.0)

# Flatten (channels, bands) into columns; C-order keeps each channel's five
# bands adjacent, which matches the order of column_names.
result_video_df = pd.DataFrame(
    band_values.reshape(amount_of_samples, num_columns),
    columns=column_names,
)
print(f"result video df shape: {result_video_df.shape}")

result video df shape: (8064, 70)
