In [None]:
import os
import scipy.io
import numpy as np

def load_mat_file(filepath):
    """Read the MATLAB .mat file at `filepath` and return scipy's loadmat result."""
    mat_contents = scipy.io.loadmat(filepath)
    return mat_contents

def preprocess_ecg_data(subject_id, base_path):
    """Load the ECG recordings of clips 1-36 for one subject.

    Reads ``<base_path>/Movie_P<subject_id:02d>/ECG_Clip01.mat`` through
    ``ECG_Clip36.mat`` and splits each file's ``Data_ECG`` matrix into a
    timestamp vector and the ECG channel columns.

    Args:
        subject_id: Integer subject number; zero-padded to two digits when
            building the subject folder name.
        base_path: Directory that contains the ``Movie_PXX`` subject folders.

    Returns:
        A list with one dict per clip, in clip order. Each dict has:
            "timestamp": column 0 of ``Data_ECG`` (1-D array).
            "ecg": the last two columns of ``Data_ECG`` (2-D array).

    Raises:
        KeyError: if a loaded file has no ``Data_ECG`` variable.
        Any exception ``load_mat_file`` raises for a missing or unreadable
        file is propagated unchanged.
    """
    subject_dir = os.path.join(base_path, f"Movie_P{subject_id:02d}")

    all_ecg_data = []
    for clip_index in range(1, 37):
        filepath = os.path.join(subject_dir, f"ECG_Clip{clip_index:02d}.mat")
        data_ecg = load_mat_file(filepath)["Data_ECG"]

        all_ecg_data.append({
            "timestamp": data_ecg[:, 0],
            # The previous slice `[:, -1:-2]` has start >= stop and therefore
            # always produced an empty (n, 0) array. `[:, -2:]` keeps the last
            # two columns, matching how the later cells of this notebook
            # select the ECG channels.
            "ecg": data_ecg[:, -2:],
        })

    return all_ecg_data

def preprocess_gsr_data(subject_id, base_path):
    """Gather the GSR recordings of clips 1-36 for a single subject.

    Every ``GSR_ClipNN.mat`` (NN = 01..36) under
    ``<base_path>/Movie_P<subject_id:02d>`` is loaded and its ``Data_GSR``
    matrix is split column-wise.

    Returns:
        A list of dicts, one per clip in clip order, holding
        "timestamp" (column 0), "acc_data" (columns 1-3) and
        "eda_data" (last column).
    """
    subject_dir = os.path.join(base_path, f"Movie_P{subject_id:02d}")

    def _read_clip(clip_no):
        # Load one clip's raw matrix and carve it into its named parts.
        clip_path = os.path.join(subject_dir, "GSR_Clip{:02d}.mat".format(clip_no))
        matrix = load_mat_file(clip_path)["Data_GSR"]
        return {
            "timestamp": matrix[:, 0],
            "acc_data": matrix[:, 1:4],
            "eda_data": matrix[:, -1],
        }

    return [_read_clip(clip_no) for clip_no in range(1, 37)]

def preprocess_data(subject_id, base_path):
    """Preprocess the ECG and GSR data of one subject and pickle the result.

    Calls ``preprocess_ecg_data`` and ``preprocess_gsr_data`` for the subject
    and writes ``{"ecg": <ecg clips>, "gsr": <gsr clips>}`` to
    ``<base_path>/S<subject_id:02d>/S<subject_id:02d>.pkl``. The output
    directory is created when it does not exist.

    Args:
        subject_id: Integer subject number; zero-padded to two digits in the
            output folder and file name.
        base_path: Directory holding the input data; also the root under
            which the output folder is created.

    Returns:
        None. The function's result is the pickle file written to disk.
    """
    # This cell's import block does not import pickle, so the original body
    # raised NameError on a fresh kernel. Importing here keeps the function
    # self-contained regardless of which cells ran before it.
    import pickle

    data = {
        "ecg": preprocess_ecg_data(subject_id, base_path),
        "gsr": preprocess_gsr_data(subject_id, base_path),
    }

    subject_tag = f"S{subject_id:02d}"
    save_path = os.path.join(base_path, subject_tag, f"{subject_tag}.pkl")
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    with open(save_path, 'wb') as f:
        pickle.dump(data, f)

# Example driver: run preprocess_data for subject IDs 1 through 58
# (range(1, 59) stops before 59); each call writes one pickle per subject.
# NOTE(review): "/path/to/data" looks like a placeholder -- point base_path at
# the directory that contains the Movie_PXX subject folders before running.
base_path = "/path/to/data"

for subject_id in range(1, 59):
    preprocess_data(subject_id, base_path)


In [65]:
# import os
# import scipy.io
# import numpy as np
# import pickle
# import pandas as pd

# selfreports = scipy.io.loadmat('Dt_Selfreports.mat')
# order_movie = scipy.io.loadmat('Dt_Order_Movie.mat')

# ratings = selfreports['Ratings']

# NS, NV = 58, 36

# rating_comparison = {rating_type: [] for rating_type in ['Arousal', 'Valence']}

# for rating_type_index, rating_type in enumerate(rating_comparison.keys()):
#     rating_matrix = ratings[rating_type_index, :, :]  # NS x NV matrix for the current rating type
#     average_ratings = rating_matrix.mean(axis=1)  # Calculate the average rating for each subject

#     comparison_matrix = (rating_matrix > average_ratings[:, None]).astype(int)  # NS x NV boolean matrix
#     rating_comparison[rating_type] = comparison_matrix

# comparison_dfs = {rating_type: pd.DataFrame(data=comparison_matrix, columns=[f'Video_{i+1}' for i in range(NV)])
#                   for rating_type, comparison_matrix in rating_comparison.items()}

# def load_mat_file(filepath):
#     """Load .mat file and return the data."""
#     return scipy.io.loadmat(filepath)
    
# def preprocess_data(subject_id, base_path):
#     ecg_file_template = "ECG_Clip{}.mat"
#     ecg_file_paths = [os.path.join(base_path, f"ECGData/Movie_P{subject_id:02d}", ecg_file_template.format(i)) for i in range(1, 37)]

#     all_ecg_data = pd.DataFrame()

#     for idx, filepath in enumerate(ecg_file_paths):
#         mat_data = load_mat_file(filepath)
#         data_ecg = pd.DataFrame(mat_data["Data_ECG"])
#         ecg_data = data_ecg.iloc[:, -2:]
        
#         ars = comparison_dfs['Arousal'].iloc[subject_id-1][idx]
#         vlc = comparison_dfs['Valence'].iloc[subject_id-1][idx]

#         ecg_data['ars'] = ars
#         ecg_data['vlc'] = vlc

#         all_ecg_data = pd.concat([all_ecg_data, ecg_data], axis=0)

#     print(all_ecg_data)

#     gsr_file_template = "GSR_Clip{}.mat"
#     gsr_file_paths = [os.path.join(base_path, f"GSRData/Movie_P{subject_id:02d}", gsr_file_template.format(i)) for i in range(1, 37)]

#     all_gsr_data = pd.DataFrame()

#     for filepath in gsr_file_paths:
#         mat_data = load_mat_file(filepath)
#         data_gsr = pd.DataFrame(mat_data["Data_GSR"])
#         acc_eda_data = data_gsr.iloc[:, 1:]

#         all_gsr_data = pd.concat([all_gsr_data, acc_eda_data], axis=0)

#     dict_data = {'signal':
#             {'ecg': all_ecg_data.iloc[:,:1].to_numpy(),
#             'acc': all_gsr_data.iloc[:,:2].to_numpy(),
#             'eda': all_gsr_data.iloc[:,-1].to_numpy()},
#             'label':
#             {'AROUSAL': all_ecg_data[['ars']].to_numpy(),
#             'VALENCE': all_ecg_data[['vlc']].to_numpy()},
#             'subject': f"S{subject_id}"}

#     folder_name = f"S{subject_id}"
#     if not os.path.exists(folder_name):
#         os.makedirs(folder_name)

#     pkl_file_path = os.path.join(folder_name, f"S{subject_id}.pkl")
#     with open(pkl_file_path, 'wb') as pkl_file:
#         pickle.dump(dict_data, pkl_file)

# base_path = "./"

# for subject_id in range(1, 37):
#     preprocess_data(subject_id, base_path)

In [49]:
import scipy.io
import pandas as pd

# Load the self-report ratings and the movie presentation order from the
# current working directory.
selfreports = scipy.io.loadmat('Dt_Selfreports.mat')
order_movie = scipy.io.loadmat('Dt_Order_Movie.mat')

# Extract the ratings array and the permutation list.
# The saved output of this cell shows ratings.shape == (5, 58, 36), i.e. the
# rating type is the FIRST axis.
ratings = selfreports['Ratings']
print(ratings.shape)
permutation_list = order_movie['PermutationList']

# Number of subjects (NS) and videos (NV), taken from the two dimensions of
# the permutation list. NOTE(review): assumes PermutationList is NS x NV.
NS, NV = permutation_list.shape
print(NV)

# One entry per rating type of interest; the placeholder lists are replaced
# below by 0/1 matrices marking ratings above the subject's own mean.
rating_comparison = {rating_type: [] for rating_type in ['Arousal', 'Valence']}

# Only the first two slices of the rating-type axis are used: enumerate()
# pairs index 0 with 'Arousal' and index 1 with 'Valence'.
# NOTE(review): that ordering of the rating types is assumed here -- confirm
# against the dataset documentation.
for rating_type_index, rating_type in enumerate(rating_comparison.keys()):
    # (Earlier variant, rating type on the last axis: ratings[:, :, rating_type_index])
    rating_matrix = ratings[rating_type_index, :, :]  # 2-D slice for the current rating type
    average_ratings = rating_matrix.mean(axis=1)  # Mean over axis 1: one value per row
    print(average_ratings.shape)

    # Compare each rating with its row mean; [:, None] turns the means into a
    # column so they broadcast across the row.
    comparison_matrix = (rating_matrix > average_ratings[:, None]).astype(int)  # 0/1 ints; 1 = strictly above the row mean
    rating_comparison[rating_type] = comparison_matrix

# Wrap each 0/1 matrix in a DataFrame with columns Video_1 .. Video_NV
# (rows keep the default 0-based index).
comparison_dfs = {rating_type: pd.DataFrame(data=comparison_matrix, columns=[f'Video_{i+1}' for i in range(NV)])
                  for rating_type, comparison_matrix in rating_comparison.items()}

# Print the full comparison table for each rating type.
for rating_type, df in comparison_dfs.items():
    print(f'Comparison for {rating_type}:')
    print(df)
    print()


(5, 58, 36)
36
(58,)
(58,)
Comparison for Arousal:
    Video_1  Video_2  Video_3  Video_4  Video_5  Video_6  Video_7  Video_8  \
0         0        0        0        1        1        1        0        1   
1         1        1        1        1        1        0        0        1   
2         1        0        0        0        1        0        0        0   
3         0        0        0        1        1        0        0        0   
4         1        1        1        1        1        1        1        1   
5         1        1        1        1        1        0        1        1   
6         0        1        0        1        1        0        1        1   
7         1        0        0        1        1        1        1        1   
8         1        1        1        1        1        0        0        1   
9         1        1        0        1        1        0        0        0   
10        1        0        0        1        1        0        0        1   
11        0  

In [81]:
import scipy.io
import pandas as pd
import pickle
from pathlib import Path

# Load the self-report ratings from the current working directory.
selfreports = scipy.io.loadmat('Dt_Selfreports.mat')
ratings = selfreports['Ratings']

# Hard-coded subject and video counts. NV sizes the Video_<n> column labels
# below; NS is not referenced elsewhere in this cell.
NS, NV = 58, 36

# For each rating type, build a 0/1 DataFrame marking ratings that are
# strictly above the mean of their own row. preprocess_data reads this
# module-level dict to label each clip.
# NOTE(review): index 0 -> 'Arousal' and index 1 -> 'Valence' on the first
# axis of `ratings` is assumed here -- confirm against the dataset docs.
rating_comparison = {}
for rating_type in ['Arousal', 'Valence']:
    # Position of the name in the list doubles as the index on the first axis.
    rating_matrix = ratings[['Arousal', 'Valence'].index(rating_type), :, :]
    average_ratings = rating_matrix.mean(axis=1)  # one mean per row
    # [:, None] makes the means a column so they broadcast across each row.
    comparison_matrix = (rating_matrix > average_ratings[:, None]).astype(int)
    rating_comparison[rating_type] = pd.DataFrame(comparison_matrix, columns=[f'Video_{i+1}' for i in range(NV)])

def load_mat_file(filepath):
    """Return the contents of the .mat file at `filepath` as loaded by scipy.io.loadmat."""
    loaded = scipy.io.loadmat(filepath)
    return loaded

def preprocess_data(subject_id, base_path):
    """Build and pickle the signal/label bundle for one subject.

    For clips 1-36 this loads
    ``<base_path>/ECGData/Movie_P<subject_id:02d>/ECG_Clip<idx>.mat`` and
    ``<base_path>/GSRData/Movie_P<subject_id:02d>/GSR_Clip<idx>.mat``
    (clip numbers are NOT zero-padded), stacks the clips row-wise and writes
    the result to ``S<subject_id>/S<subject_id>.pkl`` relative to the current
    working directory (not to ``base_path``).

    The pickled dict has the layout::

        {'signal': {'ecg': last two columns of Data_ECG, all clips stacked,
                    'acc': columns 1-3 of Data_GSR, all clips stacked,
                    'eda': last column of Data_GSR, all clips stacked},
         'label':  {'AROUSAL': 0/1 per ECG row,
                    'VALENCE': 0/1 per ECG row},
         'subject': 'S<subject_id>'}

    Labels come from the module-level ``rating_comparison`` dict of
    DataFrames (row ``subject_id - 1``, column ``idx - 1``) and are repeated
    for every ECG row of the clip.
    NOTE(review): labels follow the ECG row count; nothing here checks that
    the GSR clips have the same number of rows.

    Args:
        subject_id: 1-based integer subject number.
        base_path: Directory containing the ``ECGData`` and ``GSRData`` folders.

    Returns:
        None. The function's result is the pickle file written to disk.
    """
    clip_numbers = range(1, 37)
    subject_row = subject_id - 1

    # Process ECG data: keep the last two columns of each clip and tag every
    # row with the clip's binary arousal/valence label.
    ecg_frames = []
    for idx in clip_numbers:
        ecg_filepath = Path(base_path) / f"ECGData/Movie_P{subject_id:02d}/ECG_Clip{idx}.mat"
        data_ecg = pd.DataFrame(load_mat_file(ecg_filepath)["Data_ECG"])

        # .assign returns a new frame, so the label columns are never written
        # onto a slice of data_ecg (the original `ecg_data['ars'] = ...` on an
        # iloc slice triggers pandas' SettingWithCopyWarning).
        ecg_frames.append(
            data_ecg.iloc[:, -2:].assign(
                ars=rating_comparison['Arousal'].iloc[subject_row, idx - 1],
                vlc=rating_comparison['Valence'].iloc[subject_row, idx - 1],
            )
        )
    all_ecg_data = pd.concat(ecg_frames, axis=0, ignore_index=True)

    # Process GSR data: drop column 0 and keep everything else.
    gsr_frames = []
    for idx in clip_numbers:
        gsr_filepath = Path(base_path) / f"GSRData/Movie_P{subject_id:02d}/GSR_Clip{idx}.mat"
        data_gsr = pd.DataFrame(load_mat_file(gsr_filepath)["Data_GSR"])
        gsr_frames.append(data_gsr.iloc[:, 1:])
    all_gsr_data = pd.concat(gsr_frames, axis=0, ignore_index=True)

    # Prepare dictionary for pickle file
    dict_data = {
        'signal': {
            'ecg': all_ecg_data.iloc[:, :2].to_numpy(),
            'acc': all_gsr_data.iloc[:, :3].to_numpy(),
            'eda': all_gsr_data.iloc[:, -1].to_numpy(),
        },
        'label': {
            'AROUSAL': all_ecg_data['ars'].to_numpy(),
            'VALENCE': all_ecg_data['vlc'].to_numpy(),
        },
        'subject': f"S{subject_id}"
    }

    # Create the output folder if it doesn't exist, then save the pickle.
    output_dir = Path(f"S{subject_id}")
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(output_dir / f"S{subject_id}.pkl", 'wb') as pkl_file:
        pickle.dump(dict_data, pkl_file)

# Base path: the ECGData/ and GSRData/ folders are looked up relative to the
# current working directory.
base_path = "./"

# Run the preprocessing loop.
# NOTE(review): range(1, 2) yields only subject 1, so despite looping this
# processes a single subject -- widen the range to cover every subject once
# the single-subject run has been verified.
for subject_id in range(1, 2):
    preprocess_data(subject_id, base_path)


1
