# Part I
## loading dataset and installing tools

In [None]:
import os # Operating System API 

if not os.path.exists('./data'):
    !gdown 1cBV6at_F2mPWIXKnRT1AQKSySZX7bnT3 # download DEAP dataset
    !unzip -o /data.zip -d ./ # unzip archive

import numpy as np # numerical arrays and linear algebra
import pandas as pd # data manipulation and preprocessing
import matplotlib.pyplot as plt # for plotting and data visualization
import torch # pytorch for Deep Learning
import torch.nn as nn # neural network module
import torch.nn.functional as F # functional module
import torch.optim as optim # model optimizer module
from sklearn.decomposition import PCA #PCA
import pywt # Wavelet Transform

In [None]:
# Load the pickled recording of each subject (files s1.dat ... s32.dat).
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
DEAP = list(
    map(
        pd.read_pickle,
        (f"./data/s{subject_no}.dat" for subject_no in range(1, 33)),
    )
)

In [None]:
# Signal arrays, one entry per subject.
# Layout: 32 subjects x 40 trials x 40 data rows (including the 32 EEG
# channels) x 8064 samples. Per the dataset notes the time series are
# downsampled to 128 Hz, EOG-filtered, band-pass filtered (4-45 Hz) and
# averaged to the common reference.
# NOTE(review): 8064 samples at 128 Hz is exactly 63 s, so the 3 s pre-trial
# segment appears to still be included -- confirm before relying on it.
data = [subject['data'] for subject in DEAP]
# Rating arrays, one entry per subject.
# Layout: 32 subjects x 40 trials x 4 ratings (Valence, Arousal, Dominance, Liking).
labels = [subject['labels'] for subject in DEAP]

In [None]:
# Keep only the EEG rows of every trial.
# Each trial is a 40 x 8064 array; dropping the last 8 rows leaves the
# 32 EEG channels. The array is sliced directly, so no intermediate
# DataFrame has to be built per trial.
N_NON_EEG_ROWS = 8

# Same nesting as the input: one list per subject, one array per trial.
processed_data = [
    [np.asarray(trial)[:-N_NON_EEG_ROWS] for trial in subject]
    for subject in data
]

In [None]:
# Collapse the subject level so that every trial sits in one flat list.
processed_data_flattened = []
for subject_trials in processed_data:
    processed_data_flattened.extend(subject_trials)

# Flatten the ratings the same way so they stay aligned with the trials.
labels_flattened = []
for subject_labels in labels:
    labels_flattened.extend(subject_labels)

In [None]:
# Sanity check: total number of trials and the shape of the first trial.
trial_count = len(processed_data_flattened)
first_trial_shape = processed_data_flattened[0].shape
print(trial_count, "\n", first_trial_shape)


1280 
 (32, 8064)


In [None]:
# Names of the 32 EEG electrodes.
# NOTE(review): not used in this cell; presumably listed in the dataset's
# channel order -- confirm before using it to label columns.
eeg_signal_names = [
    "Fp1", "AF3", "F3", "F7", "FC5", "FC1", "C3", "T7", "CP5", "CP1",
    "P3", "P7", "PO3", "O1", "Oz", "Pz", "Fp2", "AF4", "Fz", "F4",
    "F8", "FC6", "FC2", "Cz", "C4", "T8", "CP6", "CP2", "P4", "P8",
    "PO4", "O2"
]
# Build one DataFrame per trial (not a single concatenated frame), transposed
# so that rows are time samples and columns are channels.
processed_data_df_list = [
    pd.DataFrame(trial).transpose() for trial in processed_data_flattened
]
# Convert labels_flattened to a DataFrame with one row of ratings per trial.
labels_df = pd.DataFrame(labels_flattened, columns=['Valence', 'Arousal', 'Dominance', 'Liking'])

In [None]:
# Number of per-trial DataFrames (32 subjects x 40 trials).
len(processed_data_df_list)

1280

In [None]:
# Shape of the first trial after the transpose: (time samples, channels).
processed_data_df_list[0].shape

(8064, 32)

In [None]:
# Shape of the ratings table: (trials, rating columns).
labels_df.shape

(1280, 4)

In [None]:
# Project every trial onto 16 principal components.
# NOTE(review): fit_transform is called once per trial, so the projection is
# refit each time and components may not be comparable across trials.
pca = PCA(n_components=16)
for idx, trial_df in enumerate(processed_data_df_list):
    # Replace the list entry in place with its projected array.
    processed_data_df_list[idx] = pca.fit_transform(trial_df)
# Stack the projected trials into one array; this rebinds the name `data`.
data = np.array(processed_data_df_list)

In [None]:
# Shape of the stacked array: (trials, time samples, principal components).
data.shape

(1280, 8064, 16)

In [None]:
# Wavelet scales passed to pywt.cwt. These are scale values, not frequencies;
# pywt.cwt returns the matching frequencies in `freqs`.
scales = np.arange(1, 128)
sampling_frequency = 128  # 128 Hz for DEAP
output_dir = "scalograms"  # Save for later use
os.makedirs(output_dir, exist_ok=True)

# Complex Morlet wavelet with explicit parameters (cmorB-C: bandwidth-centre).
# The bare name 'cmor' is deprecated in PyWavelets and emits a warning.
# NOTE(review): B=1.0 / C=0.5 are intended to reproduce the implicit defaults
# of plain 'cmor' -- confirm against the installed PyWavelets version.
wavelet_name = 'cmor1.0-0.5'

# Take the loop bounds from the array itself instead of hard-coding them;
# the previous upper bound of 1281 ran one past the last trial (IndexError).
n_trials, n_samples, n_components = data.shape

# Perform the CWT for every (trial, principal component) pair and save each
# scalogram as a PNG image.
for trial in range(n_trials):
    for channel in range(n_components):
        signal = data[trial, :, channel]  # Single trial-component time series
        coef, freqs = pywt.cwt(
            signal,
            scales,
            wavelet_name,
            sampling_period=1 / sampling_frequency,
        )
        scalogram = np.abs(coef) ** 2  # Squared magnitude of the coefficients

        filename = f"scalogram_trial{trial}_channel{channel}.png"
        filepath = os.path.join(output_dir, filename)

        # Use a dedicated figure per image so every file has the same scaling.
        plt.figure(figsize=(12, 6))
        plt.imshow(
            scalogram,
            extent=[0, n_samples, freqs[-1], freqs[0]],
            aspect='auto',
            cmap='jet',
        )
        plt.axis('off')  # Image only: no axes, ticks or frame
        plt.tight_layout(pad=0)
        plt.savefig(filepath, bbox_inches='tight', pad_inches=0)
        plt.close()  # Close the figure to free memory
        print(f"Saved scalograms for Trial {trial + 1} and Channel {channel + 1}")


  wavelet = DiscreteContinuousWavelet(wavelet)
