In [None]:
import scipy.io
import pandas as pd
import numpy as np
import os


# Folder on the mounted Google Drive that holds the <subject>.mat recordings
path = '/content/drive/MyDrive/Colab Notebooks/data'

# Subject identifiers S1..S5; each one doubles as a file stem under `path`
subjects = [f"S{n}" for n in range(1, 6)]

# Labels applied, in order, to the eight data columns of each recording
unicorn_channels = [
    "Fz",
    "C3",
    "Cz",
    "C4",
    "Pz",
    "PO7",
    "Oz",
    "PO8",
]

In [None]:
# Mount Google Drive at /content/drive so the files under `path` are reachable.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, so this cell is expected to fail elsewhere — confirm before reuse.
from google.colab import drive
drive.mount('/content/drive')

#### Data conversion
Convert each subject's `.mat` file into a pandas DataFrame and save it as a `.json` file.

In [None]:
# Convert every subject's <subj>.mat file into a DataFrame and write it to
# <path>/json/<subj>.json.

# DataFrame.to_json does not create missing parent folders, so make sure the
# output directory exists before the first write.
json_dir = os.path.join(path, "json")
os.makedirs(json_dir, exist_ok=True)

for subj in subjects:
  file_path = os.path.join(path, subj)
  data = scipy.io.loadmat(file_path  + ".mat")

  # Columns of data["y"] are labelled, in order, with unicorn_channels
  df = pd.DataFrame(data=data["y"], columns=unicorn_channels)
  # loadmat returns MATLAB vectors as 2-D arrays; flatten to a plain 1-D
  # column, matching how the trigger is handled in the combined-dataset cell
  df["trigger"] = data["trig"].flatten()
  # data["fs"] is a nested array; [0][0] pulls out the scalar, which is then
  # broadcast to every row
  df["sampling_rate"] = data["fs"][0][0]
  new_path = os.path.join(json_dir, subj)
  df.to_json(new_path + ".json")

Load one converted dataset (S2) to check the format and list the samples with a non-zero trigger.

In [None]:
# Read back one converted subject. The location is derived from the shared
# `path` setting so it stays in sync with where the conversion cell writes.
df = pd.read_json(os.path.join(path, "json", "S2.json"))
trigger = np.array(df["trigger"])

# Indices of all non-zero trigger samples, followed by how many there are
nonzero_idx = np.nonzero(trigger)
print(nonzero_idx, len(nonzero_idx[0]))

Visualize trigger data

In [None]:
import matplotlib.pyplot as plt
# Wide default figure size; this is a global setting and also affects later plots
plt.rcParams["figure.figsize"] = (20, 5)

# Indices of the samples whose trigger value is 1 (target) or -1 (not target).
# `trigger` comes from the cell that loads a single dataset.
target = np.where(trigger == 1)[0]
not_target = np.where(trigger == -1)[0]
print(len(target), len(not_target))

# Plot every trigger value against its sample index, one 'x' marker per sample
fig, ax = plt.subplots()
ax.plot(trigger, 'x')
ax.set(title="Trigger channel", xlabel="Sample index", ylabel="Trigger value")
plt.show()

In [None]:
# Build one combined table with a row per subject and save it as
# <path>/json/p300.json.

# Set up an empty dictionary of lists. The channel keys are derived from
# unicorn_channels so the two cannot drift apart; key order fixes column order.
all_data = {
    key: []
    for key in ["subject", *unicorn_channels, "trigger", "sampling_rate"]
}

for subj in subjects:
  file_path = os.path.join(path, subj)
  data = scipy.io.loadmat(file_path  + ".mat")
  eeg_data = data["y"]

  # Store column i of the recording as a single array under its channel label
  for i, channel in enumerate(unicorn_channels):
    all_data[channel].append(eeg_data[:, i])

  # Flatten trigger data to obtain a clean 1-D array
  trigger = data["trig"].flatten()

  # Show the indices of the non-zero trigger samples and how many there are
  nonzero_idx = np.nonzero(trigger)
  print(nonzero_idx, len(nonzero_idx[0]))

  # Append remaining metadata
  all_data["subject"].append(subj)
  all_data["trigger"].append(trigger)
  all_data["sampling_rate"].append(int(data["fs"][0][0]))

# Convert dict to df (each channel/trigger cell holds a whole array), then save.
df = pd.DataFrame(all_data)
# DataFrame.to_json does not create missing parent folders
json_dir = os.path.join(path, "json")
os.makedirs(json_dir, exist_ok=True)
new_path = os.path.join(json_dir, "p300")
df.to_json(new_path + ".json")

# Load df back from json to test the round trip
df = pd.read_json(new_path + ".json")
df