# EEG Biometrics


In [1]:
# Run this cell to load required libraries and mount your Drive folder

import numpy as np
from matplotlib import pyplot as plt
from google.colab import drive
import os
import json
import csv


# Mount Google Drive at /content/drive; the dataset and CSV paths used by the cells below live under it
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Run this cell to navigate through the Drive filesystem and extract the dataset for a specific task.
# Output: one CSV row per recorded sample of every trial of the selected task. Each row starts with
# the Participant, Date, Run, Task and Trial columns, followed by the channel values.

dataset_path = '/content/drive/MyDrive/ml2-eeg-biometrics/raw-data/'

task_duration = 5 # Task average duration is 5 seconds

dataset = {}

left_hand_task = 769
right_hand_task = 770
both_feet_task = 771
idle_task = 780

labels = np.array(['Participant', 'Date', 'Run', 'Task','Trial','F3', 'F1', 'Fz', 'F2', 'F4', 'FC5', 'FC3', 'FC1', 'FCz', 'FC2', 'FC4', 'FC6', 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4', 'CP6', 'hEOG', 'vEOG', 'F5', 'AF3', 'AF4', 'P5', 'P3', 'P1', 'Pz', 'P2', 'P4', 'P6', 'PO3', 'POz', 'PO4', 'Oz', 'F6'])

# os.listdir returns entries in arbitrary order; sorting keeps the row order and the
# "RunN" numbering identical from one execution to the next.
folder_list = sorted(os.listdir(dataset_path))

# Open CSV file for write and initialize the header
with open('/content/drive/MyDrive/eeg_dataset_right_hand_task.csv', "w") as csv_file:
  # Every field, including the last one, is followed by a comma (header and data rows alike)
  csv_file.write("".join(label + "," for label in labels) + "\n")

  # Navigate through participants
  for participant_folder in folder_list:
    participant_path = os.path.join(dataset_path, participant_folder)
    if not os.path.isdir(participant_path):
      continue # stray file next to the participant folders

    # Navigate through run folders for each participant
    for folder in sorted(os.listdir(participant_path)):
      session_path = os.path.join(participant_path, folder)
      if not os.path.isdir(session_path):
        continue
      # The date is the part of the folder name after the first underscore
      date = folder.split("_")[1]

      # Navigate through the .npz recording file for each run
      run_identifier = 1
      for folder_run in sorted(os.listdir(session_path)):
        if not folder_run.lower().endswith(".npz"):
          continue # only .npz recordings are counted as runs

        # The context manager closes the archive's file handle once the arrays are read
        with np.load(os.path.join(session_path, folder_run)) as npz_data:
          sample_rate = npz_data['SampleRate'][0]
          marked_tasks = npz_data['MarkOnSignal']
          data = npz_data['signal']

        # Number of rows copied per trial (the original note mentions 2500 rows,
        # which presumably corresponds to a 500 Hz recording - TODO confirm)
        samples_per_trial = int(sample_rate * task_duration)

        # We navigate through marked tasks to find the index of the task we want to copy,
        # then write an entire line on file for each of the subsequent samples
        trial = 1
        for task in marked_tasks:
          if task[1] == right_hand_task:
            start_index = int(task[0])
            end_index = start_index + samples_per_trial

            # A recording can stop before the trial window is over. Clip the window up front
            # so that a half-written row (metadata without values or newline) never reaches the file.
            if end_index > len(data):
              print("Recording " + folder_run + " ends at index " + str(len(data)) + ": trial " + str(trial) + " is truncated")
              end_index = len(data)

            row_prefix = (participant_folder + "," + date + "," + "Run" + str(run_identifier) + ","
                          + str(task[1]) + "," + str(trial) + ",")
            for sample_index in range(start_index, end_index):
              sample = data[sample_index]
              # The last column of each sample is deliberately left out, as before
              # (presumably it is not an EEG channel - TODO confirm against the recording format)
              values = "".join(str(value) + "," for value in sample[:sample.size - 1])
              csv_file.write(row_prefix + values + "\n")
            trial += 1

        run_identifier += 1

        print("Run " + folder_run + " for participant " + participant_folder + " done!")
    print("Participant " + participant_folder + " done!")

In [None]:
# Run this cell to navigate through the Drive filesystem and extract a subset of the dataset for a specific task.
# Output: one CSV row per recorded sample of every trial of the selected task. Each row starts with
# the Participant, Date, Run, Task and Trial columns, followed by the selected channel values.

dataset_path = '/content/drive/MyDrive/ml2-eeg-biometrics/raw-data/'

task_duration = 5 # Task average duration is 5 seconds

dataset = {}

left_hand_task = 769
right_hand_task = 770
both_feet_task = 771
idle_task = 780

labels = np.array(['Participant', 'Date', 'Run', 'Task','Trial','F3', 'F4', 'FC3', 'FC4', 'C3', 'Cz', 'C4', 'CP3', 'CP4'])
# Column positions, inside each sample of the recording, of the channels named in `labels`
subset_indexes = np.array([0, 4, 6, 10, 13, 15, 17, 20, 24])
# The header has 5 metadata columns followed by one column per selected channel
assert len(labels) == 5 + len(subset_indexes), "labels and subset_indexes disagree"

# os.listdir returns entries in arbitrary order; sorting keeps the row order and the
# "RunN" numbering identical from one execution to the next.
folder_list = sorted(os.listdir(dataset_path))

# Open CSV file for write and initialize the header
with open('/content/drive/MyDrive/eeg_dataset_right_hand_task_subset_9channels.csv', "w") as csv_file:
  # Every field, including the last one, is followed by a comma (header and data rows alike)
  csv_file.write("".join(label + "," for label in labels) + "\n")

  # Navigate through participants
  for participant_folder in folder_list:
    participant_path = os.path.join(dataset_path, participant_folder)
    if not os.path.isdir(participant_path):
      continue # stray file next to the participant folders

    # Navigate through run folders for each participant
    for folder in sorted(os.listdir(participant_path)):
      session_path = os.path.join(participant_path, folder)
      if not os.path.isdir(session_path):
        continue
      # The date is the part of the folder name after the first underscore
      date = folder.split("_")[1]

      # Navigate through the .npz recording file for each run
      run_identifier = 1
      for folder_run in sorted(os.listdir(session_path)):
        if not folder_run.lower().endswith(".npz"):
          continue # only .npz recordings are counted as runs

        # The context manager closes the archive's file handle once the arrays are read
        with np.load(os.path.join(session_path, folder_run)) as npz_data:
          sample_rate = npz_data['SampleRate'][0]
          marked_tasks = npz_data['MarkOnSignal']
          data = npz_data['signal']

        # Number of rows copied per trial (the original note mentions 2500 rows,
        # which presumably corresponds to a 500 Hz recording - TODO confirm)
        samples_per_trial = int(sample_rate * task_duration)

        # We navigate through marked tasks to find the index of the task we want to copy,
        # then write an entire line on file for each of the subsequent samples
        trial = 1
        for task in marked_tasks:
          if task[1] == right_hand_task:
            start_index = int(task[0])
            end_index = start_index + samples_per_trial

            # A recording can stop before the trial window is over. Clip the window up front
            # so that a half-written row (metadata without values or newline) never reaches the file.
            if end_index > len(data):
              print("Recording " + folder_run + " ends at index " + str(len(data)) + ": trial " + str(trial) + " is truncated")
              end_index = len(data)

            row_prefix = (participant_folder + "," + date + "," + "Run" + str(run_identifier) + ","
                          + str(task[1]) + "," + str(trial) + ",")
            for sample_index in range(start_index, end_index):
              # Pick the selected channels in one step; an out-of-range entry in
              # subset_indexes now raises instead of being silently swallowed
              selected = data[sample_index][subset_indexes]
              values = "".join(str(value) + "," for value in selected)
              csv_file.write(row_prefix + values + "\n")
            trial += 1

        run_identifier += 1

        print("Run " + folder_run + " for participant " + participant_folder + " done!")
    print("Participant " + participant_folder + " done!")

In [None]:
# Dataset reader: walk the nested `dataset` mapping (participant -> date -> trial)
# and print every level, with a blank separator after each date and each participant
for participant, sessions in dataset.items():
  print(participant)
  for date, trials in sessions.items():
    print(date)
    for trial, recording in trials.items():
      print(trial + ":")
      print(recording)
    print("\n")
  print("\n")

In [None]:
# Write a small example CSV for one recording: for every mark with code 770,
# dump the channel names and the values of the first sample of that trial.
example_file_name = '/content/drive/MyDrive/ml2-eeg-biometrics/raw-data/A1/A1_20190613/NSsignal_2019_06_13_19_16_33.npz'

npz_data = np.load(example_file_name)
npz_data_dict = dict(npz_data)
sample_rate = npz_data_dict['SampleRate'][0]
marks = npz_data_dict['MarkOnSignal']
data = npz_data_dict['signal']
channel_labels = np.array(['F3', 'F1', 'Fz', 'F2', 'F4', 'FC5', 'FC3', 'FC1', 'FCz', 'FC2', 'FC4', 'FC6', 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4', 'CP6'])

# The channel-name line is the same for every trial, so it is assembled once
channel_header = "".join(str(channel) + "," for channel in channel_labels) + "\n"

with open('eeg_dataset_example.csv', "w") as csv_file:
  # File preamble identifying the recording
  csv_file.write("Participant A1" + "\n")
  csv_file.write("Day 20190613" + "\n")
  csv_file.write("Run 2019_06_13_19_16_33" + "\n")

  trial = 0
  for mark in marks:
    if mark[1] != 770:
      continue # not the task being exported
    start_index = mark[0]
    first_sample = data[start_index]
    csv_file.write("Trial" + str(trial) + "\n")
    csv_file.write(channel_header)
    # Only as many values as there are channel labels are written
    for position in range(channel_labels.size):
      csv_file.write(str(first_sample[position]) + ",")
    csv_file.write("\n")
    trial += 1
print("Done writing CSV file")

In [None]:
# Load a single recording from the S1 folder and print its raw contents for inspection
example_file_name = '/content/drive/MyDrive/ml2-eeg-biometrics/raw-data/S1/S1_20200724/NSsignal_2020_07_24_10_19_46.npz'

npz_data = np.load(example_file_name)
# Materialise every array stored in the archive into a plain dictionary
npz_data_dict = {key: npz_data[key] for key in npz_data.files}
sample_rate = npz_data_dict['SampleRate'][0]
marks, data = npz_data_dict['MarkOnSignal'], npz_data_dict['signal']
print(npz_data_dict)

In [None]:
# Run this cell to save the changes.
# It flushes and unmounts the Drive folder mounted in the first cell, so run it after the
# export cells have finished; cells that read or write under /content/drive need the mount.

drive.flush_and_unmount()
print('All changes made in this colab session should now be visible in Drive.')

All changes made in this colab session should now be visible in Drive.
