In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset stored under
# /content/drive can be read like a local directory.
# Colab-specific: the google.colab package is provided by the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Root directory of the NeuroSky recordings on the mounted Drive; the discovery
# cell lists this directory and expects "UserXX" sub-folders directly inside it.
root_folder = '/content/drive/MyDrive/MLMA_Data/BS-HMS-Dataset/NeuroSky-based-Data'

In [None]:
# Manifest columns, filled in lockstep: one entry per Neurosky.csv that is found.
users = []
sessions = []
activities = []
csv_paths = []

# os.listdir returns entries in arbitrary order, so every listing is sorted to
# make the traversal (and therefore the manifest row order) reproducible.
for user_folder in sorted(os.listdir(root_folder)):
    # folder name is in format "UserXX"
    is_user_folder = (
        user_folder.startswith('User')
        and len(user_folder) == 6
        and user_folder[4:].isdigit()
    )
    if not is_user_folder:
        continue
    user_path = os.path.join(root_folder, user_folder)

    # A user is only considered when both sessions exist.
    session_folders = sorted(
        f for f in os.listdir(user_path) if os.path.isdir(os.path.join(user_path, f))
    )
    if not ('Session-I' in session_folders and 'Session-II' in session_folders):
        print("Session folders not found in:", user_folder)
        continue

    for session_folder in session_folders:
        session_path = os.path.join(user_path, session_folder)

        activity_folders = sorted(
            f for f in os.listdir(session_path) if os.path.isdir(os.path.join(session_path, f))
        )
        if len(activity_folders) != 4:  # 4 activities
            # NOTE: only this session is skipped; the user's other session is
            # still collected if it has all four activity folders.
            print("Ignoring user", user_folder, "due to missing activity folders")
            continue

        for activity_folder in activity_folders:
            activity_path = os.path.join(session_path, activity_folder)
            neurosky_path = os.path.join(activity_path, 'Neurosky')

            if not os.path.exists(neurosky_path):
                print("Neurosky folder not found in:", activity_folder)
                continue

            csv_file = os.path.join(neurosky_path, 'Neurosky.csv')
            if not os.path.isfile(csv_file):
                # Previously skipped silently; report it so gaps in the manifest are visible.
                print("Neurosky.csv not found in:", neurosky_path)
                continue

            users.append(user_folder)
            sessions.append(session_folder)
            activities.append(activity_folder)
            csv_paths.append(str(csv_file))

Ignoring user User02 due to missing activity folders


In [None]:
# Assemble the manifest: one row per discovered recording, built from the four
# parallel lists filled by the directory walk.
data = dict(User=users, Session=sessions, Activity=activities, CSV_Path=csv_paths)
df = pd.DataFrame(data)

In [None]:
# Order the manifest by user. sort_values keeps each row's original index label,
# so after this step the index labels need not match the row positions.
df_sorted = df.sort_values(by='User')
df_sorted

Unnamed: 0,User,Session,Activity,CSV_Path
0,User01,Session-II,Activity_One,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...
1,User01,Session-II,Activity_Two,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...
2,User01,Session-II,Activity_Three,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...
3,User01,Session-II,Activity_Four,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...
4,User01,Session-I,Activity_Three,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...
...,...,...,...,...
122,User32,Session-II,Activity_Three,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...
121,User32,Session-II,Activity_One,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...
120,User32,Session-II,Activity_Two,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...
127,User32,Session-I,Activity_Four,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...


In [None]:
# Load one Neurosky.csv into a two-column (label, value) frame
def process_to_df(csv_path):
  """Read a recording file and return its label/value rows without blinks.

  Each line of the file is read as a single string (the file is parsed with a
  tab separator), then split on commas. Every line must split into exactly
  three fields, because three column names are assigned afterwards. The first
  line is dropped (presumably a header row; confirm against the data).

  Args:
    csv_path: Path to the file to read.

  Returns:
    DataFrame with columns 'Column2' and 'Column3', excluding rows whose
    'Column2' equals "Blink". Values are left as the strings produced by the
    split.
  """
  whole_lines = pd.read_csv(csv_path, sep='\t', header=None)
  fields = whole_lines[0].str.split(',', expand=True).iloc[1:]
  fields.columns = ['Column1', 'Column2', 'Column3']
  label_value = fields[['Column2', 'Column3']]
  keep = label_value['Column2'] != "Blink"
  return label_value[keep]

In [None]:
# Split a (label, value) frame into chunks and build one feature list per chunk
def extract_features(df):
  """Turn a label/value frame into a list of per-chunk feature lists.

  Rows are scanned in order and a chunk is closed at every row whose first
  column equals "Meditation". For each closed chunk the feature list is:
  the "Raw" values, right-padded with integer 0 up to 512 entries (not
  truncated if there are more), followed by the values of 'Delta',
  'highAlpha', 'lowAlpha', 'lowBeta', 'highBeta', 'lowGamma', 'midGamma',
  'theta', 'Attention' and 'Meditation', in that order.

  A chunk with more than 524 rows before its closing row is skipped entirely.
  If the frame ends without a closing "Meditation" row (and that remainder is
  not over the 524-row limit), an empty list is appended for it.

  Args:
    df: Frame whose first column holds the row label and whose second column
      holds the value.

  Returns:
    List of feature lists; values are returned as stored in the frame.
  """
  band_order = ['Delta', 'highAlpha', 'lowAlpha', 'lowBeta', 'highBeta',
                'lowGamma', 'midGamma', 'theta', 'Attention', 'Meditation']
  total_rows = len(df)
  # Read the label column once instead of doing a scalar lookup per row.
  row_labels = df.iloc[:, 0].tolist() if total_rows else []

  features = []
  start = 0
  while start < total_rows:
    # Advance to the row that closes this chunk (or to the end of the data).
    stop = start
    while stop < total_rows and row_labels[stop] != "Meditation":
      stop += 1

    if stop - start <= 524:
      chunk_features = []
      # Only a chunk that actually ends on a "Meditation" row yields values.
      if stop < total_rows:
        window = df.iloc[start:stop + 1]

        def values_for(label):
          return window[window.iloc[:, 0] == label].iloc[:, 1].tolist()

        raw_values = values_for("Raw")
        raw_values.extend([0] * (512 - len(raw_values)))

        band_values = []
        for band in band_order:
          band_values.extend(values_for(band))

        chunk_features = raw_values + band_values
      features.append(chunk_features)

    # Move to next chunk
    start = stop + 1

  return features

In [None]:
def convert_to_int(extracted_data):
  """Cast every value to int, drop the final entry, and return a NumPy array.

  Every sub-list (including the last one) is converted with int() first; the
  last converted sub-list is then always discarded, presumably to remove the
  trailing incomplete chunk that extract_features can emit (confirm).

  Args:
    extracted_data: List of lists of int-convertible values.

  Returns:
    np.ndarray built from all converted sub-lists except the last.
  """
  as_ints = []
  for chunk in extracted_data:
    as_ints.append(list(map(int, chunk)))
  return np.array(as_ints[:-1])

In [None]:
# Build one feature array per manifest row, in the row order of df_sorted.
#
# The CSV_Path column is iterated by position. Indexing it as
# df_sorted['CSV_Path'][i] would look up index *label* i, and df_sorted's labels
# are shuffled by the sort, so the resulting list would not line up with the
# rows it is later attached to.
matrix = []
for i, csv_path in enumerate(df_sorted['CSV_Path']):
  # Lightweight progress indicator.
  if i % 30 == 0:
    print(i)
  # Use a separate name so the manifest frame `df` is not overwritten.
  signal_df = process_to_df(csv_path)
  extracted_data = extract_features(signal_df)
  extracted_int = convert_to_int(extracted_data)
  matrix.append(extracted_int)

0
30
60
90
120
150
180
210
240


In [None]:
# Attach the per-recording feature arrays. `matrix` is a plain list, so pandas
# assigns it by position (row order), not by index label; its order must
# therefore match the row order of df_sorted.
df_sorted['Features'] = matrix
df_sorted

Unnamed: 0,User,Session,Activity,CSV_Path,Features
0,User01,Session-II,Activity_One,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, ..."
1,User01,Session-II,Activity_Two,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[38, 38, 39, 38, 38, 38, 38, 38, 38, 39, 38, ..."
2,User01,Session-II,Activity_Three,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, ..."
3,User01,Session-II,Activity_Four,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[38, 38, 38, 37, 38, 37, 38, 38, 38, 38, 38, ..."
4,User01,Session-I,Activity_Three,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[38, 39, 39, 39, 39, 39, 38, 39, 39, 39, 38, ..."
...,...,...,...,...,...
122,User32,Session-II,Activity_Three,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[-282, -281, -278, -276, -275, -273, -269, -2..."
121,User32,Session-II,Activity_One,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[-279, -277, -275, -273, -269, -267, -265, -2..."
120,User32,Session-II,Activity_Two,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[-1011, -970, -935, -900, -1122, -1090, -1057..."
127,User32,Session-I,Activity_Four,/content/drive/MyDrive/MLMA_Data/BS-HMS-Datase...,"[[38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, ..."


In [None]:
# Export the manifest plus features and hand the file to the browser (Colab only).
import json

# to_csv writes each ndarray cell as its string form, and NumPy abbreviates
# large arrays with "..." in that form, which would silently drop most of the
# feature values. JSON-encode every array so the CSV holds the full data and
# can be read back with json.loads.
export_df = df_sorted.copy()
if 'Features' in export_df.columns:
  export_df['Features'] = [
      json.dumps(np.asarray(features).tolist()) for features in export_df['Features']
  ]
export_df.to_csv("Neurosky_eeg.csv", index=False)

from google.colab import files
files.download("Neurosky_eeg.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>