In [17]:
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import os
import sys

The dataset is publicly available at https://www.kaggle.com/datasets/daviderusso7/seed-dataset

## Read the dataset files

In [18]:
# Count how many recordings each user has.
# Recording files are named "<username>_<index>.txt" (the same pattern is used
# when the files are loaded later), and usernames may themselves contain
# underscores, so only the LAST underscore separates the name from the index.
users = dict()
for file in os.listdir(f'./Data25/25-users/'):
    # Skip anything that is not a recording (e.g. hidden/system files).
    if not file.endswith('.txt'):
        continue
    username = file.rsplit('_', 1)[0]
    users[username] = users.get(username, 0) + 1
users

{'rajkumar': 42,
 'Vijay': 42,
 'Ankur_sir': 41,
 'pawan_sahu': 41,
 'mahendra': 42,
 'Suraj_sir': 42,
 'Sandeep': 42,
 'Rupak': 42,
 'Soumendu': 42,
 'Mohit': 42,
 'Rajesh_el': 42,
 'pradeep': 42,
 'Veerpal': 41,
 'Viraj_1': 42,
 'Kishore_babu': 42,
 'Vipin_1': 42,
 'Abhishek': 42,
 'Sachin': 42,
 'Rockysingh': 41,
 'Gautam_123': 42,
 'taufiq': 41,
 'Gautam': 42,
 'Ravi_baba': 42,
 'Girvar_yadav': 42,
 'Ravi_ph': 42}

In [19]:
# Ordered list of usernames; a user's position in this list is its index.
usernames = list(users)
usernames

['rajkumar',
 'Vijay',
 'Ankur_sir',
 'pawan_sahu',
 'mahendra',
 'Suraj_sir',
 'Sandeep',
 'Rupak',
 'Soumendu',
 'Mohit',
 'Rajesh_el',
 'pradeep',
 'Veerpal',
 'Viraj_1',
 'Kishore_babu',
 'Vipin_1',
 'Abhishek',
 'Sachin',
 'Rockysingh',
 'Gautam_123',
 'taufiq',
 'Gautam',
 'Ravi_baba',
 'Girvar_yadav',
 'Ravi_ph']

In [20]:
# EEG channel names. getChannel() returns the position of a name in this list,
# which is then used as the row index into a loaded recording.
channels = [
    "AF3", "F7", "F3", "FC5", "T7", "P7", "O1",
    "O2", "P8", "T8", "FC6", "F4", "F8", "AF4",
]

In [21]:
def readEEGFile(path):
    """Read a whitespace-separated numeric text file into a float64 array.

    Each non-blank line of the file becomes one row of numbers; the result is
    transposed, so the returned array has one row per file column and one
    column per file line.

    Parameters
    ----------
    path : str
        Path of the text file to read (decoded as UTF-8).

    Returns
    -------
    numpy.ndarray
        Transposed float64 array of the file contents.

    Raises
    ------
    ValueError
        If a token cannot be converted to a float.
    """
    file_content = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # split() without an argument ignores the trailing newline and any
            # repeated/trailing spaces, which split(" ") would turn into empty
            # tokens that cannot be converted to float.
            values = line.split()
            if not values:
                # Blank line (e.g. at the end of the file): nothing to parse.
                continue
            file_content.append(np.array(values).astype(np.float64))
    return np.array(file_content).T

def readLabel(path):
    """Return the first line of a label file as a one-element string array.

    Only the first line of the file is read; surrounding whitespace
    (including the newline) is stripped.
    """
    with open(path, "r", encoding="utf-8") as handle:
        first_line = handle.readline()
    return np.array([first_line.strip()])

def getChannel(ch):
    """Return the position of channel name `ch` in the `channels` list.

    Raises ValueError if `ch` is not one of the listed channel names.
    """
    position = channels.index(ch)
    return position

def getUserName(index : int):
    """Return the username stored at position `index` of `usernames`."""
    name = usernames[index]
    return name

def getUserIdx(user : str):
    """Return the position of `user` in `usernames` (inverse of getUserName).

    Raises ValueError if `user` is not a known username.
    """
    position = usernames.index(user)
    return position

In [22]:
getChannel("F3")  # "F3" is the third entry of `channels`, so this evaluates to 2

2

In [23]:
# Load one recording and plot the row selected by getChannel("FC5").
eegSample = readEEGFile("Data25/25-users/Abhishek_1.txt")
px.line(eegSample[getChannel("FC5")])

In [24]:
# Example: read the label stored for recording Abhishek_1
# (readLabel returns a one-element array holding the file's first line).
labels_ex = readLabel("Data25/labels/Abhishek_1.lab")

In [25]:
# Containers filled by the loading cells below: one entry per user in each.
dataset, labels = [], []

In [26]:
# Load every recording of every user.
# `dataset` is rebuilt from scratch inside this cell so that re-running the
# cell does not append a second copy of every user's data.
# dataset[userIdx] is the array of all recordings of the user at that index.
dataset = []
for userIdx in range(len(users.keys())):
    user = getUserName(userIdx)
    # Recording files are numbered from 1 up to the per-user count in `users`.
    recordings = [
        readEEGFile(f"./Data25/25-users/{user}_{eegIdx}.txt")
        for eegIdx in range(1, users[user] + 1)
    ]
    dataset.append(np.array(recordings))
dataset

[array([[[4238.461538, 4236.410256, 4222.564103, ..., 4200.      ,
          4195.897436, 4192.820513],
         [4244.615385, 4247.179487, 4229.74359 , ..., 4223.589744,
          4220.      , 4211.282051],
         [4234.358974, 4235.384615, 4223.589744, ..., 4209.230769,
          4207.692308, 4198.974359],
         ...,
         [4240.      , 4241.538462, 4236.410256, ..., 4216.410256,
          4219.487179, 4218.974359],
         [4230.25641 , 4228.717949, 4218.461538, ..., 4184.102564,
          4189.74359 , 4187.179487],
         [4238.461538, 4229.230769, 4231.282051, ..., 4203.076923,
          4199.487179, 4194.358974]],
 
        [[4195.384615, 4194.871795, 4196.923077, ..., 4176.410256,
          4177.948718, 4166.153846],
         [4213.333333, 4222.051282, 4221.538462, ..., 4180.512821,
          4187.692308, 4170.25641 ],
         [4203.076923, 4208.717949, 4201.538462, ..., 4242.051282,
          4243.076923, 4229.74359 ],
         ...,
         [4218.974359, 4218.97435

In [27]:
len(dataset)  # number of per-user entries in `dataset`

25

## The dataset is a list with one array per user, indexed as `dataset[user][recording][channel]`

In [34]:
# Plot the "AF3" row of the recording at index 1 for user "rajkumar".
px.line(dataset[getUserIdx("rajkumar")][1][getChannel("AF3")])

In [35]:
# Build one binary label array per user, aligned with `dataset`.
# `labels` is rebuilt from scratch inside this cell so that re-running the
# cell does not append a second copy of every user's labels.
labels = []
for userIdx in range(len(dataset)):
    user = getUserName(userIdx)
    user_labels = []
    for eegIdx in range(1, users[user] + 1):
        label = readLabel(f"./Data25/labels/{user}_{eegIdx}.lab")
        # readLabel returns a one-element array, so compare its single string
        # rather than relying on the truthiness of an array comparison.
        # NOTE: "Disike" is intentionally left as-is; the recorded output
        # contains 1s, so this spelling matches what the label files contain.
        user_labels.append(1 if label[0] == "Disike" else 0)
    labels.append(np.array(user_labels))
labels[getUserIdx("rajkumar")]

array([0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1])

### Small EDA

In [229]:
# Summary statistics over all recordings of the first user only (dataset[0]).
first_user = dataset[0]
print(f"Min feature value: {np.min(first_user)}")
print(f"Max feature value: {np.max(first_user)}")
print(f"Mean feature value: {np.mean(first_user)}")
print(f"Median feature value: {np.median(first_user)}")
print(f"Any negative values?: {np.any(first_user < 0)}")

Min feature value: 2094.871795
Max feature value: 4497.948718
Mean feature value: 4222.53425209544
Median feature value: 4225.128205
Any negative values?: False
