In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gdown
import os
import re

In [None]:
# Google Drive share link of the archive that holds the raw recordings.
data_url = "https://drive.google.com/file/d/1-v02Ll1rRkc_GG9h3I1-WSYCI_8A4O6w/view?usp=sharing"
# fuzzy=True lets gdown pull the file id out of a share-style URL.
# The call returns the local file name, which becomes the cell output.
gdown.download(url=data_url, fuzzy=True)

Downloading...
From: https://drive.google.com/uc?id=1-v02Ll1rRkc_GG9h3I1-WSYCI_8A4O6w
To: /content/class.rar
100%|██████████| 95.1k/95.1k [00:00<00:00, 78.1MB/s]


'class.rar'

In [None]:
# Maps the label prefixes found in the recording file names to the English
# class names used in the exported dataset.
CLASS_MAP = dict([
    ("yukari", "Up"),
    ("asagi", "Down"),
    ("sag", "Right"),
    ("sol", "Left"),
    ("kirp", "Blink"),
])

In [None]:
!unrar e class.rar data/


UNRAR 5.61 beta 1 freeware      Copyright (c) 1993-2018 Alexander Roshal


Extracting from class.rar


Would you like to replace the existing file data/asagi10h.txt
  1241 bytes, modified on 2008-12-30 07:24
with a new one
  1241 bytes, modified on 2008-12-30 07:24

[Y]es, [N]o, [A]ll, n[E]ver, [R]ename, [Q]uit 
User break

User break


In [None]:
# Scratch check of the numeric-id pattern used when parsing the file names.
# The pattern is a raw string: "\d" inside a plain string literal is an invalid
# escape sequence and triggers a warning on current Python versions.
re.search(r"\d+", "yukarı17h.txt")[0]

'17'

In [None]:
# os.listdir returns the entries in arbitrary order, so sort the listing in
# place to get a stable, name-ordered sequence of file names.
files = os.listdir("data")
files.sort()

In [None]:
# Sanity check: number of directory entries in `files`, before any filtering.
len(files)

278

In [None]:
# Parse every file name into (label, id, polarity) and load the recordings of
# the files that pass all three checks; everything else is reported and skipped.
#   label    - first run of letters in the name, must be one of VALID_LABELS
#              (compared case-insensitively)
#   id       - first run of digits in the name, at most two digits long
#   polarity - fifth character from the end of the name, "h" or "v"
# Names spelled with the dotless "ı" (e.g. "yukarı...") are matched by the
# label pattern but are not in VALID_LABELS, so they are discarded.
VALID_LABELS = ("yukari", "asagi", "sag", "sol", "kirp")
VALID_POLARITIES = ("h", "v")
N_SAMPLES = 251  # number of values expected in every recording file

labels = []
ids = []
polarities = []
recordings = []
for file_name in files:
  label_match = re.search("[a-zA-Zöı]+", file_name)
  id_match = re.search(r"\d+", file_name)
  # Slicing (instead of indexing with -5) yields "" for very short names
  # rather than raising IndexError.
  polarity = file_name[-5:-4]

  is_valid = (
      label_match is not None
      and label_match[0].lower() in VALID_LABELS
      and id_match is not None
      and len(id_match[0]) <= 2
      and polarity in VALID_POLARITIES
  )
  if not is_valid:
    print("Discarding file: " + file_name)
    continue

  signal = np.loadtxt(os.path.join("data", file_name))
  if signal.shape != (N_SAMPLES,):
    # Fail loudly: a recording of another size cannot be stacked with the rest
    # (or, if two-dimensional, would silently misalign rows and labels).
    raise ValueError(
        "Unexpected sample shape %s in file: %s" % (signal.shape, file_name))
  recordings.append(signal)

  # Store the normalised (lower-case) label that was validated above, so the
  # later label mapping also matches names written with capital letters.
  labels.append(label_match[0].lower())
  ids.append(id_match[0])
  polarities.append(polarity)

# Stack once after the loop instead of re-copying the array for every file.
# Row 0 stays an all-ones placeholder because the cell that builds `samples`
# drops the first row of `data` with data[1:, :].
data = np.vstack([np.ones(N_SAMPLES)] + recordings)

Discarding file: Edit2.txt
Discarding file: Edit2h.txt
Discarding file: Edit2v.txt
Discarding file: kayit.txt
Discarding file: kirp111h.txt
Discarding file: serkanh.txt
Discarding file: serkanv.txt
Discarding file: yukarih.txt
Discarding file: yukarı10h.txt
Discarding file: yukarı11h.txt
Discarding file: yukarı12h.txt
Discarding file: yukarı13h.txt
Discarding file: yukarı14h.txt
Discarding file: yukarı15h.txt
Discarding file: yukarı16h.txt
Discarding file: yukarı17h.txt
Discarding file: yukarı18h.txt
Discarding file: yukarı19h.txt
Discarding file: yukarı1h.txt
Discarding file: yukarı20h.txt
Discarding file: yukarı2h.txt
Discarding file: yukarı3h.txt
Discarding file: yukarı4h.txt
Discarding file: yukarı5h.txt
Discarding file: yukarı6h.txt
Discarding file: yukarı7h.txt
Discarding file: yukarı8h.txt
Discarding file: yukarı9h.txt
Discarding file: yukarıh.txt
Discarding file: örnek1.txt
Discarding file: örnek10h.txt
Discarding file: örnek11h.txt
Discarding file: örnek12h.txt
Discarding file

In [None]:
# Per-recording metadata parsed from the file names, one row per kept file.
metadata_columns = {
    "id": ids,
    "label": labels,
    "polarity": polarities,
}
df = pd.DataFrame(metadata_columns)

In [None]:
# Row 0 of `data` is the np.ones(251) placeholder that seeded the stacking in
# the loading cell, so it is sliced off here; the remaining rows are the
# loaded recordings, one row per kept file.
# NOTE(review): this rebinds `samples`, a name the loading loop also used for
# the per-file array.
samples = pd.DataFrame(data[1:, :])

In [None]:
# Put the metadata columns and the sample columns side by side; both frames
# are aligned on their row index.
dataset = pd.concat((df, samples), axis="columns")

In [None]:
# Number of rows per polarity value ("h" and "v" are the only values the
# loading cell lets through).
dataset['polarity'].value_counts()

h    100
v    100
Name: polarity, dtype: int64

In [None]:
# Number of rows per raw label, before any renaming.
dataset['label'].value_counts()

asagi     40
kirp      40
sag       40
sol       40
yukari    40
Name: label, dtype: int64

In [None]:
# Normalise the dotless-"ı" spelling of the "up" label to its ASCII form.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on a column selection is chained assignment, which warns on recent pandas
# and does not modify `dataset` at all under Copy-on-Write.
# NOTE(review): the loading cell's whitelist only admits the ASCII "yukari",
# so with the current filter this replacement matches nothing.
dataset['label'] = dataset['label'].replace({"yukarı": "yukari"})

In [None]:
# Re-check the label counts after the "yukarı" -> "yukari" normalisation.
dataset['label'].value_counts()

asagi     40
kirp      40
sag       40
sol       40
yukari    40
Name: label, dtype: int64

In [None]:
# Translate the label prefixes to the English class names from CLASS_MAP.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on a column selection is chained assignment, which warns on recent pandas
# and does not modify `dataset` at all under Copy-on-Write.
dataset['label'] = dataset['label'].replace(CLASS_MAP)

In [None]:
# Label counts after mapping to the English class names.
dataset['label'].value_counts()

Down     40
Blink    40
Right    40
Left     40
Up       40
Name: label, dtype: int64

In [None]:
# Display the assembled table: id, label, polarity, then one column per sample.
# NOTE(review): dataset.head() would keep the rendered output shorter.
dataset

Unnamed: 0,id,label,polarity,0,1,2,3,4,5,6,...,241,242,243,244,245,246,247,248,249,250
0,10,Down,h,123.0,126.0,134.0,131.0,125.0,131.0,130.0,...,149.0,143.0,139.0,143.0,141.0,139.0,138.0,142.0,136.0,137.0
1,10,Down,v,123.0,122.0,127.0,130.0,123.0,123.0,127.0,...,137.0,134.0,133.0,130.0,136.0,135.0,130.0,139.0,136.0,129.0
2,11,Down,h,127.0,135.0,130.0,135.0,129.0,127.0,133.0,...,135.0,143.0,140.0,139.0,144.0,142.0,139.0,139.0,143.0,134.0
3,11,Down,v,120.0,132.0,120.0,127.0,126.0,125.0,120.0,...,121.0,128.0,137.0,131.0,124.0,133.0,129.0,124.0,131.0,130.0
4,12,Down,h,133.0,137.0,134.0,132.0,137.0,136.0,135.0,...,152.0,143.0,141.0,148.0,148.0,144.0,149.0,147.0,150.0,150.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,7,Up,v,126.0,131.0,129.0,130.0,130.0,131.0,130.0,...,143.0,140.0,139.0,142.0,148.0,140.0,139.0,143.0,146.0,124.0
196,8,Up,h,146.0,145.0,144.0,147.0,143.0,143.0,146.0,...,125.0,135.0,131.0,129.0,128.0,132.0,132.0,126.0,133.0,153.0
197,8,Up,v,127.0,119.0,125.0,125.0,126.0,126.0,121.0,...,135.0,139.0,145.0,144.0,140.0,141.0,140.0,136.0,142.0,133.0
198,9,Up,h,147.0,141.0,140.0,140.0,145.0,137.0,145.0,...,143.0,133.0,142.0,138.0,141.0,138.0,145.0,138.0,142.0,144.0


In [None]:
# Write the labelled table to disk. 'utf-8-sig' prefixes the file with a
# UTF-8 byte-order mark; index=False leaves the row index out of the file.
dataset.to_csv(
    path_or_buf="EOG_dataset_2.csv",
    index=False,
    encoding='utf-8-sig',
)