In [17]:
#from __future__ import annotations

#import random
#import shutil
#from datetime import datetime
from pathlib import Path
from typing import Callable, Dict, Iterator, List, Optional, Sequence, Tuple, Union

#import numpy as np
import tensorflow as tf
import torch
from loguru import logger
#from torch.nn.utils.rnn import pad_sequence
#from tqdm import tqdm
from scipy.io import arff

Tensor = torch.Tensor

In [18]:
def get_eeg(data_dir: Path="../data/raw") -> Path:
    dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00264/EEG%20Eye%20State.arff"  # noqa: E501
    datapath = tf.keras.utils.get_file(
        "eeg_data", origin=dataset_url, untar=False, cache_dir=data_dir
    )

    datapath = Path(datapath)
    logger.info(f"Data is downloaded to {datapath}.")
    return datapath

In [19]:
df = get_eeg()

2022-06-09 12:26:40.285 | INFO     | __main__:get_eeg:8 - Data is downloaded to ../data/raw/datasets/eeg_data.


In [20]:
from __future__ import annotations
from typing import Tuple
from tqdm import tqdm
import random
Tensor = torch.Tensor


class BaseDataset:
    def __init__(self, datapath: Path) -> None:
        self.path = datapath
        self.data =  self.process_data()

    def process_data(self) -> None:
        data = arff.loadarff(self.path)
        cur_label = int(data[0][0][14]) #index 14 = label
        EEG_list = [] #Lege lijst waarin meerdere observaties worden opgeslagen
        EEG_full = [] #Lege lijst waarin meerdere batches in worden samengevoegd.
        for obs in data[0]:
            if int(obs[14]) == cur_label:
                EEG_dim = [] #Lege lijst waarin de EEG_dim van een bepaalde observatie in kunnen worden opgeslagen.
                for index, i in enumerate(obs):
                    if index != 14:
                        EEG_dim.append(i)
                EEG_dim = torch.Tensor(EEG_dim)
                EEG_list.append(EEG_dim)
            else:
                EEG_full_label = (cur_label, torch.stack(EEG_list))
                EEG_full.append(EEG_full_label)
                cur_label = int(obs[14])
                EEG_list = [] #Lege lijst waarin meerdere observaties in kunnen worden opgeslagen.
                EEG_dim = [] #Lege lijst waarin de EEG_dim van een bepaalde observatie in kunnen worden opgeslagen.
                for index, i in enumerate(obs):
                    if index != 14:
                        EEG_dim.append(i)
                EEG_dim = torch.Tensor(EEG_dim)
                EEG_list.append(EEG_dim)
        EEG_full_label = (cur_label, torch.stack(EEG_list))
        EEG_full.append(EEG_full_label)
        return EEG_full

    def __len__(self) -> int:
        return len(self.data)
   
    def __getitem__(self, idx: int) -> Tuple:
        return self.data[idx]

In [62]:
dataloader = BaseDataset(datapath=get_eeg())

2022-06-09 12:53:08.447 | INFO     | __main__:get_eeg:8 - Data is downloaded to ../data/raw/datasets/eeg_data.


In [22]:
dataloader.__len__()

24

In [23]:
dataloader.__getitem__(0)[1][1]

tensor([4324.6201, 4004.6201, 4293.8501, 4148.7202, 4342.0498, 4586.6699,
        4097.4399, 4638.9702, 4210.7700, 4226.6699, 4207.6899, 4279.4902,
        4632.8198, 4384.1001])

In [35]:

class BaseDataIterator:
    """This iterator will consume all data and stop automatically.
    The dataset should have a:
        __len__ method
        __getitem__ method

    """

    def __init__(self, dataset: BaseDataset, batchsize: int) -> None:
        self.dataset = dataset
        self.batchsize = batchsize

    def __len__(self) -> int:
        return int(len(self.dataset) / self.batchsize)

    def __iter__(self) -> BaseDataIterator:
        self.index = 0
        self.index_list = torch.randperm(len(self.dataset))
        return self

    def batchloop(self) -> Tuple[int, Tensor]: 
        for i in range(len(self.dataset)):
            n_window = len(self.dataset[i][1]) - 5 + 1
            time = torch.arange(0, 5).reshape(1, -1)
            window = torch.arange(0, n_window).reshape(-1, 1)
            self.idx = time + window
            self.test = self.dataset[i][1][idx]
        for _ in range(self.batchsize):
            x, y = self.test[int(self.index_list[self.index])]
            X.append(x)
            Y.append(y)
            self.index += 1
        return X, Y

    def __next__(self) -> Tuple[Tensor, Tensor]:
        if self.index <= (len(self.dataset) - self.batchsize):
            X, Y = self.batchloop()  # noqa N806
            return torch.tensor(X), torch.tensor(Y)
        else:
            raise StopIteration





In [63]:
def window(x: Tensor, n_time: int) -> Tensor:
    """
    Generates and index that can be used to window a timeseries.
    E.g. the single series [0, 1, 2, 3, 4, 5] can be windowed into 4 timeseries with
    length 3 like this:

    [0, 1, 2]
    [1, 2, 3]
    [2, 3, 4]
    [3, 4, 5]

    We now can feed 4 different timeseries into the model, instead of 1, all
    with the same length.
    """
    n_window = len(x) - n_time + 1
    time = torch.arange(0, n_time).reshape(1, -1)
    window = torch.arange(0, n_window).reshape(-1, 1)
    idx = time + window
    return idx

In [49]:
dataloader[0][1]

tensor([[4329.2300, 4009.2300, 4289.2300,  ..., 4280.5098, 4635.8999,
         4393.8501],
        [4324.6201, 4004.6201, 4293.8501,  ..., 4279.4902, 4632.8198,
         4384.1001],
        [4327.6899, 4006.6699, 4295.3799,  ..., 4282.0498, 4628.7202,
         4389.2300],
        ...,
        [4468.2100, 4044.6201, 4305.1299,  ..., 4367.6899, 4833.8501,
         4571.7900],
        [4461.0298, 4041.0300, 4300.0000,  ..., 4365.1299, 4826.6699,
         4558.4600],
        [4452.8198, 4032.3101, 4295.3799,  ..., 4353.3301, 4808.2100,
         4549.2300]])

In [57]:
idx = window(dataloader, 4)
idx.shape
# datasetset = dataloader[idx][0]
# datasetset.shape

torch.Size([21, 4])

In [69]:
len(dataloader[0][1])

188

In [71]:
 b = torch.tensor([[0, 1], [2, 3]])
  torch.reshape(b, (-1,))
tensor([ 0,  1,  2,  3])

TypeError: reshape(): argument 'shape' (position 2) must be tuple of ints, not int

In [93]:
for i in range(24):
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


In [89]:
index_list = torch.randperm(len(dataloader))
x, y = dataloader[int(index_list[i])]
y

tensor([[4394.3599, 4020.5100, 4272.3101,  ..., 4312.3101, 4717.9502,
         4486.6699],
        [4382.0498, 4011.7900, 4260.0000,  ..., 4304.6201, 4714.8701,
         4472.3101],
        [4362.0498, 4003.5901, 4254.8701,  ..., 4294.3599, 4708.2100,
         4453.8501],
        ...,
        [4205.1299, 3963.0801, 4221.0298,  ..., 4241.5400, 4541.0298,
         4269.2300],
        [4210.2598, 3956.9199, 4218.9702,  ..., 4245.6401, 4533.8501,
         4273.3301],
        [4220.0000, 3966.1499, 4227.6899,  ..., 4250.7700, 4540.5098,
         4280.5098]])

In [120]:
for i in range(len(dataloader)):
    test_list =[]
    n_window = len(dataloader[i][1]) - 5 + 1
    time = torch.arange(0, 5).reshape(1, -1)
    window = torch.arange(0, n_window).reshape(-1, 1)
    idx = time + window
    test = dataloader[i][1][idx]
    #test_list.append(test)
    #test_full = (dataloader[i][0], torch.stack(test))


In [121]:
len(test)

17

tensor([[[4312.3101, 4022.0500, 4278.4600,  ..., 4284.6201, 4612.3101,
          4368.2100],
         [4304.1001, 4016.9199, 4273.8501,  ..., 4282.5601, 4606.1499,
          4364.1001],
         [4303.0801, 4016.9199, 4270.7700,  ..., 4276.9199, 4602.0498,
          4362.5601],
         [4304.6201, 4018.4600, 4272.3101,  ..., 4278.4600, 4602.0498,
          4360.0000],
         [4301.0298, 4013.8501, 4268.7202,  ..., 4278.9702, 4603.0801,
          4360.0000]],

        [[4304.1001, 4016.9199, 4273.8501,  ..., 4282.5601, 4606.1499,
          4364.1001],
         [4303.0801, 4016.9199, 4270.7700,  ..., 4276.9199, 4602.0498,
          4362.5601],
         [4304.6201, 4018.4600, 4272.3101,  ..., 4278.4600, 4602.0498,
          4360.0000],
         [4301.0298, 4013.8501, 4268.7202,  ..., 4278.9702, 4603.0801,
          4360.0000],
         [4300.0000, 4009.2300, 4263.5898,  ..., 4277.4399, 4601.0298,
          4362.0498]],

        [[4303.0801, 4016.9199, 4270.7700,  ..., 4276.9199, 4602.0

In [115]:

n_window = len(dataloader[0][1]) - 5 + 1
time = torch.arange(0, 5).reshape(1, -1)
window = torch.arange(0, n_window).reshape(-1, 1)
idx = time + window
test = dataloader[0][1][idx]
len(test)

184

In [36]:
test_1 = BaseDataIterator(dataloader,12)

In [38]:
test_1.__next__()

AttributeError: 'BaseDataIterator' object has no attribute 'index'

In [14]:
def window(x: Tensor, n_time: int) -> Tensor:
    """
    Generates and index that can be used to window a timeseries.
    E.g. the single series [0, 1, 2, 3, 4, 5] can be windowed into 4 timeseries with
    length 3 like this:

    [0, 1, 2]
    [1, 2, 3]
    [2, 3, 4]
    [3, 4, 5]

    We now can feed 4 different timeseries into the model, instead of 1, all
    with the same length.
    """
    n_window = len(x) - n_time + 1
    time = torch.arange(0, n_time).reshape(1, -1)
    window = torch.arange(0, n_window).reshape(-1, 1)
    idx = time + window
    return idx

In [18]:
window_size = 1
idx = window(dataloader, window_size)
idx.shape()


TypeError: 'torch.Size' object is not callable

In [None]:
dataloader.__getitem_

In [7]:
dataloader = BaseDataset(datapath=get_eeg())

2022-06-07 12:18:25.516 | INFO     | __main__:get_eeg:8 - Data is downloaded to ../data/raw/datasets/eeg_data.


In [7]:
dataloader.__len__()

24

In [8]:
dataloader.__getitem__(23)

(1,
 tensor([[4304.1001, 4016.9199, 4273.8501, 4145.1299, 4340.0000, 4623.5898,
          4084.1001, 4646.6699, 4223.0801, 4229.2300, 4182.0498, 4282.5601,
          4606.1499, 4364.1001],
         [4303.0801, 4016.9199, 4270.7700, 4138.9702, 4342.0498, 4621.5400,
          4082.5601, 4645.6401, 4220.0000, 4229.2300, 4180.5098, 4276.9199,
          4602.0498, 4362.5601],
         [4304.6201, 4018.4600, 4272.3101, 4139.4902, 4340.0000, 4619.4902,
          4082.5601, 4637.9502, 4203.0801, 4226.6699, 4179.4902, 4278.4600,
          4602.0498, 4360.0000],
         [4301.0298, 4013.8501, 4268.7202, 4140.0000, 4341.0298, 4617.4399,
          4082.0500, 4626.1499, 4196.4102, 4224.1001, 4177.9502, 4278.9702,
          4603.0801, 4360.0000],
         [4300.0000, 4009.2300, 4263.5898, 4138.4600, 4342.5601, 4617.9502,
          4083.5901, 4623.0801, 4205.6401, 4226.6699, 4177.9502, 4277.4399,
          4601.0298, 4362.0498],
         [4302.5601, 4008.7200, 4265.1299, 4138.9702, 4336.4102, 4618.4