In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [19]:
PASTA_EEG = "data/train_eegs/"
PASTA_FOURIER = "data/fourier/"
SPS = 200 #samples per second

frame_size = SPS
half_frame = frame_size//2
hop = (frame_size//2) * 4
frequency_bins = (SPS//2) +1
sampling_rate = SPS

In [52]:
def get_data(eeg):
    return pd.read_parquet(f'data/train_eegs/{eeg}.parquet', engine='pyarrow')


def generate_fourier(df, i, frame_size=frame_size, half_frame=half_frame):
    r_i = []
    for c in df.columns:
        f = np.fft.fft(  df[c][i:i+frame_size] - df[c][i:i+frame_size].mean() )
        r_i.append( np.concatenate([
                abs(f.real[:half_frame])/half_frame, 
                abs(f.imag[:half_frame])/half_frame
            ]
        ))

    return np.asarray(r_i)


def get_start_end(row, pos, max_size):
    if pos == len(row['janelas']):
        return max_size, max_size

    j = row['janelas'].split(',')[0].split('-')
    
    return float(j[0])*2, float(j[1])*2


def score_data(row, writer, hop=hop):
    df_data = get_data(row['eeg_id'])
    max_size = df_data.shape[0]
    j_pos = 0
    start, end = get_start_end(row, j_pos, max_size)

    for i in range(0, max_size-hop, hop):
        fourier = generate_fourier(df_data, i)
        print(fourier)

        if end < i:
            j_pos += 1
            start, end = get_start_end(row, j_pos, max_size)

        if i >= start and i <= end:
            scores = [row['seizure_vote'],row['lpd_vote'],row['gpd_vote'],row['lrda_vote'],row['grda_vote'],row['other_vote'], 0.0 ] 
        else:
            scores = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] 


        writer.write(
            tf.train.Example(features=tf.train.Features(feature={
                "label": tf.train.Feature(float_list=tf.train.FloatList(value=scores)),
                "data": tf.train.Feature(float_list=tf.train.FloatList(value=fourier.reshape(4000) ))
            })).SerializeToString()
        )



In [None]:
df = pd.read_csv("data/scores.csv")

for i, split in enumerate(np.array_split(df, 5)):
    with tf.io.TFRecordWriter(f"{PASTA_FOURIER}data_{i}") as writer:
        split.apply(lambda x: score_data(x, writer), axis=1)

In [69]:
def generate_fourier(df, i, frame_size=frame_size, half_frame=half_frame):
    r_i = []
    for c in df.columns:
        f = np.fft.fft(  df[c][i:i+frame_size] - df[c][i:i+frame_size].mean() )
        r_i.append( np.concatenate([
                abs(f.real[:half_frame])/half_frame, 
                abs(f.imag[:half_frame])/half_frame
            ], axis=0
        ))

    return np.asarray(r_i)

In [70]:
t = generate_fourier(get_data(f[0]['eeg_id'].iloc[0]), 1)

In [71]:
t.shape

(20, 1, 200)

In [72]:
t

array([[[1.49536133e-05, 1.88909999e+01, 4.01110684e+01, ...,
         7.51017606e-02, 8.42409136e-04, 2.36584420e-02]],

       [[7.62939453e-08, 1.38098349e+01, 2.66036086e+01, ...,
         2.71526151e-02, 3.19597452e-02, 1.49903573e-02]],

       [[9.53674316e-09, 2.30658534e+01, 2.99881093e+01, ...,
         7.32917111e-02, 5.24045856e-03, 4.76459790e-02]],

       ...,

       [[1.70898438e-05, 5.98634083e+01, 1.13652924e+01, ...,
         5.13531670e-02, 1.93578020e-03, 2.25081060e-02]],

       [[2.31170654e-05, 7.19830511e+01, 2.64199295e+01, ...,
         9.72250342e-02, 4.25423313e-02, 2.02626579e-02]],

       [[2.01416016e-04, 1.60618940e+02, 3.71664930e+02, ...,
         4.77039597e+02, 1.11810829e+01, 2.43749069e+02]]])

In [53]:
with tf.io.TFRecordWriter(f"{PASTA_FOURIER}data_test") as writer:
    f[0].apply(lambda x: score_data(x, writer), axis=1)

[[5.49316406e-06 2.02674666e+01 4.07293879e+01 ... 8.84922593e-02
  7.45495398e-03 1.78099397e-02]
 [1.90734863e-06 1.51647413e+01 2.76923959e+01 ... 6.15789935e-02
  6.01219619e-03 5.31035451e-04]
 [9.53674316e-09 2.45629855e+01 3.01735238e+01 ... 6.54672549e-02
  9.79173281e-03 4.84844952e-02]
 ...
 [3.89099121e-06 6.30331836e+01 1.15566569e+01 ... 9.76675109e-02
  3.19930480e-02 6.18665466e-03]
 [4.95910645e-06 7.47009903e+01 2.46935355e+01 ... 1.50713752e-01
  2.60977801e-03 3.98896214e-02]
 [2.23388672e-04 1.80333527e+02 4.07862560e+02 ... 4.96546674e+02
  2.38325251e+01 2.51742958e+02]]
[[7.78198242e-06 6.08862594e+00 7.12402533e+00 ... 1.74762940e-02
  4.41419615e-02 4.85803221e-02]
 [5.53131104e-07 2.55739785e+01 2.21975791e+01 ... 4.71336462e-02
  5.06637492e-02 7.79333284e-03]
 [8.20159912e-07 3.86420952e+01 2.14993259e+01 ... 2.06576759e-02
  6.92236000e-04 1.67526271e-03]
 ...
 [5.53131104e-06 6.58458221e+01 1.84837635e+01 ... 5.48103084e-02
  2.67131419e-02 6.10959236e-02]

In [50]:
def parse_data(example):
    parsed = tf.io.parse_sequence_example(example,  {
        "label": tf.io.FixedLenFeature(shape=7, dtype=tf.float32),
        "data" : tf.io.FixedLenFeature(shape=(20,100,2), dtype=tf.float32)
    })

    return parsed[0]["data"], parsed[0]["label"]

In [51]:
next(iter(tf.data.TFRecordDataset(f"{PASTA_FOURIER}data_test").map(parse_data, num_parallel_calls=tf.data.AUTOTUNE)))

(<tf.Tensor: shape=(20, 100, 2), dtype=float32, numpy=
 array([[[5.49316428e-06, 2.02674675e+01],
         [4.07293892e+01, 7.42632151e+00],
         [2.22083759e+00, 5.23366117e+00],
         ...,
         [1.38732335e-02, 2.12338306e-02],
         [3.83852497e-02, 8.84922594e-02],
         [7.45495409e-03, 1.78099405e-02]],
 
        [[1.90734863e-06, 1.51647415e+01],
         [2.76923962e+01, 9.10976887e+00],
         [3.75745130e+00, 2.12716579e+00],
         ...,
         [1.00929685e-01, 5.88363297e-02],
         [2.09963210e-02, 6.15789928e-02],
         [6.01219619e-03, 5.31035475e-04]],
 
        [[9.53674295e-09, 2.45629864e+01],
         [3.01735229e+01, 5.70249367e+00],
         [4.68954897e+00, 1.53889036e+00],
         ...,
         [4.93815951e-02, 7.04703405e-02],
         [5.83215756e-03, 6.54672533e-02],
         [9.79173277e-03, 4.84844968e-02]],
 
        ...,
 
        [[3.89099114e-06, 6.30331841e+01],
         [1.15566568e+01, 2.55961895e+01],
         [3.4277439