In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from data_wrangling.datamanager import DataLoader as DL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import scipy.signal as sig
import sys
import pickle
from welford import Welford

In [2]:
# Number of spectrogram time steps per model input (used as the first dimension of the model Input)
t_in = 100

In [3]:
# Spectrogram settings; data_generator passes these on to spectrogram_base.
# Segment length and overlap between consecutive segments, both in samples
nperseg = 100
noverlap = 99
# Hop between consecutive segments, in samples
nhop = nperseg - noverlap
# log2 of the FFT length (the generator uses nfft = 2 ** logfft)
logfft = 8

In [4]:
# Load the recordings of the selected sessions.
# NOTE(review): get_fcx2 is project code not visible here; later cells only rely on
# data[session]['data'] being a 2-D array whose second axis is used as the channel axis.
dl = DL()
sessions = ['s1', 's4', 's5', 's6', 's7', 's10']
data = dl.get_fcx2(sessions)

In [5]:
def spectrogram_base(
    signal,
    nperseg = 100,
    noverlap = 99,
    nfft = 2 ** 8,
    f_lo=10,
    f_hi=100,
    window = 'boxcar',
    fs = 1000
):
    """Band-limited amplitude spectrogram of `signal`.

    Parameters
    ----------
    signal : array_like
        Time series; time is the last axis. Leading axes (if any) are
        treated as independent signals.
    nperseg, noverlap, nfft : int
        Segment length, segment overlap and FFT length, forwarded to
        scipy.signal.spectrogram.
    f_lo, f_hi : float
        Inclusive frequency band to keep, in the same unit as `fs`.
    window : str or tuple or array_like
        Window specification forwarded to scipy.signal.spectrogram.
    fs : float
        Sampling frequency of `signal`.

    Returns
    -------
    freqs : ndarray
        All frequencies returned by scipy (not restricted to the band).
    freqs_idxs : ndarray of bool
        Mask over `freqs` selecting f_lo <= f <= f_hi.
    times : ndarray
        Segment times.
    spect : ndarray, shape (..., n_times, n_band_freqs)
        Square root of the 'spectrum'-scaled spectrogram inside the band.
    """
    freqs, times, spect = sig.spectrogram(
        signal,
        fs = fs,
        window = window,
        nperseg = nperseg,
        noverlap = noverlap,
        nfft = nfft,
        scaling = 'spectrum'
    )
    freqs_idxs = (freqs >= f_lo) & (freqs <= f_hi)
    # scipy returns (..., freq, time); keep the band and put time before
    # frequency. For 1-D input this equals spect[freqs_idxs].T.
    spect = np.swapaxes(spect[..., freqs_idxs, :], -1, -2)
    # Power -> amplitude
    spect = np.sqrt(spect)

    return freqs, freqs_idxs, times, spect

In [6]:
def get_as_array(session):
    """Return the recording of `session`, standardised per channel.

    Reads data[session]['data'] and subtracts the mean / divides by the
    standard deviation taken along axis 0, separately for every column.

    The normalisation is done IN PLACE: the array stored in `data` is
    modified and that same array is returned (no copy is made).

    Channels with zero standard deviation are only centred (they become
    all zeros) instead of being divided by zero.
    """
    print("Retrieving data from session %s..."%session)
    arrs = data[session]['data']
    mn = arrs.mean(0).reshape((1, -1))
    st = arrs.std(0).reshape((1, -1))
    # A constant channel has std == 0; dividing by it would fill the
    # channel with NaN/inf and poison every batch drawn from it.
    st = np.where(st == 0, 1, st)
    arrs -= mn
    arrs /= st
    print("...finished")
    return arrs


# Standardised recording of every session, in the same order as `sessions`
Xs = []
# NOTE(review): spect_params is never read in the visible part of the notebook
spect_params = []
for s in sessions:
    X = get_as_array(s)
    Xs.append(X)

Retrieving data from session s1...
...finished
Retrieving data from session s4...
...finished
Retrieving data from session s5...
...finished
Retrieving data from session s6...
...finished
Retrieving data from session s7...
...finished
Retrieving data from session s10...
...finished


In [7]:
# Channel count (second array axis) of every session, ordered like `sessions`
n_channelss = [session_arr.shape[1] for session_arr in Xs]
print(n_channelss)

[73, 66, 172, 91, 110, 110]


In [8]:
# Drop the reference to the loader output; the session arrays stay alive through Xs
data = 0

# Indexing/setup
# Fractions of every recording used for training and validation; the rest is test
train_frac = 0.5
val_frac   = 0.2

# Samples left free at the end of a split so a window starting at the last
# allowed index does not run past it
split_margin = t_in + nperseg

# One (first start index, end-exclusive start index) row per session.
# NOTE(review): a window covers t_in + nperseg - nhop samples but the next split
# begins only t_in after the previous upper bound, so the last windows of one
# split share raw samples with the first windows of the next one - confirm
# whether this small overlap is acceptable.
train_ranges = np.array([
    (0, int(len(X) * train_frac) - split_margin)
    for X in Xs
])

val_ranges = np.array([
    (train_hi + t_in, train_hi + int(len(X) * val_frac) - split_margin)
    for X, (_, train_hi) in zip(Xs, train_ranges)
])

test_ranges = np.array([
    (val_hi + t_in, len(X) - split_margin)
    for X, (_, val_hi) in zip(Xs, val_ranges)
])

# Overview: columns are train lo/hi, val lo/hi, test lo/hi
np.hstack((train_ranges, val_ranges, test_ranges))

array([[      0,  651776,  651876,  912366,  912466, 1303752],
       [      0,  488509,  488609,  683792,  683892,  977218],
       [      0,  508799,  508899,  712198,  712298, 1017799],
       [      0,  419299,  419399,  586898,  586998,  838799],
       [      0,  381299,  381399,  533698,  533798,  762799],
       [      0,  407746,  407846,  570724,  570824,  815692]])

In [9]:
# Data generator

# Number of sessions a batch can draw from
n_sessions = len(sessions)

def get_random_data_idxs(sess_no, n, mode = 'train', rng = None):
    """Draw `n` random (start, end, channel) triples for one session.

    Parameters
    ----------
    sess_no : int
        Position of the session in `sessions` / `Xs`.
    n : int
        Number of windows to draw.
    mode : {'train', 'val', 'test'}
        Which split's index range to draw the start indices from.
    rng : numpy.random.Generator, optional
        Random generator to use; pass a seeded one for reproducible
        draws. A fresh default generator is created when omitted.

    Returns
    -------
    ndarray of int, shape (n, 3)
        Columns are start index, end index (exclusive slice bound) and
        channel. Start indices are distinct within one call; channels are
        drawn with replacement.

    Raises
    ------
    ValueError
        If `mode` is unknown or `n` exceeds the number of start indices
        available in the split.
    """
    ranges_by_mode = {
        'train': train_ranges,
        'val':   val_ranges,
        'test':  test_ranges,
    }
    if mode not in ranges_by_mode:
        raise ValueError(
            "mode must be one of 'train', 'val' or 'test', got %r" % (mode,)
        )
    if rng is None:
        rng = np.random.default_rng()

    lo, hi = (int(bound) for bound in ranges_by_mode[mode][sess_no])
    # Sample the n start offsets directly instead of materialising and
    # shuffling the whole index range on every call.
    starts = lo + rng.choice(hi - lo, size = n, replace = False)
    channels = rng.choice(n_channelss[sess_no], size = n, replace = True)
    # Raw samples needed so that the spectrogram has t_in time steps
    window_len = t_in + nperseg - nhop
    return np.array([
            starts,
            starts + window_len,
            channels
    ]).T

In [10]:
def data_generator(
    batch_size,
    steps_per_epoch,
    epochs,
    normalize = False,
    mode = 'train'
):
    """Yield (input, target) spectrogram batches for autoencoder training.

    Every batch mixes windows from randomly chosen sessions and channels of
    the requested split. Input and target are the same array.

    Parameters
    ----------
    batch_size : int
        Number of windows per batch.
    steps_per_epoch, epochs : int
        The generator yields steps_per_epoch * epochs batches in total.
    normalize : False or (mean, std)
        When a (mean, std) pair is given, the spectrograms are standardised
        with it; both must broadcast against (batch, time, freq).
    mode : {'train', 'val', 'test'}
        Split to draw the windows from.

    Yields
    ------
    (samples, samples) : tuple of ndarray
        Spectrograms of shape (batch_size, n_times, n_freqs).
    """

    for _ in range(steps_per_epoch * epochs):

        # Number of windows to draw from each session. bincount keeps
        # position i tied to session i; value_counts() would sort by count
        # and hand the largest share to session 0 on every batch.
        counts = np.bincount(
            np.random.randint(n_sessions, size = batch_size),
            minlength = n_sessions
        )

        # Raw windows, grouped by session; sessions that drew no window
        # are skipped so every stacked piece is 2-D.
        windows = np.concatenate([
            np.array([
                Xs[sess_no][lo : hi, chan]
                for lo, hi, chan in get_random_data_idxs(sess_no, n, mode = mode)
            ])
            for sess_no, n in enumerate(counts)
            if n > 0
        ])

        samples = np.stack([
            spectrogram_base(
                window,
                nperseg,
                noverlap,
                2 ** logfft
            )[3]
            for window in windows
        ])

        # Optional standardisation with a precomputed (mean, std) pair
        if normalize:
            mean, std = normalize
            samples = (samples - mean) / std

        yield (samples, samples)

In [11]:
# Running mean/variance of un-normalised training spectrograms; used afterwards to
# standardise the model inputs.
welford_from_generator = Welford()
w_epochs = 5
w_batch_size = 5000
# w_epochs * t_in * w_batch_size = 5 * 100 * 5000 = 2.5M spectrogram rows
for x, _ in data_generator(w_batch_size, 1, w_epochs):
    # x is one batch; every element is the spectrogram of a single window
    for spects in x:
        welford_from_generator.add_all(spects)

In [12]:
# Statistics as (1, n_freqs) row vectors so they broadcast over (batch, time, freq)
mn = welford_from_generator.mean.reshape((1, -1))
# Standard deviation from the population variance
st = np.sqrt(welford_from_generator.var_p).reshape((1, -1))

In [13]:
# Sequence autoencoder with a bottleneck: the encoder compresses the
# (t_in, 23) spectrogram into one 128-d vector, the decoder expands it again.
# 23 is the number of frequency bins spectrogram_base keeps (10-100 Hz at
# nfft = 2 ** logfft = 256 and 1000 Hz sampling).
model = keras.Sequential()
model.add(layers.Input((t_in, 23)))

# Encoder
model.add(layers.Bidirectional(layers.LSTM(64, return_sequences = True)))
model.add(layers.Bidirectional(layers.LSTM(64)))

# Decoder: repeat the code vector for every output time step
model.add(layers.RepeatVector(t_in))
model.add(layers.LSTM(128, return_sequences = True))
model.add(layers.Bidirectional(layers.LSTM(64, return_sequences = True)))
model.add(layers.Bidirectional(layers.LSTM(64, return_sequences = True)))

# Per-time-step projection back to the 23 frequency bins
model.add(layers.Dense(23))

model.compile(optimizer = 'adam', loss = 'mse')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 100, 128)          45056     
_________________________________________________________________
bidirectional_1 (Bidirection (None, 128)               98816     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 100, 128)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 100, 128)          131584    
_________________________________________________________________
bidirectional_2 (Bidirection (None, 100, 128)          98816     
_________________________________________________________________
bidirectional_3 (Bidirection (None, 100, 128)          98816     
_________________________________________________________________
dense (Dense)                (None, 100, 23)           2967      

In [14]:
# Training: phase 1 with large batches, phase 2 fine-tunes with small batches.
# Both phases stop early on the validation loss.

# Validation setup, shared by both phases
validation_batch_size     = 1024
vaidation_steps_per_epoch = 30
# Necessary for function
validation_epochs         = 1

# (batch_size, steps_per_epoch, epochs, early-stopping patience, early-stopping min_delta)
training_phases = [
    (1024, 100, 100, 2, 1e-3),
    ( 128, 300, 100, 3, 5e-4),
]

histories = []
for batch_size, steps_per_epoch, epochs, patience, min_delta in training_phases:
    histories.append(model.fit(
        # Training data and configuration.
        # The generator already yields complete batches, so no batch_size /
        # validation_batch_size is passed to fit.
        x = data_generator(
            batch_size,
            steps_per_epoch,
            epochs,
            normalize = (mn, st)
        ),
        steps_per_epoch = steps_per_epoch,
        epochs          = epochs,

        # Validation data: the lambda gives every validation pass a fresh generator
        validation_data = tf.data.Dataset.from_generator(
            lambda: data_generator(
                validation_batch_size,
                vaidation_steps_per_epoch,
                validation_epochs,
                mode = 'val',
                normalize = (mn, st)
            ),
            output_signature = (
                tf.TensorSpec(shape = (validation_batch_size, t_in, 23), dtype = tf.float64),
                tf.TensorSpec(shape = (validation_batch_size, t_in, 23), dtype = tf.float64),
            )
        ),
        validation_steps = vaidation_steps_per_epoch,

        callbacks = [keras.callbacks.EarlyStopping(
            patience = patience,
            min_delta = min_delta
        )]
    ))

hist1, hist2 = histories

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100


In [15]:
# Reconstruction MSE on 1000 random test batches of 1024 windows each
test_batches = data_generator(
    batch_size = 1024,
    steps_per_epoch = 1000,
    epochs = 1,
    normalize = (mn, st),
    mode = 'test'
)
mse = model.evaluate(x = test_batches)
mse



0.22196829319000244

In [16]:
# Three stacked bidirectional LSTMs that keep the full sequence, followed by
# a per-time-step projection back to the 23 frequency bins.
# NOTE(review): there is no bottleneck here (every layer returns the whole
# sequence), so a low reconstruction error may mostly reflect a near-identity
# mapping - confirm this is the intended comparison.
model = keras.Sequential(
    [layers.Input((t_in, 23))]
    + [
        layers.Bidirectional(layers.LSTM(64, return_sequences = True))
        for _ in range(3)
    ]
    + [layers.Dense(23)]
)
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_4 (Bidirection (None, 100, 128)          45056     
_________________________________________________________________
bidirectional_5 (Bidirection (None, 100, 128)          98816     
_________________________________________________________________
bidirectional_6 (Bidirection (None, 100, 128)          98816     
_________________________________________________________________
dense_1 (Dense)              (None, 100, 23)           2967      
Total params: 245,655
Trainable params: 245,655
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Training: phase 1 with large batches, phase 2 fine-tunes with small batches.
# Both phases stop early on the validation loss.

# Validation setup, shared by both phases
validation_batch_size     = 1024
vaidation_steps_per_epoch = 30
# Necessary for function
validation_epochs         = 1

# (batch_size, steps_per_epoch, epochs, early-stopping patience, early-stopping min_delta)
training_phases = [
    (1024, 100, 100, 2, 1e-3),
    ( 128, 300, 100, 3, 5e-4),
]

histories = []
for batch_size, steps_per_epoch, epochs, patience, min_delta in training_phases:
    histories.append(model.fit(
        # Training data and configuration.
        # The generator already yields complete batches, so no batch_size /
        # validation_batch_size is passed to fit.
        x = data_generator(
            batch_size,
            steps_per_epoch,
            epochs,
            normalize = (mn, st)
        ),
        steps_per_epoch = steps_per_epoch,
        epochs          = epochs,

        # Validation data: the lambda gives every validation pass a fresh generator
        validation_data = tf.data.Dataset.from_generator(
            lambda: data_generator(
                validation_batch_size,
                vaidation_steps_per_epoch,
                validation_epochs,
                mode = 'val',
                normalize = (mn, st)
            ),
            output_signature = (
                tf.TensorSpec(shape = (validation_batch_size, t_in, 23), dtype = tf.float64),
                tf.TensorSpec(shape = (validation_batch_size, t_in, 23), dtype = tf.float64),
            )
        ),
        validation_steps = vaidation_steps_per_epoch,

        callbacks = [keras.callbacks.EarlyStopping(
            patience = patience,
            min_delta = min_delta
        )]
    ))

hist1, hist2 = histories

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


In [18]:
# Reconstruction MSE on 1000 random test batches of 1024 windows each
test_batches = data_generator(
    batch_size = 1024,
    steps_per_epoch = 1000,
    epochs = 1,
    normalize = (mn, st),
    mode = 'test'
)
mse = model.evaluate(x = test_batches)
mse



0.015673896297812462

In [19]:
# Four stacked bidirectional LSTMs that keep the full sequence, followed by
# a per-time-step projection back to the 23 frequency bins.
# NOTE(review): there is no bottleneck here (every layer returns the whole
# sequence), so a low reconstruction error may mostly reflect a near-identity
# mapping - confirm this is the intended comparison.
model = keras.Sequential(
    [layers.Input((t_in, 23))]
    + [
        layers.Bidirectional(layers.LSTM(64, return_sequences = True))
        for _ in range(4)
    ]
    + [layers.Dense(23)]
)
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_7 (Bidirection (None, 100, 128)          45056     
_________________________________________________________________
bidirectional_8 (Bidirection (None, 100, 128)          98816     
_________________________________________________________________
bidirectional_9 (Bidirection (None, 100, 128)          98816     
_________________________________________________________________
bidirectional_10 (Bidirectio (None, 100, 128)          98816     
_________________________________________________________________
dense_2 (Dense)              (None, 100, 23)           2967      
Total params: 344,471
Trainable params: 344,471
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Training: phase 1 with large batches, phase 2 fine-tunes with small batches.
# Both phases stop early on the validation loss.

# Validation setup, shared by both phases
validation_batch_size     = 1024
vaidation_steps_per_epoch = 30
# Necessary for function
validation_epochs         = 1

# (batch_size, steps_per_epoch, epochs, early-stopping patience, early-stopping min_delta)
training_phases = [
    (1024, 100, 100, 2, 1e-3),
    ( 128, 300, 100, 3, 5e-4),
]

histories = []
for batch_size, steps_per_epoch, epochs, patience, min_delta in training_phases:
    histories.append(model.fit(
        # Training data and configuration.
        # The generator already yields complete batches, so no batch_size /
        # validation_batch_size is passed to fit.
        x = data_generator(
            batch_size,
            steps_per_epoch,
            epochs,
            normalize = (mn, st)
        ),
        steps_per_epoch = steps_per_epoch,
        epochs          = epochs,

        # Validation data: the lambda gives every validation pass a fresh generator
        validation_data = tf.data.Dataset.from_generator(
            lambda: data_generator(
                validation_batch_size,
                vaidation_steps_per_epoch,
                validation_epochs,
                mode = 'val',
                normalize = (mn, st)
            ),
            output_signature = (
                tf.TensorSpec(shape = (validation_batch_size, t_in, 23), dtype = tf.float64),
                tf.TensorSpec(shape = (validation_batch_size, t_in, 23), dtype = tf.float64),
            )
        ),
        validation_steps = vaidation_steps_per_epoch,

        callbacks = [keras.callbacks.EarlyStopping(
            patience = patience,
            min_delta = min_delta
        )]
    ))

hist1, hist2 = histories

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100


In [21]:
# Reconstruction MSE on 1000 random test batches of 1024 windows each
test_batches = data_generator(
    batch_size = 1024,
    steps_per_epoch = 1000,
    epochs = 1,
    normalize = (mn, st),
    mode = 'test'
)
mse = model.evaluate(x = test_batches)
mse



0.013722263276576996