In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from data_wrangling.datamanager import DataLoader as DL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Project data loader (imported above as DL); used below to fetch the session recordings.
dl = DL()

In [4]:
# Keys of the recording sessions used throughout the notebook.
sessions = ['s1', 's4', 's5', 's6', 's7', 's10']
# Mapping indexed by session key; each record exposes a 2-D array under 'data'.
# NOTE(review): rows look like time samples and columns like channels - confirm against DataLoader.
data = dl.get_fcx2(sessions)

In [9]:
# Shape of every loaded recording, in the mapping's iteration order.
[record['data'].shape for record in data.values()]

[(1303952, 73),
 (977418, 66),
 (1017999, 172),
 (838999, 91),
 (762999, 110),
 (815892, 110)]

In [4]:
def get_as_array(session, source = None):
    """Return the column-wise standardised recording of one session.

    Every column is shifted to zero mean and scaled to unit standard
    deviation (statistics are taken along axis 0).

    Parameters
    ----------
    session
        Key of the session to fetch.
    source : mapping, optional
        Mapping ``session -> record`` where ``record['data']`` is a 2-D
        array. Defaults to the notebook-level ``data``.

    Returns
    -------
    numpy.ndarray
        The standardised array. Floating-point input is normalised in place
        (no second copy of the recording is made), so the returned array is
        the one stored in ``source``. Any other dtype is converted to a
        float64 copy first, because in-place float arithmetic on an integer
        array raises a casting error.
    """
    if source is None:
        source = data

    arrs = source[session]['data']
    if not np.issubdtype(arrs.dtype, np.floating):
        arrs = arrs.astype(np.float64)

    mn = arrs.mean(0).reshape((1, -1))
    st = arrs.std (0).reshape((1, -1))

    # A constant column has zero spread; dividing by it would fill the column
    # with NaN/inf. Dividing by 1 instead leaves it at exactly zero.
    st[st == 0] = 1

    arrs -= mn
    arrs /= st
    return arrs

# One standardised array per session, in the order given by `sessions`.
Xs = list(map(get_as_array, sessions))

# Rebind `data` so this name no longer refers to the loaded mapping.
data = 0

In [5]:
# Largest column count over all sessions; every session is padded to this width later.
n_c_max = max(X.shape[1] for X in Xs)
n_c_max

172

In [6]:
# Column count of each session as it currently stands in Xs.
# Must be evaluated before Xs is replaced by its zero-padded version.
n_channelss = [session_arr.shape[1] for session_arr in Xs]

def pad_with_zeros(X):
    """Append zero columns on the right of X until it has n_c_max columns.

    X is a 2-D array; the row count is unchanged and a new array is returned.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]
    filler = np.zeros((n_rows, n_c_max - n_cols))
    return np.concatenate((X, filler), axis = 1)

# Replace every session array by its zero-padded, n_c_max-wide version.
Xs = list(map(pad_with_zeros, Xs))

In [7]:
# Padded shapes next to the recorded per-session channel counts.
([padded.shape for padded in Xs], n_channelss)

([(1303952, 172),
  (977418, 172),
  (1017999, 172),
  (838999, 172),
  (762999, 172),
  (815892, 172)],
 [73, 66, 172, 91, 110, 110])

In [8]:
# Number of consecutive time steps in one sample window.
t_in  = 100

# Fractions of every session reserved for training and validation;
# whatever follows the validation part is used for testing.
train_frac = 0.5
val_frac   = 0.2

# Each row is a half-open (start, stop) interval of window *start* indices for
# one session. Stops are pulled back by t_in and the next split begins t_in
# after the previous stop, so a window never reaches into the next split.
train_ranges = np.array([
    (0, int(X.shape[0] * train_frac) - t_in)
    for X in Xs
])

val_ranges = np.array([
    (train_end + t_in, train_end + int(X.shape[0] * val_frac) - t_in)
    for (_, train_end), X in zip(train_ranges, Xs)
])

test_ranges = np.array([
    (val_end + t_in, len(X) - t_in)
    for (_, val_end), X in zip(val_ranges, Xs)
])

# One row per session: train start/stop, val start/stop, test start/stop.
np.concatenate((train_ranges, val_ranges, test_ranges), axis = 1)

array([[      0,  651876,  651976,  912566,  912666, 1303852],
       [      0,  488609,  488709,  683992,  684092,  977318],
       [      0,  508899,  508999,  712398,  712498, 1017899],
       [      0,  419399,  419499,  587098,  587198,  838899],
       [      0,  381399,  381499,  533898,  533998,  762899],
       [      0,  407846,  407946,  570924,  571024,  815792]])

In [9]:
# Number of sessions; data_generator draws session numbers from range(n_sessions).
n_sessions = len(sessions)

def get_random_data_idxs(sess_no, n, mode = 'train'):
    """Draw up to `n` distinct random window start indices for one session.

    Parameters
    ----------
    sess_no : int
        Row of the range tables (train_ranges / val_ranges / test_ranges).
    n : int
        Number of indices wanted. Fewer are returned when the selected range
        holds fewer than `n` indices.
    mode : {'train', 'val', 'test'}
        Which split's (start, stop) range to sample from.

    Returns
    -------
    numpy.ndarray
        1-D integer array of distinct indices from the half-open range.

    Raises
    ------
    ValueError
        If `mode` is not one of the three known splits.
    """
    if mode == 'train':
        start, stop = train_ranges[sess_no]
    elif mode == 'val':
        start, stop = val_ranges[sess_no]
    elif mode == 'test':
        start, stop = test_ranges[sess_no]
    else:
        raise ValueError(
            "mode must be 'train', 'val' or 'test', got {!r}".format(mode)
        )

    # Shuffle the whole candidate range with the global NumPy RNG and keep
    # the first n entries, which yields a sample without replacement.
    _idxs = np.arange(start, stop)
    np.random.shuffle(_idxs)
    return _idxs[:n]

def data_generator(
    batch_size,
    steps_per_epoch,
    epochs,
    mode = 'train'
):
    """Yield `steps_per_epoch * epochs` random (x, y) batches, then stop.

    Every sample is a window of `t_in` consecutive rows taken from one
    session in `Xs`. The session of each sample is drawn uniformly at random.

    Parameters
    ----------
    batch_size : int
        Number of windows per batch.
    steps_per_epoch, epochs : int
        Only their product matters: it is the total number of batches.
    mode : {'train', 'val', 'test'}
        Split the window start indices are drawn from.

    Yields
    ------
    (x, y) : tuple of numpy.ndarray
        y has shape (batch_size, t_in, channels) and holds the windows.
        x has shape (batch_size, t_in, channels + 1): the same windows with
        one extra trailing channel that carries the session's entry of
        `n_channelss` at every time step. Samples are grouped by session,
        in session order.
    """
    offsets = np.arange(t_in)

    for _ in range(steps_per_epoch * epochs):

        # Draw a session for every sample and count the samples per session.
        # bincount keeps position i tied to session i and reports sessions
        # that were not drawn as 0, so each count stays with its own session.
        counts = np.bincount(
            np.random.randint(n_sessions, size = batch_size),
            minlength = n_sessions
        )

        # Session-wise window start indices
        idxs = [
            get_random_data_idxs(sess_no, n, mode = mode)
            for sess_no, n in enumerate(counts)
        ]

        # Gather each session's windows once: (n, 1) starts + (t_in,) offsets
        # index into X to give an array of shape (n, t_in, channels).
        windows = [
            X[idx[:, None] + offsets]
            for idx, X in zip(idxs, Xs)
        ]

        # Inputs: windows plus a trailing channel holding the channel count
        x = np.concatenate([
            np.concatenate([
                w,
                np.full(w.shape[:2] + (1,), n_channels)
            ], 2)
            for n_channels, w in zip(n_channelss, windows)
        ])

        # Targets: the windows themselves
        y = np.concatenate(windows)

        yield (x, y)

In [10]:
class Splitter(layers.Layer):
    """Separate the trailing label channel from the signal channels."""

    def call(self, inputs):
        # Everything except the last channel is signal.
        signal_part = inputs[:, :, :-1]
        # The last channel, over the first t_in time steps, is the label.
        label_part = inputs[:, :t_in, -1]
        return signal_part, tf.cast(label_part, tf.int32)
    
class Combiner(layers.Layer):
    """Zero out every output channel whose index is not below the label.

    `inputs` is the pair (outputs, labels). For each position of `labels`,
    channels 0 .. label-1 of `outputs` are kept and all later channels are
    multiplied by zero.
    """

    def call(self, inputs):
        outputs, labels = inputs

        # mask[..., c] == 1 where c < labels[...], else 0; built in the dtype
        # of `outputs` so the product works for any compute dtype.
        mask = tf.sequence_mask(
            labels,
            maxlen = outputs.shape[-1],
            dtype  = outputs.dtype
        )
        return outputs * mask
    
# Model 1: encode the whole window into a single vector, then decode it back.
# Input: t_in time steps of n_c_max signal channels plus one label channel.
input_layer = layers.Input((t_in, n_c_max + 1))
signals, labels = Splitter()(input_layer)

# Without return_sequences the bidirectional LSTM emits one vector per window
# ((None, 128) in the summary below).
encoder = layers.Bidirectional(
    layers.LSTM(64)
)(signals)

# Copy that vector t_in times so the decoder has one input per time step.
repeater = layers.RepeatVector(t_in)(encoder)

decoder1 = layers.LSTM(128, return_sequences = True)(repeater)
decoder2 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True)
)(decoder1)
decoder3 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True)
)(decoder2)

# n_c_max output values per time step; Combiner then masks them using the labels.
regressor = layers.Dense(n_c_max)(decoder3)
cleaner = Combiner()([regressor, labels])

model = keras.Model(input_layer, cleaner)
model.compile(loss = 'mse', optimizer = 'adam')
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 100, 173)]   0                                            
__________________________________________________________________________________________________
splitter (Splitter)             ((None, 100, 172), ( 0           input_1[0][0]                    
__________________________________________________________________________________________________
bidirectional (Bidirectional)   (None, 128)          121344      splitter[0][0]                   
__________________________________________________________________________________________________
repeat_vector (RepeatVector)    (None, 100, 128)     0           bidirectional[0][0]              
______________________________________________________________________________________________

In [11]:
# Stage 1: large batches, loose early-stopping tolerance.
batch_size      = 1024
steps_per_epoch = 80
epochs          = 50

validation_batch_size     = batch_size
# NOTE: the misspelled name is kept because it is a notebook-level variable.
vaidation_steps_per_epoch = 20
# data_generator yields steps * epochs batches in total, so with a single
# "epoch" every validation pass is exactly vaidation_steps_per_epoch long.
validation_epochs         = 1

hist1 = model.fit(
    # Training data: the generator already produces complete batches, so
    # batch_size is not passed to fit (Keras asks for it to be left unset
    # for generator and Dataset inputs).
    x = data_generator(
        batch_size,
        steps_per_epoch,
        epochs
    ),
    steps_per_epoch = steps_per_epoch,
    epochs          = epochs,

    # Validation data: wrapped in a lambda so a fresh generator is built
    # every time the dataset is iterated.
    validation_data = tf.data.Dataset.from_generator(
        lambda: data_generator(
            validation_batch_size,
            vaidation_steps_per_epoch,
            validation_epochs,
            mode = 'val'
        ),
        output_signature = (
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max + 1), dtype = tf.float64),
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max),     dtype = tf.float64),
        )
    ),
    validation_steps = vaidation_steps_per_epoch,

    callbacks = [keras.callbacks.EarlyStopping(
        patience = 2,
        min_delta = 1e-3
    )]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50


In [12]:
# Stage 2: continue training the same model with smaller batches and a
# tighter early-stopping tolerance.
batch_size      = 128
steps_per_epoch = 200
epochs          = 50

validation_batch_size     = batch_size
# NOTE: the misspelled name is kept because it is a notebook-level variable.
vaidation_steps_per_epoch = 20
# One "epoch" so each validation pass is exactly vaidation_steps_per_epoch long.
validation_epochs         = 1

hist2 = model.fit(
    # Training data: the generator already produces complete batches, so
    # batch_size is not passed to fit (Keras asks for it to be left unset
    # for generator and Dataset inputs).
    x = data_generator(
        batch_size,
        steps_per_epoch,
        epochs
    ),
    steps_per_epoch = steps_per_epoch,
    epochs          = epochs,

    # Validation data: wrapped in a lambda so a fresh generator is built
    # every time the dataset is iterated.
    validation_data = tf.data.Dataset.from_generator(
        lambda: data_generator(
            validation_batch_size,
            vaidation_steps_per_epoch,
            validation_epochs,
            mode = 'val'
        ),
        output_signature = (
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max + 1), dtype = tf.float64),
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max),     dtype = tf.float64),
        )
    ),
    validation_steps = vaidation_steps_per_epoch,

    callbacks = [keras.callbacks.EarlyStopping(
        patience = 3,
        min_delta = 5e-4
    )]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50


In [13]:
# Score the current model on windows drawn from the test split;
# the generator stops after steps_per_epoch * epochs batches.
mse = model.evaluate(
    x = data_generator(
        batch_size      = 1024,
        steps_per_epoch = 1000,
        epochs          = 1,
        mode            = 'test'
    )
)



In [14]:
# Model 2: three stacked bidirectional LSTMs that all keep the time axis
# (return_sequences = True), so the window is never collapsed to one vector.
# Input: t_in time steps of n_c_max signal channels plus one label channel.
input_layer = layers.Input((t_in, n_c_max + 1))
signals, labels = Splitter()(input_layer)

encoder1 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True),
)(signals)

encoder2 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True),
)(encoder1)

encoder3 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True),
)(encoder2)

# n_c_max output values per time step; Combiner then masks them using the labels.
regressor = layers.Dense(n_c_max)(encoder3)
cleaner = Combiner()([regressor, labels])

# Rebinds `model`: the previously trained model is no longer reachable by this name.
model = keras.Model(input_layer, cleaner)
model.compile(loss = 'mse', optimizer = 'adam')
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 100, 173)]   0                                            
__________________________________________________________________________________________________
splitter_1 (Splitter)           ((None, 100, 172), ( 0           input_2[0][0]                    
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) (None, 100, 128)     121344      splitter_1[0][0]                 
__________________________________________________________________________________________________
bidirectional_4 (Bidirectional) (None, 100, 128)     98816       bidirectional_3[0][0]            
____________________________________________________________________________________________

In [15]:
# Stage 1: large batches, loose early-stopping tolerance.
batch_size      = 1024
steps_per_epoch = 80
epochs          = 50

validation_batch_size     = batch_size
# NOTE: the misspelled name is kept because it is a notebook-level variable.
vaidation_steps_per_epoch = 20
# data_generator yields steps * epochs batches in total, so with a single
# "epoch" every validation pass is exactly vaidation_steps_per_epoch long.
validation_epochs         = 1

hist1 = model.fit(
    # Training data: the generator already produces complete batches, so
    # batch_size is not passed to fit (Keras asks for it to be left unset
    # for generator and Dataset inputs).
    x = data_generator(
        batch_size,
        steps_per_epoch,
        epochs
    ),
    steps_per_epoch = steps_per_epoch,
    epochs          = epochs,

    # Validation data: wrapped in a lambda so a fresh generator is built
    # every time the dataset is iterated.
    validation_data = tf.data.Dataset.from_generator(
        lambda: data_generator(
            validation_batch_size,
            vaidation_steps_per_epoch,
            validation_epochs,
            mode = 'val'
        ),
        output_signature = (
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max + 1), dtype = tf.float64),
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max),     dtype = tf.float64),
        )
    ),
    validation_steps = vaidation_steps_per_epoch,

    callbacks = [keras.callbacks.EarlyStopping(
        patience = 2,
        min_delta = 1e-3
    )]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50


In [16]:
# Stage 2: continue training the same model with smaller batches and a
# tighter early-stopping tolerance.
batch_size      = 128
steps_per_epoch = 200
epochs          = 50

validation_batch_size     = batch_size
# NOTE: the misspelled name is kept because it is a notebook-level variable.
vaidation_steps_per_epoch = 20
# One "epoch" so each validation pass is exactly vaidation_steps_per_epoch long.
validation_epochs         = 1

hist2 = model.fit(
    # Training data: the generator already produces complete batches, so
    # batch_size is not passed to fit (Keras asks for it to be left unset
    # for generator and Dataset inputs).
    x = data_generator(
        batch_size,
        steps_per_epoch,
        epochs
    ),
    steps_per_epoch = steps_per_epoch,
    epochs          = epochs,

    # Validation data: wrapped in a lambda so a fresh generator is built
    # every time the dataset is iterated.
    validation_data = tf.data.Dataset.from_generator(
        lambda: data_generator(
            validation_batch_size,
            vaidation_steps_per_epoch,
            validation_epochs,
            mode = 'val'
        ),
        output_signature = (
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max + 1), dtype = tf.float64),
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max),     dtype = tf.float64),
        )
    ),
    validation_steps = vaidation_steps_per_epoch,

    callbacks = [keras.callbacks.EarlyStopping(
        patience = 3,
        min_delta = 5e-4
    )]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


In [17]:
# Score the current model on windows drawn from the test split;
# the generator stops after steps_per_epoch * epochs batches.
mse = model.evaluate(
    x = data_generator(
        batch_size      = 1024,
        steps_per_epoch = 1000,
        epochs          = 1,
        mode            = 'test'
    )
)



In [18]:
# Model 3: four stacked bidirectional LSTMs, all keeping the time axis.
# The encoder*/decoder* names are labels only: the four layers are configured
# identically and there is no bottleneck between them.
# Input: t_in time steps of n_c_max signal channels plus one label channel.
input_layer = layers.Input((t_in, n_c_max + 1))
signals, labels = Splitter()(input_layer)

encoder1 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True),
)(signals)

encoder2 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True),
)(encoder1)

decoder1 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True)
)(encoder2)
decoder2 = layers.Bidirectional(
    layers.LSTM(64, return_sequences = True)
)(decoder1)

# n_c_max output values per time step; Combiner then masks them using the labels.
regressor = layers.Dense(n_c_max)(decoder2)
cleaner = Combiner()([regressor, labels])

# Rebinds `model`: the previously trained model is no longer reachable by this name.
model = keras.Model(input_layer, cleaner)
model.compile(loss = 'mse', optimizer = 'adam')
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 100, 173)]   0                                            
__________________________________________________________________________________________________
splitter_2 (Splitter)           ((None, 100, 172), ( 0           input_3[0][0]                    
__________________________________________________________________________________________________
bidirectional_6 (Bidirectional) (None, 100, 128)     121344      splitter_2[0][0]                 
__________________________________________________________________________________________________
bidirectional_7 (Bidirectional) (None, 100, 128)     98816       bidirectional_6[0][0]            
____________________________________________________________________________________________

In [19]:
# Stage 1: large batches, loose early-stopping tolerance.
batch_size      = 1024
steps_per_epoch = 80
epochs          = 50

validation_batch_size     = batch_size
# NOTE: the misspelled name is kept because it is a notebook-level variable.
vaidation_steps_per_epoch = 20
# data_generator yields steps * epochs batches in total, so with a single
# "epoch" every validation pass is exactly vaidation_steps_per_epoch long.
validation_epochs         = 1

hist1 = model.fit(
    # Training data: the generator already produces complete batches, so
    # batch_size is not passed to fit (Keras asks for it to be left unset
    # for generator and Dataset inputs).
    x = data_generator(
        batch_size,
        steps_per_epoch,
        epochs
    ),
    steps_per_epoch = steps_per_epoch,
    epochs          = epochs,

    # Validation data: wrapped in a lambda so a fresh generator is built
    # every time the dataset is iterated.
    validation_data = tf.data.Dataset.from_generator(
        lambda: data_generator(
            validation_batch_size,
            vaidation_steps_per_epoch,
            validation_epochs,
            mode = 'val'
        ),
        output_signature = (
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max + 1), dtype = tf.float64),
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max),     dtype = tf.float64),
        )
    ),
    validation_steps = vaidation_steps_per_epoch,

    callbacks = [keras.callbacks.EarlyStopping(
        patience = 2,
        min_delta = 1e-3
    )]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


In [20]:
# Stage 2: continue training the same model with smaller batches and a
# tighter early-stopping tolerance.
batch_size      = 128
steps_per_epoch = 200
epochs          = 50

validation_batch_size     = batch_size
# NOTE: the misspelled name is kept because it is a notebook-level variable.
vaidation_steps_per_epoch = 20
# One "epoch" so each validation pass is exactly vaidation_steps_per_epoch long.
validation_epochs         = 1

hist2 = model.fit(
    # Training data: the generator already produces complete batches, so
    # batch_size is not passed to fit (Keras asks for it to be left unset
    # for generator and Dataset inputs).
    x = data_generator(
        batch_size,
        steps_per_epoch,
        epochs
    ),
    steps_per_epoch = steps_per_epoch,
    epochs          = epochs,

    # Validation data: wrapped in a lambda so a fresh generator is built
    # every time the dataset is iterated.
    validation_data = tf.data.Dataset.from_generator(
        lambda: data_generator(
            validation_batch_size,
            vaidation_steps_per_epoch,
            validation_epochs,
            mode = 'val'
        ),
        output_signature = (
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max + 1), dtype = tf.float64),
            tf.TensorSpec(shape = (validation_batch_size, t_in, n_c_max),     dtype = tf.float64),
        )
    ),
    validation_steps = vaidation_steps_per_epoch,

    callbacks = [keras.callbacks.EarlyStopping(
        patience = 3,
        min_delta = 5e-4
    )]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


In [21]:
# Score the current model on windows drawn from the test split;
# the generator stops after steps_per_epoch * epochs batches.
mse = model.evaluate(
    x = data_generator(
        batch_size      = 1024,
        steps_per_epoch = 1000,
        epochs          = 1,
        mode            = 'test'
    )
)

