In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import time
from datetime import datetime
import itertools
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map on the current figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are drawn along the y axis ('True label'),
        columns along the x axis ('Predicted label').
    classes : sequence
        Tick labels; one entry per row/column of `cm`.
    normalize : bool, default False
        When True every row is divided by its sum before plotting, so each
        cell shows the fraction of that row. A matrix whose rows already sum
        to 1 is left unchanged by this step.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None. The plot is drawn with the pyplot state machine.
    """
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows without any samples stay all-zero instead of turning into NaN.
        safe_totals = np.where(row_totals == 0, 1, row_totals)
        cm = cm.astype('float') / safe_totals

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as integers; anything else (normalized or
    # already-float matrices) with two decimals. 'd' would raise on floats.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) and not normalize else '.2f'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()
    
# source: http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

In [None]:
# helper function for reading the data files
def readIt(dataType, _path='data//'):
    """
    Read the three CSV files that belong to one data set.

    The files are looked up as ``_path + 'I' + dataType + '.csv'``,
    ``_path + 'L' + dataType + '.csv'`` and ``_path + 'T' + dataType + '.csv'``.
    Each file is parsed without a header row, with ';' as the field separator
    and ',' as the decimal mark.

    Parameters
    ----------
    dataType : str
        Suffix that selects the data set (part of the file names).
    _path : str
        Prefix prepended to every file name; must end with a path separator.

    Returns
    -------
    (Inputs, Lengths, Targets) : tuple of numpy.ndarray
        The contents of the 'I', 'L' and 'T' files as 2-D arrays.
    """
    def _load(prefix):
        frame = pd.read_csv(_path + prefix + dataType + '.csv',
                            decimal=',', sep=';', header=None)
        # DataFrame.as_matrix() was removed in pandas 1.0; `.values` returns
        # the same ndarray and is available in old and new pandas versions.
        return frame.values

    Inputs = _load('I')
    Lengths = _load('L')
    Targets = _load('T')

    return Inputs, Lengths, Targets

In [None]:
#helper function for reshaping 2D array of stacked sequences into 3D array, zero padded. shape=[n_examples,longest_seq,n_features]
def ReshapeMyData(INP, LEN, TAR, n_features, longest_seq, scaler=2):
    """
    Split stacked sequences into a zero-padded 3-D array, sub-sampling in time.

    `INP` holds all sequences stacked along axis 0; `LEN` gives the number of
    rows of each sequence in that order. Every sequence is sub-sampled by
    keeping each `scaler`-th row and written into its own slice of the output;
    the remaining rows of the slice stay zero.

    Parameters
    ----------
    INP : numpy.ndarray, 2-D
        Stacked sequences, one time step per row.
    LEN : numpy.ndarray
        Length (in rows of `INP`) of every sequence.
    TAR : numpy.ndarray
        Targets; only passed through `squeeze()`.
    n_features : int
        Number of columns of `INP`.
    longest_seq : int
        Length of the longest sequence before sub-sampling.
    scaler : int, default 2
        Sub-sampling factor: every `scaler`-th row of a sequence is kept.

    Returns
    -------
    zero_padded_INP : numpy.ndarray
        Shape ``(n_examples, longest_seq // scaler, n_features)``.
    TAR : numpy.ndarray
        `TAR` with singleton dimensions removed.
    LEN : numpy.ndarray of int32
        Sub-sampled lengths ``floor(LEN / scaler)`` with singleton dimensions
        removed. Note that `squeeze()` yields 0-d arrays for a single example.

    Raises
    ------
    ValueError
        If `scaler` is smaller than 1 or a sub-sampled sequence does not fit
        into ``longest_seq // scaler`` rows.
    """
    if scaler < 1:
        raise ValueError('scaler must be a positive integer, got %r' % (scaler,))

    LEN = np.asarray(LEN)
    TAR = np.asarray(TAR)

    # Row ranges [starts[i], ends[i]) of every sequence inside the stacked input.
    ends = np.cumsum(LEN)
    starts = np.concatenate(([0], ends[:-1]))

    maxLength = int(np.floor(longest_seq / scaler))
    LEN = np.floor(LEN / scaler).astype(dtype='int32')

    n_examples = LEN.shape[0]
    zero_padded_INP = np.zeros((n_examples, maxLength, n_features))

    for i in range(n_examples):
        sequence = INP[starts[i]:ends[i], :]
        # Starting at offset `scaler - 1` keeps exactly floor(len / scaler)
        # rows, i.e. as many as the returned LEN reports (offset 1 for the
        # default scaler of 2).
        kept = sequence[scaler - 1::scaler, :]
        if kept.shape[0] > maxLength:
            raise ValueError(
                'sequence %d has %d rows after sub-sampling, more than the %d '
                'allowed by longest_seq=%r' % (i, kept.shape[0], maxLength, longest_seq))
        zero_padded_INP[i, :kept.shape[0], :] = kept

    LEN = LEN.squeeze()
    TAR = TAR.squeeze()

    return zero_padded_INP, TAR, LEN

In [None]:
#helper function for selecting only the last output of sequence of given length (for classification)
def last_relevant(output, length):
    """
    Pick, for every batch entry, the output at time step ``length - 1``.

    `output` is treated as a 3-D tensor ``[batch, time, features]``: it is
    flattened to ``[batch * time, features]`` and the row
    ``b * time + (length[b] - 1)`` is gathered for every batch entry ``b``.
    The feature size (dimension 2) must be statically known.
    NOTE(review): `length` presumably has to be an integer tensor compatible
    with ``tf.range`` -- confirm against the caller.

    Source: https://danijar.com/variable-sequence-lengths-in-tensorflow/
    """
    dynamic_shape = tf.shape(output)
    n_rows = dynamic_shape[0]
    n_steps = dynamic_shape[1]
    width = int(output.get_shape()[2])

    # Offset of each batch entry's first time step in the flattened tensor.
    row_offsets = tf.range(0, n_rows) * n_steps
    flat_index = row_offsets + (length - 1)

    flattened = tf.reshape(output, [-1, width])
    return tf.gather(flattened, flat_index)

#memory efficient version @: #https://stackoverflow.com/questions/45882401/how-to-deal-with-userwarning-converting-sparse-indexedslices-to-a-dense-tensor?noredirect=1&lq=1

In [None]:
#helper function for waveforms chopping 
def SmartDataChop(biosignals, select_every_nth, inp_steps, out_steps, shift, ratio,
                  tail_for_testing=False, n_windows=2500, stride=20):
    """
    Cut a multi-channel waveform into (input window, target window) pairs and
    split them into a training and a testing part.

    Column 0 of `biosignals` is used as the target signal, all other columns
    as input features. Window ``k`` starts at row ``k * stride``; inside a
    window every `select_every_nth`-th row is taken. The input covers
    `inp_steps` samples, the target covers the `out_steps` samples that end
    `shift` samples after the end of the input.

    Parameters
    ----------
    biosignals : pandas.DataFrame
        Waveform table; column 0 is the target, columns 1.. are the inputs.
    select_every_nth : int
        Row step used inside every window.
    inp_steps : int
        Number of samples per input window.
    out_steps : int
        Number of samples per target window.
    shift : int
        Offset (in samples) of the end of the target window relative to the
        end of the input window; must be >= 0.
    ratio : float
        Fraction of the windows returned as the training part.
    tail_for_testing : bool, default False
        If True the windows keep their order, so the last ``1 - ratio`` of
        them form the test part. Otherwise the windows are shuffled with a
        fixed seed first.
    n_windows : int, default 2500
        Number of windows to cut.
    stride : int, default 20
        Row distance between the starts of consecutive windows.

    Returns
    -------
    X_train, X_test, Y_train, Y_test : numpy.ndarray
        Inputs of shape ``(n, inp_steps, n_columns - 1)`` and targets of shape
        ``(n, out_steps)``.

    Raises
    ------
    ValueError
        If `shift` is negative, if the target window would start before the
        window start, or if `biosignals` has too few rows.

    Notes
    -----
    The global NumPy random generator is re-seeded with 888 on every call.
    """
    if shift < 0:
        # Previously this only printed the message and then returned
        # all-zero arrays, which silently poisoned the training data.
        raise ValueError('Error! shift must be >=0!')
    if out_steps > inp_steps + shift:
        raise ValueError(
            'out_steps (%r) must not exceed inp_steps + shift (%r)'
            % (out_steps, inp_steps + shift))

    # Last row touched by the target slice of the last window, plus one.
    rows_needed = (n_windows - 1) * stride + select_every_nth * (inp_steps + shift - 1) + 1
    if n_windows > 0 and out_steps > 0 and len(biosignals) < rows_needed:
        raise ValueError(
            'biosignals has %d rows but %d are needed for %d windows'
            % (len(biosignals), rows_needed, n_windows))

    # Convert once; slicing the ndarray is much cheaper than .iloc per window.
    values = biosignals.values

    # -1 as the target column is taken out of the inputs
    Xtemp = np.zeros((n_windows, inp_steps, values.shape[1] - 1))
    Ytemp = np.zeros((n_windows, out_steps))

    for cnt in range(n_windows):
        start = cnt * stride
        x_stop = start + select_every_nth * inp_steps
        y_start = start + select_every_nth * (inp_steps + shift - out_steps)
        y_stop = start + select_every_nth * (inp_steps + shift)
        Xtemp[cnt, :, :] = values[start:x_stop:select_every_nth, 1:]
        Ytemp[cnt, :] = values[y_start:y_stop:select_every_nth, 0]

    # Kept as a global re-seed: code running after this call may rely on it.
    np.random.seed(seed=888)

    if tail_for_testing:
        # keep chronological order: the last 1-ratio of the data is the test set
        indexes = np.arange(n_windows)
    else:
        # take testing examples from within the whole waveform
        indexes = np.random.permutation(n_windows)

    Xtemp = Xtemp[indexes, :, :]
    Ytemp = Ytemp[indexes, :]

    training_samples = int(ratio * Xtemp.shape[0])  # ratio*100% of the data for training

    X_train = Xtemp[0:training_samples, :, :]
    X_test = Xtemp[training_samples:, :, :]

    _Y_train = Ytemp[0:training_samples, :]
    _Y_test = Ytemp[training_samples:, :]

    return X_train, X_test, _Y_train, _Y_test