In [None]:
# Install PyWavelets through pip, invoked with the interpreter that runs this
# kernel (sys.executable), so the package lands in the kernel's environment.
import sys
!{sys.executable} -m pip install pywavelets

In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm_notebook as tq
from biosppy.signals import ecg
from collections import Counter
from biosppy.signals import tools
import seaborn as sns
from scipy.signal import resample

In [None]:
def _read_signals(path):
    """Read one variable-length integer time series per row of a CSV file.

    Parameters
    ----------
    path : str
        CSV file whose first column is an id and whose remaining columns
        hold the samples of one recording.

    Returns
    -------
    list of list of int
        One list of samples per data row; the header row is skipped.
    """
    signals = []
    # The context manager closes the file even when a row fails to parse.
    with open(path) as handle:
        for row in tq(handle):
            # Header
            if row.startswith('id'):
                continue
            # Drop the id column, then the empty cells and the literal
            # backslash-n tokens left on rows shorter than the widest one.
            cells = row.strip().split(',')[1:]
            cells = [cell for cell in cells if cell != "" and cell != '\\n']
            signals.append(list(map(int, cells)))
    return signals


# Read data; each set is stored as a list of time series
train_signals = _read_signals('X_train.csv')
test_signals = _read_signals('X_test.csv')

# Read the labels; the id column is not kept
train_labels = np.array(pd.read_csv('y_train.csv')['y'])

# Sample submission file, read as-is
sample = pd.read_csv("sample.csv")

In [None]:
# Magic numbers
# Longest and shortest training recording, in samples
MAX_DIM = max(map(len, train_signals))
MIN_DIM = min(map(len, train_signals))
# 1.2 from https://www.sciencedirect.com/science/article/pii/S1566253518307632
MAX_DIM_OPTI = int(1.2 * MAX_DIM)
print(
    "Max lenght is {} and min lenght is {}. Also, the optimized max length is {}".format(
        MAX_DIM, MIN_DIM, MAX_DIM_OPTI
    )
)

# Sampling frequency handed to the band-pass filter
SAMPLING_FREQ = 300
print("Frequence of sampling {} Hz".format(SAMPLING_FREQ))

# Number of classes
CLASSES = 4
# Number of samples produced per training signal by preprocessing()
EXP_SIZE = 3


In [None]:
def signal_processing(sig):
    """Band-pass filter one raw signal with biosppy's ``tools.filter_signal``.

    The filter is an order-90 FIR band-pass with cut-offs [3, 45], applied
    at a sampling rate of ``SAMPLING_FREQ``.

    Parameters
    ----------
    sig : sequence of numbers
        Samples of one recording.

    Returns
    -------
    The filtered signal, i.e. the first of the three values returned by
    ``tools.filter_signal``.
    """
    filter_options = dict(
        ftype='FIR',
        band='bandpass',
        order=90,
        frequency=[3, 45],
        sampling_rate=SAMPLING_FREQ,
    )
    # Only the filtered samples are needed; the two other returned values
    # are discarded.
    filtered, _unused_a, _unused_b = tools.filter_signal(sig, **filter_options)
    return filtered

def normalization(sig_filtered):
    """Normalize a signal with biosppy's ``tools.normalize``.

    Returns the first element of the result of ``tools.normalize``.
    """
    normalized = tools.normalize(sig_filtered)
    return normalized[0]

def preprocessing():
    """Build the augmented training set from the global ``train_signals``.

    Three samples are produced for every training signal, in this order:
    the original, a copy randomly stretched by a factor in [1, 1.2), and a
    copy with a randomly placed run of samples set to zero. Each one is
    filtered, normalized and right-padded with zeros to ``MAX_DIM_OPTI``.

    The output array is sized with ``EXP_SIZE``, so that constant must stay
    equal to the number of variants generated here (3).

    Returns
    -------
    x_data : numpy.ndarray
        Array of shape ``(len(train_signals) * EXP_SIZE, MAX_DIM_OPTI, 1)``.
    x_label : numpy.ndarray
        Label of every row of ``x_data`` (each label repeated per variant).
    """
    n_signals = len(train_signals)
    x_data = np.zeros((n_signals * EXP_SIZE, MAX_DIM_OPTI, 1))
    x_label = []
    idx = 0

    # Defined once rather than on every loop iteration.
    def add_signal(sig, label):
        """Filter, normalize and pad ``sig``, then store it with ``label``."""
        nonlocal idx
        signal = normalization(signal_processing(sig))
        diff = MAX_DIM_OPTI - signal.shape[0]
        if diff:
            # Right-pad with zeros so every sample has the same length.
            signal = np.hstack((signal, np.zeros((diff,))))
        x_data[idx] = signal.reshape((MAX_DIM_OPTI, 1))
        x_label.append(label)
        idx += 1

    for i in tq(range(n_signals)):
        raw = train_signals[i]
        label = train_labels[i]

        # Variant 1: the unmodified signal
        add_signal(raw, label)

        # Variant 2: stretched randomly by a factor in [1, 1.2)
        stretch_ratio = 1. + np.random.random() / 5
        stretch_len = int(len(raw) * stretch_ratio)
        add_signal(resample(raw, stretch_len), label)

        # Variant 3: masked
        sig = np.array(raw)
        sig_size = len(sig)
        # what is the better size for the mask ?
        # The drawn size is clamped to sig_size - 1: without the clamp,
        # np.random.randint(0, sig_size - mask_size) raises ValueError for
        # any signal that has no more samples than the mask.
        mask_size = min(np.random.randint(10, 450), sig_size - 1)
        mask_start = np.random.randint(0, sig_size - mask_size)
        # Set the masked part to 0
        sig[mask_start:mask_start + mask_size] = 0
        add_signal(sig, label)

    return x_data, np.array(x_label)
        

In [None]:
# based on https://www.sciencedirect.com/science/article/pii/S1566253518307632
def model():
    """Build the (uncompiled) 1-D convolutional classifier.

    The network is four stages of kernel-size-3 ReLU convolutions, each
    followed by a size-3 / stride-3 max pooling, then three more
    convolutions, a global max pooling and three dense layers ending in a
    4-unit softmax.

    Returns
    -------
    tf.keras.models.Sequential
    """
    layers = tf.keras.layers

    def conv(filters):
        # Kernel size 3, ReLU activation; strides keep Conv1D's default of 1.
        return layers.Conv1D(filters, 3, activation='relu')

    # (number of filters, number of stacked convolutions) per pooled stage
    pooled_stages = ((64, 2), (128, 2), (256, 3), (256, 3))

    stack = []
    for filters, n_convs in pooled_stages:
        for _ in range(n_convs):
            stack.append(conv(filters))
        stack.append(layers.MaxPool1D(pool_size=3, strides=3))

    # Last convolutional stage is reduced by a global max pooling instead
    for _ in range(3):
        stack.append(conv(256))
    stack.append(layers.GlobalMaxPool1D())

    # Classification head
    stack.append(layers.Dense(256, activation="relu"))
    stack.append(layers.Dense(30, activation="relu"))
    stack.append(layers.Dense(4, activation="softmax"))

    return tf.keras.models.Sequential(stack)

In [None]:
from tensorflow.keras.callbacks import Callback
class TestSubmit(Callback):
    """Keras callback that stores class predictions for a fixed input set
    after every epoch.

    Parameters
    ----------
    X_test : array-like
        Inputs passed to ``self.model.predict`` at the end of each epoch.
    *args, **kwargs
        Forwarded to ``Callback.__init__``.

    Attributes
    ----------
    preds : dict
        Maps the epoch index to the array of predicted class indices.
        Reset at the start of every training run.
    """

    def __init__(self, X_test, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.X_test = X_test
        # Defined up front so the attribute exists before training starts.
        self.preds = {}

    def on_train_begin(self, logs=None):
        """Discard predictions kept from a previous training run."""
        self.preds = {}

    def on_epoch_end(self, epoch, logs=None):
        """Store the predicted class indices for ``X_test`` under ``epoch``."""
        # Sequential.predict_classes was removed in TensorFlow 2.6; taking
        # the argmax over the last axis of the predicted scores is its
        # replacement for a multi-class softmax output.
        scores = self.model.predict(self.X_test)
        self.preds[epoch] = np.argmax(scores, axis=-1)

In [None]:
############## ADJUST SETS #####################
X_train, y_train = preprocessing()

# Pre-allocated with zeros, so the part of a row that is not written below
# is the zero padding and every sample has shape (MAX_DIM_OPTI, 1).
X_test_submit = np.zeros((len(test_signals), MAX_DIM_OPTI, 1))
for row, sig in enumerate(tq(test_signals)):
    signal = normalization(signal_processing(sig))
    # MAX_DIM_OPTI is derived from the training lengths only. Cropping keeps
    # a longer test recording from producing a negative pad size, which
    # np.zeros rejects with a ValueError.
    signal = signal[:MAX_DIM_OPTI]
    X_test_submit[row, :signal.shape[0], 0] = signal

In [None]:
# Number of training samples, then number of training labels (one per line)
print(len(X_train), len(y_train), sep="\n")

In [None]:
#callbacks
# NOTE(review): test_submit is created here but is not passed to fit() below,
# so it never records anything. Pass callbacks=[test_submit] to fit() if the
# per-epoch test predictions are wanted.
test_submit = TestSubmit(X_test_submit)

#################### CREATE THE MODEL AND TRAIN IT ####################
my_model = model()
print('--- COMPILE ---')
# No trailing comma here: it turned `opti` into a one-element tuple, and the
# optimizer was then ignored because compile() received the string "Adam".
opti = tf.keras.optimizers.Adam(learning_rate=0.001)
my_model.compile(optimizer=opti,
                 loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                 metrics=["sparse_categorical_accuracy"])
print('--- FIT ---')
# batchsize ??
my_model.fit(X_train, y_train, batch_size=256, epochs=20)

In [None]:
################## PREDICTION #######################
# `model` is the function that builds the network and has no `preds`
# attribute; the trained network is `my_model`. Its softmax output is turned
# into class indices with an argmax over the last axis.
class_scores = my_model.predict(X_test_submit)
y_preds = np.argmax(class_scores, axis=-1)

In [None]:
############ SUBMISSION ##################
# Take the ids from the first column directly: DataFrame.to_numpy() on the
# whole frame converts every column to one common dtype first.
ids = sample.iloc[:, 0].to_numpy()
sub = pd.DataFrame({"id": ids, "y": y_preds})
print(sub.head())
# index=False keeps pandas from writing the row index as an extra, unnamed
# first column, so the file holds the "id" and "y" columns only.
# NOTE(review): assumes the expected submission layout is exactly id,y - confirm.
sub.to_csv("submission_y_test", index=False)