In [2]:
# %% [code]
# Import Library
#!pip install seaborn
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from tqdm import tqdm
from scipy.io import loadmat
from glob import glob
from sklearn.model_selection import train_test_split, KFold
import gc
sns.set()
sns.set_context('poster')

# Setting Random Seed

import random
import tensorflow as tf


In [3]:
def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
seed_everything(seed=42)

In [4]:
# TPU
# Detect hardware, return appropriate distribution strategy
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection. No parameters necessary if TPU_NAME environment variable is set. On Kaggle this is always the case.
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy() # default distribution strategy in Tensorflow. Works on CPU and single GPU.

print("REPLICAS: ", strategy.num_replicas_in_sync)

#
_input_path = os.path.join('..', 'input', '1056lab-cardiac-arrhythmia-detection')
os.listdir(_input_path)
normal_files = sorted(glob(os.path.join(_input_path, 'normal', '*.mat')))
print(normal_files[: 10])
af_files = sorted(glob(os.path.join(_input_path, 'af', '*.mat')))
print(af_files[: 10])
test_files = sorted(glob(os.path.join(_input_path, 'test', '*.mat')))
print(test_files[: 10])

REPLICAS:  1


FileNotFoundError: [WinError 3] The system cannot find the path specified: '..\\input\\1056lab-cardiac-arrhythmia-detection'

In [5]:
# Loading Datasets
def load_data(pathes, label=None, prefix=None, max_length=0, min_length=np.inf, verbose=True):
    verbose = not verbose  # tqdm用に反転

    if prefix is not None:
        if prefix.endswith("/") or prefix.endswith("\\"):
            prefix += "//"

        for i, val in enumerate(pathes):
            pathes[i] = "{}{}".format(prefix, val)

    data_array = []
    labels = []
    for i, f in enumerate(tqdm(pathes, disable=verbose)):
        tmp = loadmat(f)
        data_array.append(tmp["val"].flatten())
        tmp_len = len(data_array[i])

        if max_length < tmp_len:
            max_length = tmp_len
        elif min_length > tmp_len:
            min_length = tmp_len

        if label is not None:
            labels.append(label)

    return data_array, labels, max_length, min_length

In [8]:
dump_path = os.path.join("C:\\Users\\houfo\\Documents\\Kaggle\\yoshida146", "dump")
os.makedirs(dump_path, exist_ok=True)
if "normals.pkl" in os.listdir(dump_path):
    print("Load Local Files.")

    normals = joblib.load(os.path.join(dump_path, "normals.pkl"))
    afs = joblib.load(os.path.join(dump_path, "afs.pkl"))
    tests = joblib.load(os.path.join(dump_path, "tests.pkl"))
    labels = joblib.load(os.path.join(dump_path, "labels.pkl"))

    max_length = joblib.load(os.path.join(dump_path, "max_length.pkl"))
    min_length = joblib.load(os.path.join(dump_path, "min_length.pkl"))
else:
    print("Load RAW Files.")
    normals, normal_label, max_length, min_length = load_data(normal_files, label=0)
    afs, af_label, max_length, min_length = load_data(af_files, label=1, max_length=max_length, min_length=min_length)
    tests, _, max_length, min_length = load_data(test_files, label=None, max_length=max_length, min_length=min_length)
    labels = np.append(normal_label, af_label)

    joblib.dump(normals, os.path.join(dump_path, "normals.pkl"))
    joblib.dump(afs, os.path.join(dump_path, "afs.pkl"))
    joblib.dump(tests, os.path.join(dump_path, "tests.pkl"))
    joblib.dump(labels, os.path.join(dump_path, "labels.pkl"))

    joblib.dump(max_length, os.path.join(dump_path, "max_length.pkl"))
    joblib.dump(min_length, os.path.join(dump_path, "min_length.pkl"))
print("Max Length : {}\nMin Length : {}".format(max_length, min_length))

Load Local Files.
Max Length : 18286
Min Length : 2714


In [9]:
TRIM_METHOD = "center"

print("Triming Method : {}".format(TRIM_METHOD))
if TRIM_METHOD == "max":
    normals_ = normals.copy()
    for i, v in enumerate(tqdm(normals_)):
        diff = max_length - len(v)
        #         normals_[i] = np.append(v, [v[-1]] * diff).reshape(-1, max_length)
        normals_[i] = np.append([0] * diff, v).reshape(-1, max_length)

    afs_ = afs.copy()
    for i, v in enumerate(tqdm(afs_)):
        diff = max_length - len(v)
        #         afs_[i] = np.append(v, [v[-1]] * diff).reshape(-1, max_length)
        afs_[i] = np.append([0] * diff, v).reshape(-1, max_length)

    tests_ = tests.copy()
    for i, v in enumerate(tqdm(tests_)):
        diff = max_length - len(v)
        #         tests_[i] = np.append(v, [v[-1]] * diff).reshape(-1, max_length)
        tests_[i] = np.append([0] * diff, v).reshape(-1, max_length)

elif TRIM_METHOD == "min":
    normals_ = normals.copy()
    for i, v in enumerate(tqdm(normals_)):
        normals_[i] = v[: min_length]

    afs_ = afs.copy()
    for i, v in enumerate(tqdm(afs_)):
        afs_[i] = v[: min_length]

    tests_ = tests.copy()
    for i, v in enumerate(tqdm(tests_)):
        tests_[i] = v[: min_length]

elif TRIM_METHOD == "center":
    def clipping(x, n):
        del_width = (len(x) - n) // 2
        a_extracted = x[del_width: n + del_width]
        return a_extracted


    normals_ = normals.copy()
    for i, v in enumerate(tqdm(normals_)):
        normals_[i] = clipping(v, min_length)

    afs_ = afs.copy()
    for i, v in enumerate(tqdm(afs_)):
        afs_[i] = clipping(v, min_length)

    tests_ = tests.copy()
    for i, v in enumerate(tqdm(tests_)):
        tests_[i] = clipping(v, min_length)

Triming Method : center


100%|██████████████████████████████████████████████████████████████████████████| 3551/3551 [00:00<00:00, 592252.80it/s]
100%|████████████████████████████████████████████████████████████████████████████| 533/533 [00:00<00:00, 211982.18it/s]
100%|██████████████████████████████████████████████████████████████████████████| 1750/1750 [00:00<00:00, 347473.58it/s]


In [10]:
class AudioDataAugment():
    def __init__(self):
        pass

    def _check_array(self, x):
        return np.array(x) if type(x) == list else x

    def add_noise(self, x, rate=0.05):
        x = self._check_array(x)
        x_ = x + rate * np.random.randn(len(x), 1)
        x_ = x_.astype(np.float16)
        return x_

    def shift(self, x, rate=2):
        x_ = np.roll(x, int(len(x) // rate))
        x_ = x_.astype(np.float16)
        return x_

if TRIM_METHOD == "max":
    normals_ = np.array(normals_).reshape(-1, max_length, 1)
    afs_ = np.array(afs_).reshape(-1, max_length, 1)
    tests_ = np.array(tests_).reshape(-1, max_length, 1)
else:
    normals_ = np.array(normals_).reshape(-1, min_length, 1)
    afs_ = np.array(afs_).reshape(-1, min_length, 1)
    tests_ = np.array(tests_).reshape(-1, min_length, 1)

In [12]:
from tensorflow.keras.layers import Dense, BatchNormalization, LSTM, Dropout, Conv1D, MaxPooling1D, SpatialDropout1D, Bidirectional, Input, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping

In [13]:
def build(X_shape, conv_activation="tanh", conv_filters=64, dense_act="tanh", optimizer="adam"):
    model = Sequential()

    input_shape = X_shape[1], X_shape[2]
    model.add(Input(shape=input_shape))
    model.add(Conv1D(conv_filters, 2, padding="same", activation=conv_activation))
    model.add(MaxPooling1D(2))

    model.add(Conv1D(conv_filters, 2, padding="same", activation=conv_activation))
    model.add(MaxPooling1D(2))

    model.add(Bidirectional(LSTM(128, activation="tanh", return_sequences=True)))
    model.add(Bidirectional(LSTM(64, activation="tanh", return_sequences=False)))

    model.add(Flatten())
    model.add(Dense(32, activation=dense_act))
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    model.add(Dense(1, activation="sigmoid"))

    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["AUC"])

    return model

In [14]:
# @profile
def augment(data, label, N=10, verbose=True):
    ada = AudioDataAugment()

    add_y_array = []
    for _ in tqdm(range(N), disable=not verbose):
        tmp = []
        for x, y in zip(data, label):
            noise_rate = np.random.rand() * np.random.randint(0, 10)
            shift_rate = np.random.randint(1, 50)

            # Add Noise
            tmp.append(ada.add_noise(x, noise_rate))
            add_y_array.append(y)

            tmp.append(ada.shift(x, shift_rate))
            add_y_array.append(y)

        data = np.vstack([data, tmp])
    label = np.append(label, add_y_array)

    return data, label

In [22]:
def fit_and_predict(X, y, X_test):
    p = np.random.permutation(np.arange(len(X)))
    X_ = X[p]
    y_ = y[p]

    X_train, X_valid, y_train, y_valid = train_test_split(X_, y_)
    X_train, y_train = augment(X_train, y_train, N=10)

    model = build(X_train.shape)
    es_cb = EarlyStopping(monitor="val_auc", mode="auto", patience=10)
    model.fit(X_train, y_train, epochs=2, batch_size=32, validation_data=(X_valid, y_valid), callbacks=[es_cb],
              verbose=True)

    predict = model.predict(X_test)
    del X_, y_, p, X_train, X_valid, y_train, y_valid, model;
    gc.collect()

    return predict

In [None]:
from sklearn.model_selection import KFold

K = len(normals_) // len(afs_)
k_fold = KFold(n_splits=K, shuffle=True)

oof = np.zeros(len(tests_))

for i, ids in enumerate(k_fold.split(normals_)):
    print("{} Fold".format(i + 1))
    gc.collect()

    X = np.vstack([normals_[ids[1]], afs_])
    y = np.append(np.zeros(len(ids[1])), np.ones(len(afs_)))

    oof += fit_and_predict(X, y, tests_).flatten()

oof /= K

df_submit = pd.read_csv(os.path.join(dump_path, "sampleSubmission.csv"))
df_submit["af"] = oof
df_submit.to_csv("submit_Keras.csv", index=False)

1 Fold


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.93it/s]


Epoch 1/2
Epoch 2/2
2 Fold


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.47it/s]


Epoch 1/2
Epoch 2/2
3 Fold


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  7.06it/s]


Epoch 1/2
Epoch 2/2
4 Fold


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.85it/s]


Epoch 1/2
Epoch 2/2
5 Fold


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.60it/s]


Epoch 1/2
 35/554 [>.............................] - ETA: 29:40 - loss: 0.8335 - auc: 0.5160

In [1]:
def RGB(r, g, b):
    r = hex(r)[2:]
    g = hex(g)[2:]
    b = hex(b)[2:]
    return r+g+b

In [6]:
RGB(6, 143, 143)

'68f8f'