In [31]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import json
import numpy as np
import keras
from keras import layers
import tensorflow as tf
from sklearn import preprocessing, model_selection
import random

# Training configuration shared by the cells below.
num_classes = 6  # number of units in the softmax output layer
BATCH_SIZE = 64  # samples per batch in both tf.data pipelines
SHUFFLE_BUFFER_SIZE = 2 * BATCH_SIZE  # shuffle window of two batches
QUALITY_THRESHOLD = 128  # NOTE(review): not referenced in the visible cells - confirm it is still needed

In [51]:
# Read the feature array and the target array from the Kaggle input dataset.
features, labels = (
    np.load("/kaggle/input/logsig3v2/eeg_data.npy"),
    np.load("/kaggle/input/logsig3v2/targets.npy"),
)

In [52]:
# Boolean mask over samples: True where the feature row contains no NaN at all.
rows_without_nans = np.logical_not(np.isnan(features).any(axis=1))

# Apply the same mask to both arrays so features and labels stay row-aligned.
features, labels = features[rows_without_nans], labels[rows_without_nans]

In [53]:
# Sanity check: (n_samples, n_features) left after dropping NaN rows.
np.shape(features)

(106789, 2109)

In [54]:
# Sanity check: the first dimension must match the feature array's.
np.shape(labels)

(106789, 6)

In [62]:
def percentage_between(matrix, lower_bound, upper_bound):
    """
    Calculate the percentage of elements that lie strictly between two values.

    Parameters:
    - matrix: NumPy array or any array-like (e.g. nested lists) of numbers.
    - lower_bound: Lower bound of the range (exclusive).
    - upper_bound: Upper bound of the range (exclusive).

    Returns:
    - The percentage (0-100) of elements x with lower_bound < x < upper_bound.
      NaN elements never satisfy the comparison, so they count as outside
      the range. An empty input returns 0.0 rather than dividing by zero.
    """
    # asarray lets plain Python sequences be compared element-wise as well.
    values = np.asarray(matrix)
    total_points = values.size
    if total_points == 0:
        return 0.0

    points_within_bounds = np.sum((values > lower_bound) & (values < upper_bound))
    return (points_within_bounds / total_points) * 100


# Share of feature values strictly between 1e5 and 1e7.
percentage_between(features, lower_bound=10**5, upper_bound=10**7)

21.882137653819246

In [56]:
# Largest single feature value across the whole array.
features.max()

4566965400000000.0

In [39]:
# Shuffle samples and targets with one shared permutation so rows stay aligned.
# The generator is seeded so the ordering is identical on every
# Restart & Run All; the original unseeded call changed it on each run.
shuffled_indices = np.random.default_rng(42).permutation(len(features))
features = features[shuffled_indices]
labels = labels[shuffled_indices]

In [40]:
# Peek at one target row (second sample, all class columns).
labels[1, :]

array([0.        , 0.        , 0.83333333, 0.        , 0.        ,
       0.16666667])

In [41]:
# Number of NaN entries in the targets.
np.isnan(labels).sum()

0

In [42]:
from sklearn.model_selection import train_test_split

# Index of the highest-valued class in each target row; used below to stratify the split.
labels_argmax = labels.argmax(axis=1)

In [43]:
# vals_dict = {}
# for i in eeg["label"]:
#     if i in vals_dict.keys():
#         vals_dict[i] += 1
#     else:
#         vals_dict[i] = 1
# total = sum(vals_dict.values())

# # Formula used - Naive method where
# # weight = 1 - (no. of samples present / total no. of samples)
# # So more the samples, lower the weight

# weight_dict = {k: (1 - (v / total)) for k, v in vals_dict.items()}
# print(weight_dict)

In [44]:
# Hold out 20% of the samples, stratified on each row's dominant class.
# random_state pins the split so reruns train and evaluate on the same rows.
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    stratify=labels_argmax,
    random_state=42,
)

In [45]:
# Training pipeline: slice per sample, shuffle within a small buffer, then batch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

# Evaluation pipeline: same batching, original order (no shuffle).
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test)).batch(BATCH_SIZE)

In [46]:
def plot_history_metrics(history: keras.callbacks.History):
    """Plot every series recorded in a Keras training history on its own subplot.

    Parameters:
    - history: object whose ``history`` attribute maps a metric name to the
      list of its per-epoch values (as returned by ``model.fit``).

    The plots are laid out on a grid with ``total // 2`` columns (at least
    one). An empty history draws nothing instead of raising.
    """
    metrics = history.history
    total_plots = len(metrics)
    if total_plots == 0:
        # Nothing recorded, so there is nothing to draw.
        return

    # With a single series total_plots // 2 is 0, which previously caused a
    # ZeroDivisionError below; always keep at least one column.
    cols = max(1, total_plots // 2)
    rows = -(-total_plots // cols)  # ceiling division

    plt.figure(figsize=(15, 10))
    for position, (key, value) in enumerate(metrics.items(), start=1):
        plt.subplot(rows, cols, position)
        plt.plot(range(len(value)), value)
        plt.title(str(key))
    plt.show()

In [47]:
def create_model(input_length=None, n_classes=None):
    """Build the (uncompiled) 1-D convolutional classifier.

    Architecture: six Conv1D(strides=2, padding="same", relu) +
    BatchNormalization stages with 32, 64, 128, 256, 512 and 1024 filters,
    then Dropout -> Flatten -> Dense 4096 -> Dense 2048 -> Dense 1024 ->
    Dense 128 -> softmax. The 2048/1024/128 dense layers use an L2 kernel
    regularizer; dropout (0.2) follows all dense layers except the last two.

    Parameters:
    - input_length: number of values per sample; the model input has shape
      (input_length, 1). When None, it is taken from the module-level
      ``features`` array (``features.shape[1]``) so the model always matches
      the loaded data. The previous hard-coded 1390 no longer matched the
      2109-column feature array shown earlier in this notebook.
    - n_classes: width of the softmax output. When None, the module-level
      ``num_classes`` is used.

    Returns:
    - keras.Model mapping (input_length, 1) inputs to n_classes probabilities.
    """
    if input_length is None:
        input_length = features.shape[1]
    if n_classes is None:
        n_classes = num_classes

    input_layer = keras.Input(shape=(input_length, 1))

    # Convolutional feature extractor: each stage halves the sequence length.
    x = input_layer
    conv_stages = ((32, 3), (64, 3), (128, 5), (256, 5), (512, 7), (1024, 7))
    for filters, kernel_size in conv_stages:
        x = layers.Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            strides=2,
            activation="relu",
            padding="same",
        )(x)
        x = layers.BatchNormalization()(x)

    x = layers.Dropout(0.2)(x)
    x = layers.Flatten()(x)

    # Dense head; only the first dense layer is left unregularized.
    x = layers.Dense(4096, activation="relu")(x)
    x = layers.Dropout(0.2)(x)

    for units in (2048, 1024):
        x = layers.Dense(
            units, activation="relu", kernel_regularizer=keras.regularizers.L2()
        )(x)
        x = layers.Dropout(0.2)(x)

    x = layers.Dense(
        128, activation="relu", kernel_regularizer=keras.regularizers.L2()
    )(x)
    output_layer = layers.Dense(n_classes, activation="softmax")(x)

    return keras.Model(inputs=input_layer, outputs=output_layer)

In [48]:
# Build the network and print its layer-by-layer summary.
conv_model = create_model()
conv_model.summary()

In [49]:
epochs = 30

callbacks = [
    # Keep only the weights with the lowest *validation* loss. Monitoring the
    # training loss (as before) would checkpoint the most fitted epoch, not
    # the one that generalizes best; fit() is given validation data, so
    # "val_loss" is available.
    keras.callbacks.ModelCheckpoint(
        "best_model.keras", save_best_only=True, monitor="val_loss"
    ),
    # Multiply the learning rate by 0.2 after 2 epochs without val_loss
    # improvement, never going below 1e-6.
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.2,
        patience=2,
        min_lr=0.000001,
    ),
]

optimizer = keras.optimizers.Adam(amsgrad=True, learning_rate=0.001)
# The targets are non-integer per-class values, so the loss compares distributions.
loss = keras.losses.KLDivergence()

In [50]:
# No extra metrics are tracked: only the configured loss is reported.
conv_model.compile(optimizer=optimizer, loss=loss, metrics=[])

# The held-out split doubles as the validation set during training.
conv_model_history = conv_model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
    callbacks=callbacks,
)

Epoch 1/30


KeyboardInterrupt: 

In [None]:
# One subplot per series recorded during training.
plot_history_metrics(history=conv_model_history)

In [None]:
# The model is compiled with an empty metrics list, so evaluate() does not
# return the five values the previous tuple-unpacking expected.
# return_dict=True yields {name: value} for whatever was compiled, and avoids
# rebinding the module-level `loss` object.
evaluation = conv_model.evaluate(test_dataset, return_dict=True)
for metric_name, metric_value in evaluation.items():
    print(f"{metric_name} : {metric_value}")


def view_evaluated_eeg_plots(model):
    """Plot a window of up to 12 consecutive samples with actual vs. predicted labels.

    NOTE(review): this function reads the module-level names ``eeg``,
    ``scaler`` and ``le``, none of which are defined in the visible cells of
    this notebook, and it reshapes samples to (-1, 512, 1). Confirm these
    still exist and that 512 matches the model's input length before running.

    Parameters:
    - model: object exposing ``predict(data, verbose=0)`` that returns one row
      of per-class scores per sample.
    """
    total_plots = 12

    # random.randint is inclusive on both ends. The old upper bound of
    # len(eeg) could start the window at or past the end, yielding an empty
    # or short slice. Cap the start so a full window fits whenever possible,
    # keeping the original lower bound of 10 when there is room for it.
    last_start = max(0, len(eeg) - total_plots)
    start_index = random.randint(min(10, last_start), last_start)
    # .loc slices by label and includes the end label.
    # presumably `eeg` has a default integer index - verify.
    end_index = start_index + total_plots - 1

    data = eeg.loc[start_index:end_index, "raw_values"]
    # Each sample is rescaled on its own before being stacked for prediction.
    data_array = [scaler.fit_transform(np.asarray(i).reshape(-1, 1)) for i in data]
    data_array = [np.asarray(data_array).astype(np.float32).reshape(-1, 512, 1)]

    original_labels = eeg.loc[start_index:end_index, "label"]
    predicted_labels = np.argmax(model.predict(data_array, verbose=0), axis=1)

    # Map label ids back to their original names for the plot titles.
    original_labels = [
        le.inverse_transform(np.array(label).reshape(-1))[0]
        for label in original_labels
    ]
    predicted_labels = [
        le.inverse_transform(np.array(label).reshape(-1))[0]
        for label in predicted_labels
    ]

    cols = total_plots // 3
    rows = -(-total_plots // cols)  # ceiling division

    fig = plt.figure(figsize=(20, 10))
    # Spacing is a figure-level setting, so set it once, not per subplot.
    fig.subplots_adjust(hspace=0.5)
    for position, (plot_data, og_label, pred_label) in enumerate(
        zip(data, original_labels, predicted_labels), start=1
    ):
        plt.subplot(rows, cols, position)
        plt.plot(plot_data)
        plt.title(f"Actual Label : {og_label}\nPredicted Label : {pred_label}")
    plt.show()


# Visual spot-check of the trained model's predictions.
view_evaluated_eeg_plots(model=conv_model)