In [7]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from skimage.transform import resize
from skimage.restoration import denoise_wavelet

In [3]:
# Reset Keras' global state so that re-running the notebook's model cells
# in the same kernel starts from a clean slate.
tf.keras.backend.clear_session()

In [4]:
# Number of target classes. The spelling "CLASESS" is kept as-is because
# other cells in this notebook reference the constant by this exact name.
NUM_CLASESS = 6
# Passed to read_data as `num_files`: caps the number of training
# spectrogram files and train.csv rows that are loaded.
NUM_FILES = 100
# Mini-batch size passed to model.fit.
BATCH_SIZE = 32
IMG_SIZE = (224, 224,3)  # (height, width, channels)

In [5]:
# def convert_parquet_to_npy(input_folder, output_folder):
#     npy_output_folder = os.path.join(output_folder, 'npy_data')
    
#     # Ensure the output directory exists
#     os.makedirs(npy_output_folder, exist_ok=True)
    
#     for root, dirs, files in os.walk(input_folder):
#         for file in files:
#             if file.endswith('.parquet'):
#                 parquet_path = os.path.join(root, file)
#                 df = pd.read_parquet(parquet_path)
#                 eeg_data = df.to_numpy()
#                 relative_path = os.path.relpath(parquet_path, input_folder)
                
#                 # Create the corresponding directory structure in the npy_data folder
#                 output_subfolder = os.path.join(npy_output_folder, os.path.dirname(relative_path))
#                 os.makedirs(output_subfolder, exist_ok=True)
#                 np.save(os.path.join(output_subfolder, file.replace('.parquet', '.npy')), eeg_data)

In [6]:
def read_data(data_folder, num_files=None):
    """
    Read spectrogram arrays (.npy) and label tables (.csv) from a data folder.

    Parameters:
    - data_folder (str): Folder containing the 'train_spectrograms' and
      'test_spectrograms' subfolders plus the 'train.csv' and 'test.csv' files.
    - num_files (int or None): Maximum number of training .npy files to load and
      maximum number of rows to read from train.csv. If None, everything is read.
      The test split is always read in full.

    Returns:
    - train_spec (list[np.ndarray]): One array per training .npy file, in sorted
      file-name order.
    - test_spec (list[np.ndarray]): One array per test .npy file, in sorted
      file-name order.
    - train_labels (pd.DataFrame): First `num_files` rows of train.csv.
    - test_labels (pd.DataFrame): Contents of test.csv.

    Note:
    - The spectrogram list and the label rows are truncated independently; this
      function does not match a file to a label row. NOTE(review): confirm that
      callers pairing them by position get the intended correspondence.
    """
    train_spec_folder = os.path.join(data_folder, 'train_spectrograms')
    test_spec_folder = os.path.join(data_folder, 'test_spectrograms')

    def read_npy_folder(folder_path, n_files=None):
        # os.listdir returns entries in arbitrary order, so sort for a
        # reproducible selection, and filter to .npy BEFORE truncating so that
        # stray non-.npy entries do not eat into the requested file count.
        npy_names = sorted(
            name for name in os.listdir(folder_path) if name.endswith('.npy')
        )
        # `is not None` (rather than truthiness) keeps n_files=0 meaning
        # "no files", matching how `nrows=0` behaves for the label CSV.
        if n_files is not None:
            npy_names = npy_names[:n_files]
        arrays = [np.load(os.path.join(folder_path, name)) for name in npy_names]
        print(f"Read {len(arrays)} files from {folder_path}.")
        return arrays

    # Read spectrogram arrays
    train_spec = read_npy_folder(train_spec_folder, num_files)
    test_spec = read_npy_folder(test_spec_folder)

    # Read label tables
    train_labels = pd.read_csv(os.path.join(data_folder, 'train.csv'), nrows=num_files)
    test_labels = pd.read_csv(os.path.join(data_folder, 'test.csv'))

    return train_spec, test_spec, train_labels, test_labels

In [35]:
def preprocess_spec(X_train_spec, img_size=(224, 224), n_channels=3):
    """ Preprocess spectrograms:
        1. Replace NaN entries with 0
        2. Resize spectrograms to (*img_size, n_channels)
        3. Denoise (wavelet), keeping the resized image if denoising
           produces non-finite values

    Args:
        X_train_spec : iterable of np.ndarray
            Spectrograms data, one array per sample. An array with zero
            elements is replaced by an all-zero image.
        img_size : tuple, optional
            Size to which spectrograms should be resized, defaults to (224, 224).
        n_channels : int, optional
            Number of color channels, defaults to 3.

    Returns:
        np.ndarray of float32, shape (n_samples, img_size[0], img_size[1], n_channels)
            Preprocessed spectrograms. For an empty input the result has
            n_samples == 0 but keeps the trailing image dimensions.
    """
    target_shape = (*img_size, n_channels)

    preprocessed_specs = []
    for spec in X_train_spec:
        if spec.size == 0:
            preprocessed_specs.append(np.zeros(target_shape))
            continue

        # NaNs would otherwise propagate through resize/denoise into the
        # training tensor. np.where keeps the array's dtype unchanged.
        clean_spec = np.where(np.isnan(spec), 0, spec)
        resized_spec = resize(clean_spec, target_shape)
        denoised_spec = denoise_wavelet(resized_spec)

        # NOTE(review): denoise_wavelet appears to return non-finite values when
        # its noise estimate degenerates (e.g. a constant image) - confirm.
        # Fall back to the un-denoised image rather than feed NaNs to the model.
        if not np.all(np.isfinite(denoised_spec)):
            denoised_spec = resized_spec
        preprocessed_specs.append(denoised_spec)

    if not preprocessed_specs:
        # Keep the documented 4-D shape even when there are no samples.
        return np.zeros((0, *target_shape), dtype=np.float32)
    return np.array(preprocessed_specs, dtype=np.float32)

def create_model(input_shape_spec, num_classes=6):
    """Build a Sequential classifier on top of a TF-Hub MobileNetV2 module.

    The network is an input layer fixed at (224, 224, 3), the hub module
    referenced by the URL below, and a softmax Dense layer with
    `num_classes` units.

    Args:
        input_shape_spec : shape of one Spectrogram sample. Accepted for
            interface compatibility but not used: the input layer is always
            (224, 224, 3).
        num_classes : 6 for seizure, lpd, gpd, lrda, grda, other

    Returns:
        keras model (not compiled)
    """
    hub_handle = "https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/5"
    keras_layers = tf.keras.layers

    image_input = keras_layers.InputLayer(input_shape=(224, 224, 3))
    backbone = hub.KerasLayer(hub_handle)
    class_head = keras_layers.Dense(num_classes, activation='softmax')

    return tf.keras.Sequential([image_input, backbone, class_head])


In [36]:
# Load at most NUM_FILES training spectrograms and the first NUM_FILES rows of
# train.csv (the test split is loaded too but not used in this cell).
# NOTE(review): spectrograms and label rows are paired purely by position
# below; nothing here matches a spectrogram file to its row in train.csv -
# confirm the two are actually aligned.
train,_test,train_labels,_test_labels = read_data('data/npy_data/npy_data',num_files=NUM_FILES)
# Resize + denoise every spectrogram into a (224, 224, 3) float32 image.
data = preprocess_spec(train)
# NOTE(review): `labels` is not used by any cell visible here, and it is read
# from a different path than the train.csv loaded by read_data above.
labels = pd.read_csv('train.csv', nrows=NUM_FILES)
# 80/20 train/validation split with a fixed seed for reproducibility.
X_train, X_val, y_train, y_val = train_test_split(
    data, train_labels,
    test_size=0.2,
    random_state=42,
    shuffle=True
)

Read 100 files from data/npy_data/npy_data/train_spectrograms.
Read 1 files from data/npy_data/npy_data/test_spectrograms.


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [37]:
# Column 8 of the label table holds the class label.
# NOTE(review): presumably the `expert_consensus` column of train.csv - confirm.
LABEL_COLUMN = 8

train_label_names = np.array(y_train)[:, LABEL_COLUMN]
val_label_names = np.array(y_val)[:, LABEL_COLUMN]

# Fit the encoder ONCE, on the labels of both splits, and reuse it for both.
# Re-fitting on the validation labels would build a second class->index
# mapping, so the same class could get different indices in train and
# validation whenever one split is missing a class.
label_encoder = LabelEncoder()
label_encoder.fit(np.concatenate([train_label_names, val_label_names]))
encoded_labels_train = label_encoder.transform(train_label_names)
encoded_labels_val = label_encoder.transform(val_label_names)

# One-hot encode to match the model's softmax output.
y_train = tf.keras.utils.to_categorical(encoded_labels_train, num_classes=NUM_CLASESS).astype('float32')
y_val = tf.keras.utils.to_categorical(encoded_labels_val, num_classes=NUM_CLASESS).astype('float32')

In [38]:
# Pass the image shape as the first argument and the class count by keyword.
# The previous call passed NUM_CLASESS positionally, which bound it to
# `input_shape_spec` and left `num_classes` at its default.
model = create_model(IMG_SIZE, num_classes=NUM_CLASESS)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss=tf.keras.losses.KLDivergence(), metrics=['accuracy'])

In [39]:
# Evaluate on the held-out split after every epoch; without validation_data
# the X_val / y_val split created earlier is never used. The History object is
# kept so the training curves can be inspected afterwards.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=BATCH_SIZE,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f15d8747950>