# 1. Baseline
Classify `math` versus `relax` recordings in the synchronized brainwave dataset.

In [131]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow import keras
import tsgm
from tsgm.models.architectures.zoo import zoo  
import ast
%matplotlib inline

In [68]:
# Fetch the dataset through tsgm's helper. X and y are rebound further down from
# the CSV-derived frame, so these values are only inspected in the next two cells.
X, y = tsgm.utils.get_synchronized_brainwave_dataset()

INFO:utils:File exist


In [3]:
# Shape of the feature array returned by the tsgm loader.
X.shape

(30013, 12)

In [4]:
# Shape of the label array returned by the tsgm loader.
y.shape

(30013,)

In [69]:
# Read the raw CSV directly; the cells below use its label, raw_values and
# signal_quality columns. The path is relative to the notebook's working directory.
df = pd.read_csv("../data/synchronized_brainwave_dataset.csv")

In [70]:
# We want to classify label 'relax' versus 'math'.
# The math task is spread over twelve labels (math1 ... math12). They are listed
# explicitly, rather than matched by prefix, so only these exact labels are kept.
math_labels = [f'math{i}' for i in range(1, 13)]

relax = df[df.label == 'relax']
math = df[df.label.isin(math_labels)]

print(len(relax))
print(len(math))

934
936


In [71]:
# Stack the relax rows on top of the math rows (row-wise concatenation is the
# default for pd.concat); the original index labels are kept.
relax_math = pd.concat([relax, math])

In [72]:
# raw_values is read from the CSV as the text of a Python literal; parse it into
# a real Python object. The isinstance guard keeps the cell re-runnable: once the
# column already holds parsed objects, ast.literal_eval would raise ValueError.
relax_math['raw_values'] = relax_math['raw_values'].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)


In [73]:
# Keep only rows whose signal_quality is below 128; higher values indicate that
# the headset was placed incorrectly.
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the concatenated frame (SettingWithCopyWarning).
relax_math = relax_math[relax_math['signal_quality'] < 128].copy()

# Collapse every label that starts with 'math' into the single class 'math'.
relax_math['label'] = relax_math['label'].str.replace(r'^math.*$', 'math', regex=True)

relax_math['raw_values'] = relax_math['raw_values'].apply(lambda x: np.array(x, dtype=float))

# Ensure all arrays have the same length by zero-padding on the right.
# int(...) keeps max_len a plain Python int for later use as a layer input size.
max_len = int(relax_math['raw_values'].map(len).max())
relax_math['raw_values'] = relax_math['raw_values'].apply(
    lambda x: np.pad(x, (0, max_len - len(x)), 'constant')
)

# Encode the string labels as integers. LabelEncoder orders classes by sorted
# value, so 'math' -> 0 and 'relax' -> 1.
label_encoder = LabelEncoder()
relax_math['label'] = label_encoder.fit_transform(relax_math['label'])

# 2-D matrix: one row per recording, one column per (padded) sample.
features_matrix = np.stack(relax_math['raw_values'].values)


In [74]:
# Inspect the integer-encoded labels (this expression produces the output shown below).
relax_math['label']

13274    1
13275    1
13276    1
13277    1
13278    1
        ..
23828    0
23829    0
23830    0
23831    0
23832    0
Name: label, Length: 1870, dtype: int64

In [75]:
# X: Series holding one sample array per recording; y: the integer-encoded labels.
X, y = relax_math['raw_values'], relax_math['label']

In [87]:
# Sanity-check the frame, the feature/label Series and their shared index.
print(relax_math.shape)
print(X.shape)
print(y.shape)
print(X.index)
# Look at the first recording by position instead of by a hard-coded index label,
# which would raise KeyError if that particular row were ever filtered out.
print(X.iloc[0].shape)

(1870, 13)
(1870,)
(1870,)
Index([13274, 13275, 13276, 13277, 13278, 13279, 13280, 13281, 13282, 13283,
       ...
       23823, 23824, 23825, 23826, 23827, 23828, 23829, 23830, 23831, 23832],
      dtype='int64', length=1870)
(512,)


In [127]:
# Display the stacked feature matrix (NumPy abbreviates large arrays in its repr).
features_matrix

array([[285., 241., 200., ...,  32.,  23.,  21.],
       [-12., -60., -70., ...,  20.,  19.,  -7.],
       [ 37.,  43.,  42., ...,  18.,  13.,  35.],
       ...,
       [106., 108.,  91., ...,  28.,  42.,  49.],
       [ 48.,  37.,  18., ...,  49.,  42.,  26.],
       [ 96.,  75.,  64., ...,  71.,  86.,  92.]])

In [111]:
# time series model

seq_len = 64  # Number of timesteps per sequence
feat_dim = 8  # Number of features per timestep
output_dim = 2  # Number of output classes

# Each flat recording is folded into one (seq_len, feat_dim) window below, so the
# two sizes must multiply to the recording length. Without this check, a length
# that is some other multiple would reshape "successfully" into a different
# number of sequences and no longer line up with the labels.
assert features_matrix.shape[1] == seq_len * feat_dim, (
    f"expected {seq_len * feat_dim} samples per recording, "
    f"got {features_matrix.shape[1]}"
)

# Build the 'clf_cn' classifier from the tsgm model zoo and take its Keras model.
model_ts_architecture = zoo['clf_cn'](seq_len, feat_dim, output_dim)
model_ts = model_ts_architecture.model

# Labels are integer class ids, hence the sparse categorical loss.
model_ts.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Split data: 80% train / 20% validation, with a fixed seed for a repeatable split.
X_train, X_val, y_train, y_val = train_test_split(features_matrix, relax_math['label'], test_size=0.2, random_state=42)

# Fold every flat recording into a (seq_len, feat_dim) series for the time-series model.
X_train_ts = X_train.reshape(-1, seq_len, feat_dim)
X_val_ts = X_val.reshape(-1, seq_len, feat_dim)

In [120]:
# Train the time-series classifier, reporting validation metrics after each epoch.
history_ts = model_ts.fit(
    x=X_train_ts,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val_ts, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [121]:
# Score the trained time-series model on the validation split; evaluate() returns
# the loss followed by the compiled metrics (accuracy here).
val_loss_ts, val_acc_ts = model_ts.evaluate(x=X_val_ts, y=y_val)
print('val loss in ts model:', val_loss_ts)
print("val accuracy in ts model:", val_acc_ts)

val loss in ts model: 0.7702283263206482
val accuracy in ts model: 0.49732619524002075


In [122]:
# Dense baseline on the flat, padded signal (one input per sample).
# The softmax layer gets one unit per encoded class (two here: math / relax), so
# the model cannot put probability mass on a class that never occurs in the labels.
num_classes = len(label_encoder.classes_)

model = Sequential([
    Dense(10, activation='relu', input_shape=(max_len,)),
    Dense(num_classes, activation='softmax')
])

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [123]:
# Train the dense baseline on the flat (un-reshaped) train split.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [124]:
# Score the dense baseline on the validation split (loss first, then accuracy).
val_loss, val_acc = model.evaluate(x=X_val, y=y_val)
print('val loss in normal model:', val_loss)
print("val accuracy in normal model:", val_acc)

val loss in normal model: 3.366678237915039
val accuracy in normal model: 0.5


# 2. Augmentations
Augment `X` and `y` with synthetic samples generated by a conditional GAN.

In [139]:
# Sizes used by the GAN section.
feature_dim = 8  # features per timestep; same value as feat_dim in the baseline section
seq_len = 64  # timesteps per sequence
batch_size = 128  # batch size for the tf.data pipeline built below

# generator_in_channels = latent_dim + output_dim
# discriminator_in_channels = feature_dim + output_dim

In [156]:
# Fold each flat recording into a (seq_len, feature_dim) series.
# The stacked NumPy matrix is reshaped here because X is a pandas Series of
# arrays and has no reshape method; feature_dim is the size defined for this section.
X_ts = features_matrix.reshape(-1, seq_len, feature_dim)
X_ts.shape

(1870, 64, 8)

In [170]:
# One-hot encode the two classes for conditioning the GAN.
# Note: this rebinds y (previously the integer label Series) to a float32 array.
y = keras.utils.to_categorical(relax_math['label'], num_classes=2).astype(np.float32)

# Scale the series into the (-1, 1) range with tsgm's feature-wise scaler.
# Note: this rebinds X_train (previously the baseline train split) to the full scaled set.
scaler = tsgm.utils.TSFeatureWiseScaler((-1, 1))
X_train = scaler.fit_transform(X_ts).astype(np.float32)

print(X_train.shape)
print(y.shape)

(1870, 64, 8)
(1870, 2)


In [171]:
# Pair every scaled series with its one-hot label, then shuffle and batch.
# The shuffle buffer covers the whole dataset so every epoch is a uniform shuffle;
# a fixed 1024-element buffer is smaller than the 1870 samples and would only
# shuffle within a sliding window.
dataset = tf.data.Dataset.from_tensor_slices((X_train, y))
dataset = dataset.shuffle(buffer_size=len(X_train)).batch(batch_size)

In [172]:
latent_dim = 64  # latent size passed to the architecture and to ConditionalGAN below
output_dim = 2  # matches the two one-hot label columns

# Build the "cgan_base_c4_l1" architecture from the tsgm zoo and pull out its
# discriminator / generator networks.
architecture = tsgm.models.architectures.zoo["cgan_base_c4_l1"](
    seq_len=seq_len, feat_dim=feature_dim,
    latent_dim=latent_dim, output_dim=output_dim)
discriminator, generator = architecture.discriminator, architecture.generator

In [173]:
# Wrap the two networks in tsgm's conditional GAN and compile it with one Adam
# optimizer per network and a binary cross-entropy loss.
cond_gan = tsgm.models.cgan.ConditionalGAN(
    discriminator=discriminator, generator=generator, latent_dim=latent_dim
)
# NOTE(review): learning_rate=0.002 is ten times the 0.0002 commonly paired with
# beta_1=0.5 for GAN training — confirm this is intentional.
cond_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.002, beta_1=0.5),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.002, beta_1=0.5),
    loss_fn=keras.losses.BinaryCrossentropy(),
)



In [174]:
# Monitor callback configured with 3 samples and the one-hot labels; save=False,
# so save_path is presumably unused — TODO confirm against the tsgm docs.
cbk = tsgm.models.monitors.GANMonitor(num_samples=3, latent_dim=latent_dim, save=False, labels=y, save_path="/tmp")
# NOTE(review): 1000 epochs is a long run; the recorded output shows it was
# interrupted during the first epoch, so no trained GAN results from this cell.
cond_gan.fit(dataset, epochs=1000, callbacks=[cbk])



Epoch 1/1000

KeyboardInterrupt: 