In [1]:
import pickle
from keras.src.metrics import F1Score
from keras.src.callbacks import LearningRateScheduler
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score, f1_score
from keras.src.models import Model
from keras.src.callbacks import EarlyStopping
from keras.src.layers import Dropout, Dense, BatchNormalization, Input, Flatten

In [2]:
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
def evaluate(model, X, y):
    if len(y.shape) == 2:
        y = np.argmax(y, axis=1)

    pred = model.predict(X)
    pred = np.argmax(pred, axis=1)
    acc, f1 = accuracy_score(y, pred), f1_score(y, pred, average='weighted')
    print(f"## Accuracy: {acc}\n\n## F1 Score: {f1}\n\n")


# Baseline

In [26]:
with open("/Users/eden.yavin/Projects/Tabular-Cloud-ML/store/emb_dataset_w_cl_wo_em.pkl", "rb") as f:
    emb_dataset = pickle.load(f)

emb_dataset.train

EmbeddingBaselineFeatures(embeddings=array([[0.        , 0.04318882, 0.09121387, ..., 0.11157572, 0.06733229,
        0.        ],
       [0.        , 0.        , 0.06966318, ..., 0.04338145, 0.15025526,
        0.        ],
       [0.        , 0.06783015, 0.22331883, ..., 0.05096863, 0.        ,
        0.        ],
       ...,
       [0.        , 0.06515576, 0.        , ..., 0.        , 0.08230814,
        0.        ],
       [0.        , 0.17265853, 0.08966093, ..., 0.00556558, 0.        ,
        0.        ],
       [0.        , 0.04383548, 0.12643135, ..., 0.07490431, 0.        ,
        0.        ]], dtype=float32), labels=array([[1., 0.],
       [1., 0.],
       [0., 1.],
       ...,
       [0., 1.],
       [1., 0.],
       [0., 1.]]))

In [27]:


inputs = Input(shape=(emb_dataset.train.embeddings.shape[1],))  # Dynamic input shape

# Define the hidden layers
x = BatchNormalization()(inputs)
x = Dense(units=1024, activation='leaky_relu')(x)
x = Dropout(0.3)(x)

x = BatchNormalization()(x)
x = Dense(units=512, activation='leaky_relu')(x)
x = Dropout(0.3)(x)

x = BatchNormalization()(x)
x = Dense(units=256, activation='leaky_relu')(x)
x = Dropout(0.3)(x)

x = BatchNormalization()(x)
x = Dense(units=128, activation='leaky_relu')(x)
x = Dropout(0.3)(x)

# Define the output layer
outputs = Dense(units=2, activation='softmax')(x)

# Create the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model with F1 Score
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy', F1Score()]
              )
model.summary()

In [28]:
lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
early_stopping = EarlyStopping(patience=2, monitor='loss')
model.fit(emb_dataset.train.embeddings, emb_dataset.train.labels, epochs=100, batch_size=64, callbacks=[lr_scheduler, early_stopping], verbose=2)

Epoch 1/100
105/105 - 3s - 32ms/step - accuracy: 0.5914 - f1_score: 0.5914 - loss: 0.8003 - learning_rate: 1.0000e-04
Epoch 2/100
105/105 - 1s - 13ms/step - accuracy: 0.6399 - f1_score: 0.6399 - loss: 0.6939 - learning_rate: 9.0000e-05
Epoch 3/100
105/105 - 1s - 13ms/step - accuracy: 0.6781 - f1_score: 0.6781 - loss: 0.6425 - learning_rate: 8.1000e-05
Epoch 4/100
105/105 - 1s - 13ms/step - accuracy: 0.6836 - f1_score: 0.6836 - loss: 0.6331 - learning_rate: 7.2900e-05
Epoch 5/100
105/105 - 1s - 13ms/step - accuracy: 0.7105 - f1_score: 0.7105 - loss: 0.6070 - learning_rate: 6.5610e-05
Epoch 6/100
105/105 - 1s - 13ms/step - accuracy: 0.7116 - f1_score: 0.7116 - loss: 0.5909 - learning_rate: 5.9049e-05
Epoch 7/100
105/105 - 1s - 13ms/step - accuracy: 0.7141 - f1_score: 0.7141 - loss: 0.5802 - learning_rate: 5.3144e-05
Epoch 8/100
105/105 - 1s - 13ms/step - accuracy: 0.7210 - f1_score: 0.7210 - loss: 0.5658 - learning_rate: 4.7830e-05
Epoch 9/100
105/105 - 1s - 13ms/step - accuracy: 0.7254 

<keras.src.callbacks.history.History at 0x41ef750f0>

In [29]:
evaluate(model, emb_dataset.test.embeddings, emb_dataset.test.labels)

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


(0.7635135135135135, 0.760351607679852)

In [4]:
del emb_dataset, model

# No Embedding | With Cloud | With Triangulation Dataset

In [3]:
with open("/Users/eden.yavin/Projects/Tabular-Cloud-ML/store/dataset_w_cl_wo_em.pkl", "rb") as f:
    dataset = pickle.load(f)

## Dense IIN Model

In [90]:
try:
    del model
except:
    pass

inputs = Input(shape=(dataset.train.features.shape[1],))  # Dynamic input shape

x = BatchNormalization()(inputs)
x = Dense(units=2048, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(inputs)
x = Dense(units=1024, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=512, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=256, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=128, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

# Define the output layer
outputs = Dense(units=2, activation='softmax')(x)

# Create the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model with F1 Score
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy']
              )
model.summary()

In [91]:
import tensorflow as tf
with tf.device('/gpu:0'):
    lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
    early_stopping = EarlyStopping(patience=2, monitor='loss')
    model.fit(dataset.train.features, dataset.train.labels, validation_data=(dataset.test.features, dataset.test.labels),
 epochs=20, batch_size=64, callbacks=[lr_scheduler, early_stopping], verbose=2)

Epoch 1/20
521/521 - 16s - 30ms/step - accuracy: 0.5125 - loss: 0.7704 - val_accuracy: 0.5143 - val_loss: 0.7007 - learning_rate: 1.0000e-04
Epoch 2/20
521/521 - 14s - 27ms/step - accuracy: 0.5174 - loss: 0.7277 - val_accuracy: 0.5092 - val_loss: 0.6969 - learning_rate: 9.0000e-05
Epoch 3/20
521/521 - 14s - 27ms/step - accuracy: 0.5220 - loss: 0.7122 - val_accuracy: 0.4965 - val_loss: 0.7113 - learning_rate: 8.1000e-05
Epoch 4/20
521/521 - 14s - 27ms/step - accuracy: 0.5276 - loss: 0.7068 - val_accuracy: 0.5032 - val_loss: 0.6962 - learning_rate: 7.2900e-05
Epoch 5/20
521/521 - 14s - 27ms/step - accuracy: 0.5229 - loss: 0.7050 - val_accuracy: 0.5057 - val_loss: 0.7068 - learning_rate: 6.5610e-05
Epoch 6/20
521/521 - 14s - 27ms/step - accuracy: 0.5245 - loss: 0.7013 - val_accuracy: 0.5149 - val_loss: 0.6984 - learning_rate: 5.9049e-05
Epoch 7/20
521/521 - 14s - 27ms/step - accuracy: 0.5301 - loss: 0.6984 - val_accuracy: 0.5143 - val_loss: 0.6988 - learning_rate: 5.3144e-05
Epoch 8/20
52

In [92]:
print(evaluate(model, dataset.test.features, dataset.test.labels))

[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
## Accuracy: 0.5102702702702703

## F1 Score: 0.49427609610474016


(0.5102702702702703, 0.49427609610474016)


## LSTM

In [11]:
from keras.api.layers import LSTM, RepeatVector

try:
    del lstm
except:
    pass
# Define input shape with sequential data
# 512 is the shape of the clip embedding that is why I'm choosing that
inputs = Input(shape=(1, dataset.train.features.shape[1]))  # Add timesteps dimension

# # LSTM Embedding Layer
# x = LSTM(units=256, return_sequences=False)(inputs)  # Return the final state output as embedding
#
# # Optionally add dense layers after LSTM for better feature processing
# x = Dense(units=128, activation='leaky_relu')(x)
#
# x = Dense(units=64, activation='leaky_relu')(x)
# # Define the output layer for classification
# outputs = Dense(units=2, activation='softmax')(x)
# Stack multiple LSTM layers
x = LSTM(units=256, return_sequences=True)(inputs)  # Outputs full sequence
x = LSTM(units=128, return_sequences=True)(x)  # Another stacked LSTM
x = LSTM(units=32, return_sequences=False)(x)  # Final LSTM only provides the final state

# Optionally add Dense layers on top
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(2, activation='softmax')(x)


# Create the model
lstm = Model(inputs=inputs, outputs=outputs)

# Compile the model
lstm.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print summary
lstm.summary()


In [13]:
import tensorflow as tf
from keras.api.callbacks import LearningRateScheduler, EarlyStopping, Callback


X_train_reshaped = dataset.train.features.reshape(-1, 1, dataset.train.features.shape[1])
X_test_reshaped  = dataset.test.features.reshape(-1, 1, dataset.test.features.shape[1])


# Assuming dataset.train.features and dataset.train.labels are re-shaped correctly
with tf.device('/gpu:0'):
    lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
    early_stopping = EarlyStopping(patience=2, monitor='loss', restore_best_weights=True)
    lstm.fit(X_train_reshaped, dataset.train.labels,
              validation_data=(X_test_reshaped, dataset.test.labels),
              epochs=20, batch_size=64,
                callbacks=[lr_scheduler, early_stopping],
              verbose=2)



Epoch 1/20
521/521 - 14s - 27ms/step - accuracy: 0.5292 - loss: 0.6898 - val_accuracy: 0.5146 - val_loss: 0.6910 - learning_rate: 1.0000e-04
Epoch 2/20
521/521 - 14s - 26ms/step - accuracy: 0.5338 - loss: 0.6891 - val_accuracy: 0.5173 - val_loss: 0.6908 - learning_rate: 9.0000e-05
Epoch 3/20
521/521 - 13s - 26ms/step - accuracy: 0.5351 - loss: 0.6890 - val_accuracy: 0.5149 - val_loss: 0.6908 - learning_rate: 8.1000e-05
Epoch 4/20
521/521 - 14s - 28ms/step - accuracy: 0.5367 - loss: 0.6885 - val_accuracy: 0.5186 - val_loss: 0.6906 - learning_rate: 7.2900e-05
Epoch 5/20
521/521 - 14s - 26ms/step - accuracy: 0.5346 - loss: 0.6883 - val_accuracy: 0.5130 - val_loss: 0.6907 - learning_rate: 6.5610e-05
Epoch 6/20
521/521 - 14s - 27ms/step - accuracy: 0.5352 - loss: 0.6881 - val_accuracy: 0.5154 - val_loss: 0.6905 - learning_rate: 5.9049e-05
Epoch 7/20
521/521 - 13s - 26ms/step - accuracy: 0.5392 - loss: 0.6879 - val_accuracy: 0.5170 - val_loss: 0.6914 - learning_rate: 5.3144e-05
Epoch 8/20
52

In [10]:
# 128 LSTM without dropout
evaluate(lstm, X_test_reshaped, dataset.test.labels)

[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
## Accuracy: 0.5197297297297298

## F1 Score: 0.5005535773631486




# No Cloud | With Triangulation

In [3]:
with open("/Users/eden.yavin/Projects/Tabular-Cloud-ML/store/dataset_wo_cl_wo_em.pkl", "rb") as f:
    dataset = pickle.load(f)

## Dense IIN

In [4]:
try:
    del model
except:
    pass

inputs = Input(shape=(dataset.train.features.shape[1],))  # Dynamic input shape

x = BatchNormalization()(inputs)
x = Dense(units=2048, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(inputs)
x = Dense(units=1024, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=512, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=256, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=128, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

# Define the output layer
outputs = Dense(units=2, activation='softmax')(x)

# Create the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model with F1 Score
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy']
              )
model.summary()

2025-05-12 19:52:24.477151: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-05-12 19:52:24.477337: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 36.00 GB
2025-05-12 19:52:24.477344: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 13.50 GB
I0000 00:00:1747068744.477811 1363900 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1747068744.477870 1363900 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
import tensorflow as tf
with tf.device('/gpu:0'):
    lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
    early_stopping = EarlyStopping(patience=2, monitor='loss', retrieve_best_weights=True)
    model.fit(dataset.train.features, dataset.train.labels, validation_data=(dataset.test.features, dataset.test.labels),
 epochs=20, batch_size=64, callbacks=[lr_scheduler, early_stopping], verbose=2)

Epoch 1/20


2025-05-12 19:52:45.315001: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


521/521 - 16s - 31ms/step - accuracy: 0.5118 - loss: 0.7741 - val_accuracy: 0.5246 - val_loss: 0.6940 - learning_rate: 1.0000e-04
Epoch 2/20
521/521 - 14s - 26ms/step - accuracy: 0.5119 - loss: 0.7268 - val_accuracy: 0.5162 - val_loss: 0.6979 - learning_rate: 9.0000e-05
Epoch 3/20
521/521 - 14s - 26ms/step - accuracy: 0.5201 - loss: 0.7125 - val_accuracy: 0.5197 - val_loss: 0.7037 - learning_rate: 8.1000e-05
Epoch 4/20
521/521 - 14s - 27ms/step - accuracy: 0.5203 - loss: 0.7083 - val_accuracy: 0.5151 - val_loss: 0.6959 - learning_rate: 7.2900e-05
Epoch 5/20
521/521 - 14s - 26ms/step - accuracy: 0.5240 - loss: 0.7029 - val_accuracy: 0.5173 - val_loss: 0.6935 - learning_rate: 6.5610e-05
Epoch 6/20
521/521 - 13s - 26ms/step - accuracy: 0.5222 - loss: 0.7020 - val_accuracy: 0.5208 - val_loss: 0.6922 - learning_rate: 5.9049e-05
Epoch 7/20
521/521 - 13s - 25ms/step - accuracy: 0.5300 - loss: 0.6980 - val_accuracy: 0.5200 - val_loss: 0.6967 - learning_rate: 5.3144e-05
Epoch 8/20
521/521 - 13s

In [6]:
evaluate(model, dataset.test.features, dataset.test.labels)

[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
## Accuracy: 0.5254054054054054

## F1 Score: 0.5156159651403336


None


## LSTM

In [7]:
from keras.api.layers import LSTM, RepeatVector

try:
    del lstm
except:
    pass
# Define input shape with sequential data
# 512 is the shape of the clip embedding that is why I'm choosing that
inputs = Input(shape=(1, dataset.train.features.shape[1]))  # Add timesteps dimension

# LSTM Embedding Layer
x = LSTM(units=128, return_sequences=False)(inputs)  # Return the final state output as embedding

# Optionally add dense layers after LSTM for better feature processing
x = Dense(units=128, activation='leaky_relu')(x)

x = Dense(units=64, activation='leaky_relu')(x)
# Define the output layer for classification
outputs = Dense(units=2, activation='softmax')(x)

# Create the model
lstm = Model(inputs=inputs, outputs=outputs)

# Compile the model
lstm.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print summary
lstm.summary()


In [9]:
import tensorflow as tf
from keras.api.callbacks import LearningRateScheduler, EarlyStopping, Callback


X_train_reshaped = dataset.train.features.reshape(-1, 1, dataset.train.features.shape[1])
X_test_reshaped  = dataset.test.features.reshape(-1, 1, dataset.test.features.shape[1])


# Assuming dataset.train.features and dataset.train.labels are re-shaped correctly
with tf.device('/gpu:0'):
    lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
    early_stopping = EarlyStopping(patience=2, monitor='loss', restore_best_weights=True)
    lstm.fit(X_train_reshaped, dataset.train.labels,
              validation_data=(X_test_reshaped, dataset.test.labels),
              epochs=50, batch_size=64,
                callbacks=[lr_scheduler, early_stopping],
              verbose=2)



Epoch 1/20
521/521 - 8s - 16ms/step - accuracy: 0.5345 - loss: 0.6881 - val_accuracy: 0.5219 - val_loss: 0.6906 - learning_rate: 1.0000e-04
Epoch 2/20
521/521 - 8s - 15ms/step - accuracy: 0.5381 - loss: 0.6873 - val_accuracy: 0.5184 - val_loss: 0.6918 - learning_rate: 9.0000e-05
Epoch 3/20
521/521 - 8s - 15ms/step - accuracy: 0.5371 - loss: 0.6866 - val_accuracy: 0.5249 - val_loss: 0.6924 - learning_rate: 8.1000e-05
Epoch 4/20
521/521 - 8s - 15ms/step - accuracy: 0.5384 - loss: 0.6868 - val_accuracy: 0.5200 - val_loss: 0.6928 - learning_rate: 7.2900e-05
Epoch 5/20
521/521 - 8s - 15ms/step - accuracy: 0.5362 - loss: 0.6863 - val_accuracy: 0.5241 - val_loss: 0.6902 - learning_rate: 6.5610e-05
Epoch 6/20
521/521 - 8s - 15ms/step - accuracy: 0.5384 - loss: 0.6861 - val_accuracy: 0.5208 - val_loss: 0.6903 - learning_rate: 5.9049e-05
Epoch 7/20
521/521 - 8s - 15ms/step - accuracy: 0.5423 - loss: 0.6858 - val_accuracy: 0.5232 - val_loss: 0.6916 - learning_rate: 5.3144e-05
Epoch 8/20
521/521 -

In [10]:
evaluate(lstm, X_test_reshaped, dataset.test.labels)

[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
## Accuracy: 0.5259459459459459

## F1 Score: 0.517483401363579




In [11]:
dataset.train.features.shape

(33300, 3584)

# With Embedding | No Cloud | With Triangulation Dataset

In [93]:
with open("/Users/eden.yavin/Projects/Tabular-Cloud-ML/store/dataset_wo_cl_w_em.pkl", "rb") as f:
    dataset = pickle.load(f)

## Dense IIM


In [94]:
try:
    del model
except:
    pass

inputs = Input(shape=(dataset.train.features.shape[1],))  # Dynamic input shape

x = BatchNormalization()(inputs)
x = Dense(units=2048, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(inputs)
x = Dense(units=1024, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=512, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=256, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=128, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

# Define the output layer
outputs = Dense(units=2, activation='softmax')(x)

# Create the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model with F1 Score
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy']
              )
model.summary()

In [95]:
import tensorflow as tf
with tf.device('/gpu:0'):
    lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
    early_stopping = EarlyStopping(patience=2, monitor='loss', restore_best_weights=True)
    model.fit(dataset.train.features, dataset.train.labels, validation_data=(dataset.test.features, dataset.test.labels), epochs=20, batch_size=64, callbacks=[lr_scheduler, early_stopping], verbose=2)

Epoch 1/20
521/521 - 15s - 30ms/step - accuracy: 0.5642 - loss: 0.7377 - val_accuracy: 0.6197 - val_loss: 0.6631 - learning_rate: 1.0000e-04
Epoch 2/20
521/521 - 13s - 26ms/step - accuracy: 0.6264 - loss: 0.6569 - val_accuracy: 0.6503 - val_loss: 0.6264 - learning_rate: 9.0000e-05
Epoch 3/20
521/521 - 13s - 26ms/step - accuracy: 0.6584 - loss: 0.6191 - val_accuracy: 0.6803 - val_loss: 0.5969 - learning_rate: 8.1000e-05
Epoch 4/20
521/521 - 13s - 26ms/step - accuracy: 0.6820 - loss: 0.5929 - val_accuracy: 0.6673 - val_loss: 0.6026 - learning_rate: 7.2900e-05
Epoch 5/20
521/521 - 14s - 26ms/step - accuracy: 0.6978 - loss: 0.5760 - val_accuracy: 0.6916 - val_loss: 0.5774 - learning_rate: 6.5610e-05
Epoch 6/20
521/521 - 13s - 26ms/step - accuracy: 0.7129 - loss: 0.5588 - val_accuracy: 0.7005 - val_loss: 0.5753 - learning_rate: 5.9049e-05
Epoch 7/20
521/521 - 14s - 26ms/step - accuracy: 0.7217 - loss: 0.5448 - val_accuracy: 0.7038 - val_loss: 0.5724 - learning_rate: 5.3144e-05
Epoch 8/20
52

In [96]:
print(evaluate(model, dataset.test.features, dataset.test.labels))


[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
## Accuracy: 0.7105405405405405

## F1 Score: 0.7084809338497355


(0.7105405405405405, 0.7084809338497355)


## LSTM

In [97]:
from keras.api.layers import LSTM, RepeatVector

try:
    del lstm
except:
    pass
# Define input shape with sequential data
# 512 is the shape of the clip embedding that is why I'm choosing that
inputs = Input(shape=(1, dataset.train.features.shape[1]))  # Add timesteps dimension

# LSTM Embedding Layer
x = LSTM(units=128, return_sequences=False)(inputs)  # Return the final state output as embedding

# Optionally add dense layers after LSTM for better feature processing
x = Dense(units=128, activation='leaky_relu')(x)

x = Dense(units=64, activation='leaky_relu')(x)
# Define the output layer for classification
outputs = Dense(units=2, activation='softmax')(x)

# Create the model
lstm = Model(inputs=inputs, outputs=outputs)

# Compile the model
lstm.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print summary
lstm.summary()


In [99]:
import tensorflow as tf
from keras.api.callbacks import LearningRateScheduler, EarlyStopping, Callback


X_train_reshaped = dataset.train.features.reshape(-1, 1, dataset.train.features.shape[1])
X_test_reshaped  = dataset.test.features.reshape(-1, 1, dataset.test.features.shape[1])


# Assuming dataset.train.features and dataset.train.labels are re-shaped correctly
with tf.device('/gpu:0'):
    lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
    early_stopping = EarlyStopping(patience=2, monitor='loss', restore_best_weights=True)
    lstm.fit(X_train_reshaped, dataset.train.labels,
              validation_data=(X_test_reshaped, dataset.test.labels),
              epochs=20, batch_size=64,
                callbacks=[lr_scheduler, early_stopping],
              verbose=2)



Epoch 1/20
521/521 - 8s - 16ms/step - accuracy: 0.6114 - loss: 0.6559 - val_accuracy: 0.5981 - val_loss: 0.6532 - learning_rate: 1.0000e-04
Epoch 2/20
521/521 - 8s - 15ms/step - accuracy: 0.6380 - loss: 0.6355 - val_accuracy: 0.6616 - val_loss: 0.6273 - learning_rate: 9.0000e-05
Epoch 3/20
521/521 - 8s - 15ms/step - accuracy: 0.6542 - loss: 0.6204 - val_accuracy: 0.5951 - val_loss: 0.6600 - learning_rate: 8.1000e-05
Epoch 4/20
521/521 - 8s - 15ms/step - accuracy: 0.6681 - loss: 0.6069 - val_accuracy: 0.6781 - val_loss: 0.6069 - learning_rate: 7.2900e-05
Epoch 5/20
521/521 - 8s - 15ms/step - accuracy: 0.6802 - loss: 0.5968 - val_accuracy: 0.6732 - val_loss: 0.6135 - learning_rate: 6.5610e-05
Epoch 6/20
521/521 - 8s - 15ms/step - accuracy: 0.6808 - loss: 0.5936 - val_accuracy: 0.6808 - val_loss: 0.6085 - learning_rate: 5.9049e-05
Epoch 7/20
521/521 - 8s - 15ms/step - accuracy: 0.6925 - loss: 0.5829 - val_accuracy: 0.6803 - val_loss: 0.6065 - learning_rate: 5.3144e-05
Epoch 8/20
521/521 -

In [100]:
# 128 LSTM without dropout
evaluate(lstm, X_test_reshaped, dataset.test.labels)

[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
## Accuracy: 0.727027027027027

## F1 Score: 0.7265895703395703




(0.727027027027027, 0.7265895703395703)

# With Embedding | With Cloud | With Triangulation Dataset

In [7]:
with open("/Users/eden.yavin/Projects/Tabular-Cloud-ML/store/dataset_w_cl_w_em.pkl", "rb") as f:
    dataset = pickle.load(f)

## Dense

In [8]:
from keras.src.metrics import F1Score
from keras.src.callbacks import LearningRateScheduler
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score, f1_score
from keras.src.models import Model
from keras.src.callbacks import EarlyStopping
from keras.src.layers import Dropout, Dense, BatchNormalization, Input, Flatten


try:
    del model
except:
    pass

inputs = Input(shape=(dataset.train.features.shape[1],))  # Dynamic input shape

x = BatchNormalization()(inputs)
x = Dense(units=2048, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(inputs)
x = Dense(units=1024, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=512, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=256, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

x = BatchNormalization()(x)
x = Dense(units=128, activation='leaky_relu')(x)
x = Dropout(0.1)(x)

# Define the output layer
outputs = Dense(units=2, activation='softmax')(x)

# Create the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model with F1 Score
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy']
              )
model.summary()

In [9]:
import tensorflow as tf
with tf.device('/gpu:0'):
    lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
    early_stopping = EarlyStopping(patience=2, monitor='loss', restore_best_weights=True)
    model.fit(dataset.train.features, dataset.train.labels, validation_data=(dataset.test.features, dataset.test.labels), epochs=20, batch_size=64, callbacks=[lr_scheduler, early_stopping], verbose=2)

Epoch 1/20
521/521 - 16s - 31ms/step - accuracy: 0.5559 - loss: 0.7425 - val_accuracy: 0.5800 - val_loss: 0.6848 - learning_rate: 1.0000e-04
Epoch 2/20
521/521 - 14s - 27ms/step - accuracy: 0.6180 - loss: 0.6639 - val_accuracy: 0.6603 - val_loss: 0.6173 - learning_rate: 9.0000e-05
Epoch 3/20
521/521 - 15s - 28ms/step - accuracy: 0.6550 - loss: 0.6241 - val_accuracy: 0.6900 - val_loss: 0.5779 - learning_rate: 8.1000e-05
Epoch 4/20
521/521 - 14s - 26ms/step - accuracy: 0.6894 - loss: 0.5904 - val_accuracy: 0.6303 - val_loss: 0.6533 - learning_rate: 7.2900e-05
Epoch 5/20
521/521 - 14s - 26ms/step - accuracy: 0.7062 - loss: 0.5709 - val_accuracy: 0.6565 - val_loss: 0.6329 - learning_rate: 6.5610e-05
Epoch 6/20
521/521 - 14s - 26ms/step - accuracy: 0.7150 - loss: 0.5556 - val_accuracy: 0.7049 - val_loss: 0.5667 - learning_rate: 5.9049e-05
Epoch 7/20
521/521 - 14s - 26ms/step - accuracy: 0.7244 - loss: 0.5415 - val_accuracy: 0.6978 - val_loss: 0.5780 - learning_rate: 5.3144e-05
Epoch 8/20
52

In [12]:
evaluate(model,dataset.test.features, dataset.test.labels)

[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
## Accuracy: 0.7313513513513513

## F1 Score: 0.7307281512596429




## LSTM

In [13]:
from keras.api.layers import LSTM, RepeatVector

try:
    del lstm
except:
    pass
# Define input shape with sequential data
# 512 is the shape of the clip embedding that is why I'm choosing that
inputs = Input(shape=(1, dataset.train.features.shape[1]))  # Add timesteps dimension

# LSTM Embedding Layer
x = LSTM(units=128, return_sequences=False)(inputs)  # Return the final state output as embedding

# Optionally add dense layers after LSTM for better feature processing
x = Dense(units=128, activation='leaky_relu')(x)

x = Dense(units=64, activation='leaky_relu')(x)
# Define the output layer for classification
outputs = Dense(units=2, activation='softmax')(x)

# Create the model
lstm = Model(inputs=inputs, outputs=outputs)

# Compile the model
lstm.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print summary
lstm.summary()


In [14]:
import tensorflow as tf
from keras.api.callbacks import LearningRateScheduler, EarlyStopping, Callback


X_train_reshaped = dataset.train.features.reshape(-1, 1, dataset.train.features.shape[1])
X_test_reshaped  = dataset.test.features.reshape(-1, 1, dataset.test.features.shape[1])


# Assuming dataset.train.features and dataset.train.labels are re-shaped correctly
with tf.device('/gpu:0'):
    lr_scheduler = LearningRateScheduler(lambda epoch: 0.0001 * (0.9 ** epoch))
    early_stopping = EarlyStopping(patience=2, monitor='loss', restore_best_weights=True)
    lstm.fit(X_train_reshaped, dataset.train.labels,
              validation_data=(X_test_reshaped, dataset.test.labels),
              epochs=20, batch_size=64,
                callbacks=[lr_scheduler, early_stopping],
              verbose=2)


Epoch 1/20
521/521 - 10s - 19ms/step - accuracy: 0.5190 - loss: 0.6920 - val_accuracy: 0.5214 - val_loss: 0.6907 - learning_rate: 1.0000e-04
Epoch 2/20
521/521 - 9s - 17ms/step - accuracy: 0.5410 - loss: 0.6871 - val_accuracy: 0.5376 - val_loss: 0.6877 - learning_rate: 9.0000e-05
Epoch 3/20
521/521 - 9s - 16ms/step - accuracy: 0.5576 - loss: 0.6819 - val_accuracy: 0.5508 - val_loss: 0.6791 - learning_rate: 8.1000e-05
Epoch 4/20
521/521 - 9s - 16ms/step - accuracy: 0.5942 - loss: 0.6640 - val_accuracy: 0.5576 - val_loss: 0.6736 - learning_rate: 7.2900e-05
Epoch 5/20
521/521 - 9s - 16ms/step - accuracy: 0.6367 - loss: 0.6374 - val_accuracy: 0.6438 - val_loss: 0.6341 - learning_rate: 6.5610e-05
Epoch 6/20
521/521 - 8s - 16ms/step - accuracy: 0.6577 - loss: 0.6191 - val_accuracy: 0.6719 - val_loss: 0.6155 - learning_rate: 5.9049e-05
Epoch 7/20
521/521 - 9s - 17ms/step - accuracy: 0.6648 - loss: 0.6100 - val_accuracy: 0.6578 - val_loss: 0.6295 - learning_rate: 5.3144e-05
Epoch 8/20
521/521 

In [15]:
evaluate(lstm, X_test_reshaped, dataset.test.labels)

[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
## Accuracy: 0.7089189189189189

## F1 Score: 0.7065706049708702


