In [92]:
import datetime

import os

os.getcwd()
if '.git' not in os.listdir():
    os.chdir('./../../../../')
os.listdir()

from ml.ml_core.src.core.model_factory import model_factory

import keras_core as keras
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorboard.plugins.hparams import api as hp


## Configurations

In [93]:
params_grid = {
    "model": "vision-transformer",
    "embedder": "fasttext",
    "embedding_data": "fasttext",
    #"dataset": "fr_hf.csv",
    "dataset": "corrected_format_all_data.csv",
    "batch_size": 500
}

In [94]:
data_folder_src = 'ml/ml_core/data/processed/'
ml_core_folder_path = 'ml/ml_core/'
data_path_src = f'{data_folder_src}{params_grid["dataset"]}'
date_str = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
experiment_name = f'deepwoke_{params_grid["embedder"]}_{params_grid["model"]}_{params_grid["dataset"]}_{date_str}'

log_dir = f"{ml_core_folder_path}log/fit/{experiment_name}"
model_weight_dst = f'{ml_core_folder_path}model_weights/{experiment_name}-model.keras'

In [95]:

vector_path = f'{ml_core_folder_path}embedded_vector/{params_grid["embedding_data"]}_{params_grid["dataset"]}'
vector_x_dst = f'{vector_path}.x.npy'
vector_y_dst = f'{vector_path}.y.npy'

In [96]:
print(vector_x_dst)
if os.path.exists(vector_x_dst):
    print("the file exists")
else:
    print("the file doesn't exist")

ml/ml_core/embedded_vector/fasttext_corrected_format_all_data.csv.x.npy
the file exists


# loading dataset

In [97]:
X = np.load(vector_x_dst, allow_pickle=True)
y = np.load(vector_y_dst)

In [98]:
if params_grid["embedder"] == "camembert":
    x_squeezed = np.squeeze(X)
    X = [x[:, 0, :] for x in x_squeezed]
    X = np.array(X)

In [99]:
if params_grid["embedder"] == "bert" or params_grid["embedder"] == "small-bert" or params_grid["embedder"] == "electra" or params_grid["embedder"] == "fasttext":
    X = np.expand_dims(X, axis=1)

# Model

## callbacks

In [100]:
early_stopping = keras.callbacks.EarlyStopping(
    patience=30,
    min_delta=0.0001,
    restore_best_weights=True,
    monitor='loss'
)


In [101]:

class EarlyStoppingLogging(keras.callbacks.Callback):
    def __init__(self, early_stopping_callback, log_dir):
        super().__init__()
        self.early_stopping = early_stopping_callback
        self.stopped_epoch = 0
        self.writer = tf.summary.create_file_writer(log_dir)

    def on_epoch_end(self, epoch, logs=None):
        if self.early_stopping.stopped_epoch > 0:
            self.stopped_epoch = self.early_stopping.stopped_epoch
            with self.writer.as_default():
                tf.summary.scalar('early_stopping_epoch', self.stopped_epoch, step=epoch)
                self.writer.flush()


In [102]:
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping_logging_callback = EarlyStoppingLogging(early_stopping, log_dir)

In [103]:
callback = [
    early_stopping,
    tensorboard_callback,
    hp.KerasCallback(log_dir, params_grid),
    early_stopping_logging_callback,
]

## build the model

In [104]:
def model_builder(input_dim, output_dim):
    activation_function = 'softmax'
    loss = 'categorical_crossentropy'
    if output_dim == 1:
        activation_function = 'sigmoid'
        loss = 'binary_crossentropy'
    print(input_dim, output_dim)

    inputs = input_dim[1:]
    input_tensor = keras.layers.Input(shape=inputs)
    
    
    head_count = 6
    attention_embedding_dim = 16
    encoder_blocks = 3
    patch_embedding_size = head_count*attention_embedding_dim

    embedded_patches = keras.layers.Dense(patch_embedding_size, activation=keras.activations.linear)(input_tensor)

    sequence_length = embedded_patches.shape[1]

    reshaped_embedded_patches = keras.layers.Reshape((sequence_length, patch_embedding_size))(embedded_patches)

    encoder_input = reshaped_embedded_patches

    for l in range(encoder_blocks):
        normalized_reshaped_embedded_patches = keras.layers.LayerNormalization()(encoder_input)

        attention_outputs = []
        for _ in range(head_count):
            query_vector = keras.layers.Dense(attention_embedding_dim, activation=keras.activations.gelu)(
                normalized_reshaped_embedded_patches)
            key_vector = keras.layers.Dense(attention_embedding_dim, activation=keras.activations.gelu)(
                normalized_reshaped_embedded_patches)
            value_vector = keras.layers.Dense(attention_embedding_dim, activation=keras.activations.gelu)(
                normalized_reshaped_embedded_patches)

            transposed_key_vector = keras.ops.swapaxes(key_vector, 1, 2)

            mat_mul_result = keras.ops.matmul(query_vector, transposed_key_vector)

            scaled_mat_mul_result = mat_mul_result / keras.ops.sqrt(attention_embedding_dim)

            scores = keras.ops.softmax(scaled_mat_mul_result)

            attention_output = keras.ops.matmul(scores, value_vector)
            attention_outputs.append(attention_output)

        attention_outputs = keras.layers.Concatenate()(attention_outputs)

        residual_summed_tensor = keras.layers.Add()([reshaped_embedded_patches, attention_outputs])

        normalized_residual_summed_tensor = keras.layers.LayerNormalization()(residual_summed_tensor)

        hidden = keras.layers.Dense(patch_embedding_size, activation=keras.activations.gelu)(normalized_residual_summed_tensor)
        hidden = keras.layers.Dense(patch_embedding_size, activation=keras.activations.gelu)(hidden)

        residual_post_mlp_tensor = keras.layers.Add()([hidden, residual_summed_tensor])
        encoder_input = residual_post_mlp_tensor
        
    global_average_pooling = keras.layers.GlobalAveragePooling1D()(encoder_input)
    output = keras.layers.Dense(output_dim, activation=activation_function)(global_average_pooling)

    model = keras.models.Model(inputs=input_tensor, outputs=output)
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    
    return model

### training the model

In [105]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [106]:
model = model_builder(X_train.shape, 1)
print(model.summary)

(11828, 1, 300) 1
<bound method Model.summary of <Functional name=functional_11, built=True>>


In [None]:
model.fit(X_train,
          y_train,
          epochs=1000,
          batch_size=params_grid['batch_size'],
          callbacks=callback,
          validation_data=(X_test, y_test)
          )

Epoch 1/1000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 544ms/step - accuracy: 0.6129 - loss: 0.7490 - val_accuracy: 0.7456 - val_loss: 0.5079
Epoch 2/1000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.7505 - loss: 0.5036 - val_accuracy: 0.7686 - val_loss: 0.4719
Epoch 3/1000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.7833 - loss: 0.4613 - val_accuracy: 0.7899 - val_loss: 0.4481
Epoch 4/1000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.8041 - loss: 0.4287 - val_accuracy: 0.7939 - val_loss: 0.4477
Epoch 5/1000
[1m22/24[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 13ms/step - accuracy: 0.8147 - loss: 0.3992

# save model

In [None]:
model.save(model_weight_dst)