In [1]:
import tensorflow as tf

from tensorflow.keras import layers

import numpy as np

2022-12-07 18:14:39.409241: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
class PositionalEmbedding(layers.Layer):
    def __init__(self, d_model, vocab_size, seq_len):
        super(PositionalEmbedding, self).__init__()
        self.seq_len = seq_len
        self.d_model = d_model
        self.embedding = layers.Embedding(vocab_size, d_model, input_length=seq_len, trainable=False)
        self.positions = self.build_pos_embedding()
        
    def build_pos_embedding(self):
        pos_e = np.zeros(shape=(self.seq_len, self.d_model))

        for idx, x in enumerate(pos_e.T): # idx is I
            for tidx, _ in enumerate(x):  # tidx is POS
                if idx % 2 == 0:
                    pos_e.T[idx][tidx] = np.sin(tidx / (10_000 ** (2 * idx / self.d_model)))
                else:
                    pos_e.T[idx][tidx] = np.cos(tidx / (10_000 ** (2 * idx / self.d_model)))

            return pos_e

    def call(self, inputs):
        return self.embedding(inputs) + self.positions

In [3]:
class SelfAttention(layers.Layer):
    def __init__(self, d_k, d_v):
        super(SelfAttention, self).__init__()
        self.d_k = d_k
        self.d_v = d_v

    def build(self, input_shape):
        self.wq = self.add_weight(shape=(input_shape[-1], self.d_k))
        self.wk = self.add_weight(shape=(input_shape[-1], self.d_k))
        self.wv = self.add_weight(shape=(input_shape[-1], self.d_v))

    def call(self, inputs):         
        q = tf.matmul(inputs, self.wq)
        k = tf.matmul(inputs, self.wk)
        v = tf.matmul(inputs, self.wv)

        qk = tf.matmul(q, tf.transpose(k, perm=[0, 2, 1]))
        sqk = qk / tf.sqrt(tf.cast(tf.shape(k)[-1], dtype=tf.float32))
        sqk = tf.math.softmax(sqk)
        z = tf.matmul(sqk, v)

        return z   

In [4]:
class MultiHeadAttention(layers.Layer):
    def __init__(self, d_k, d_v, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.heads = [SelfAttention(d_k, d_v) for _ in range(num_heads)]
        self.d_model = d_k * num_heads

    def build(self, input_shape):
        self.wz = self.add_weight(shape=(input_shape[-1], self.d_model))

    def call(self, inputs):
        z_s = tf.concat([head(inputs) for head in self.heads], axis=2)
        z = tf.matmul(z_s, self.wz)

        return z   

In [5]:
class AddNorm(layers.Layer):
    def __init__(self):
        super(AddNorm, self).__init__()
        self.norm = layers.LayerNormalization()
        self.add = layers.Add()

    def call(self, x, y):
        z = self.norm(self.add([x, y]))
        
        return z   

In [6]:
class FeedForward(layers.Layer):
    def __init__(self, ff_d):
        super(FeedForward, self).__init__()
        self.ff_1 = layers.Dense(ff_d * 4)
        self.ff_2 = layers.Dense(ff_d, activation="relu")
    
    def call(self, inputs):
        x = self.ff_1(inputs)
        x = self.ff_2(x)

        return x

In [7]:
class EncoderLayer(layers.Layer):
    def __init__(self, d_k, d_v, num_heads, **kwargs):
        super(EncoderLayer, self).__init__(**kwargs)
        self.mha = MultiHeadAttention(d_k, d_v, num_heads)
        self.add_norm = AddNorm()
        self.ff = FeedForward(d_k * num_heads)
        self.dropout1 = layers.Dropout(0.2)
        self.dropout2 = layers.Dropout(0.2)
        
    def call(self, inputs, training):        
        z = self.mha(inputs)
        z = self.dropout1(z, training=training)
        z = self.add_norm(z, inputs)
        y = self.ff(z)
        y = self.dropout2(y, training=training)
        x = self.add_norm(y, z)
        
        return x

In [8]:
class Encoder(layers.Layer):
    def __init__(self, d_k, d_v, num_heads, num_layers, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.encoders = [EncoderLayer(d_k, d_v, num_heads, **kwargs) for _ in range(num_layers)]

    def call(self, inputs):
        x = self.encoders[0](inputs)
        for encoder in self.encoders[1:]:
            x = encoder(x)
            
        return x

In [9]:
import pandas as pd

In [10]:
df = pd.read_csv("data/spamming.csv")

In [11]:
tv = layers.TextVectorization(max_tokens=10_000, output_sequence_length=128)

2022-12-07 18:14:40.776719: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-07 18:14:40.801231: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-07 18:14:40.801374: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-07 18:14:40.802955: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compi

In [12]:
train = df.sample(frac=0.8, random_state=42)
test = df.drop(train.index)

In [13]:
tv.adapt(train.MESSAGE)

In [14]:
train_labels = train.CATEGORY.values
test_labels = test.CATEGORY.values

In [15]:
text_input = layers.Input(shape=(1, ), dtype=tf.string)
x = tv(text_input)
x = PositionalEmbedding(768, len(tv.get_vocabulary()), 128)(x)
x = Encoder(64, 64, 12, 5)(x)
x = layers.Flatten()(x)
x = layers.Dense(512, activation="relu")(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(1, activation="sigmoid")(x)

model = tf.keras.Model(text_input, x)

In [16]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization (TextVec  (None, 128)              0         
 torization)                                                     
                                                                 
 positional_embedding (Posit  (None, 128, 768)         7680000   
 ionalEmbedding)                                                 
                                                                 
 encoder (Encoder)           (None, 128, 768)          35416320  
                                                                 
 flatten (Flatten)           (None, 98304)             0         
                                                                 
 dense_10 (Dense)            (None, 512)               503321

In [17]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss="binary_crossentropy",
    metrics="accuracy"
)

In [18]:
model.fit(train.MESSAGE.values, train_labels, epochs=15, validation_data=(test.MESSAGE.values, test_labels))

Epoch 1/15
  1/145 [..............................] - ETA: 22:00 - loss: 0.7024 - accuracy: 0.5625

2022-12-07 18:14:52.724387: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8600
2022-12-07 18:14:52.767288: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f41a019fd90>