In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import (
    Input, Embedding, Dense, Dropout,
    LayerNormalization, Add, MultiHeadAttention, Flatten, Layer , GlobalAveragePooling1D
) # Added Layer import
from tensorflow.keras import Model # Moved Model import here
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd

# Build the training set: load the CSV, keep only string tweets, tokenize and pad.
# NOTE(review): read_csv treats the first CSV row as a header and the names below
# then overwrite it. If the file has no header row, that first sample is lost — confirm.
df = pd.read_csv('../content/twitter_training.csv')
df.columns = ['1','2','Sentiment','Sentence']
df = df.drop(columns=['1','2'])

# One-hot encode the labels once; the column order defines the class indices.
sentiment_one_hot = pd.get_dummies(df['Sentiment'])
Y = np.array(sentiment_one_hot)
classes = list(sentiment_one_hot)

# Lower-case every string entry. Non-string entries (e.g. missing values) are
# left untouched here and filtered out just below.
sentences = [s.lower() if type(s) is str else s for s in df['Sentence']]

# Keep sentences and labels aligned by selecting the same row positions for both.
kept_rows = [row for row, s in enumerate(sentences) if type(s) is str]
sen = [sentences[row] for row in kept_rows]
Y_train = [Y[row] for row in kept_rows]

tokenizer = Tokenizer()
tokenizer.fit_on_texts(sen)
seq = tokenizer.texts_to_sequences(sen)
X_train = pad_sequences(seq, padding='post')
seq_len = X_train.shape[1]
vocab_size = len(tokenizer.word_index) + 1

# Collapse the one-hot rows into integer class ids (sparse labels).
Y_train = np.array(np.argmax(Y_train, axis=1))


def positional_encoding(seq_len , model_size):
  """Build the sinusoidal positional-encoding table.

  For position ``pos`` and feature index ``i``:
    * even ``i``: ``sin(pos / 10000 ** (i / model_size))``
    * odd ``i``:  ``cos(pos / 10000 ** ((i - 1) / model_size))``

  Args:
    seq_len: Number of positions to encode.
    model_size: Number of features per position.

  Returns:
    A float64 NumPy array of shape ``(1, seq_len, model_size)``; the leading
    axis of size 1 lets it broadcast over a batch dimension.
  """
  positions = np.arange(seq_len, dtype=np.float64)[:, np.newaxis]
  dims = np.arange(model_size)
  # Each (sin, cos) pair shares the frequency of its even index.
  pair_base = dims - (dims % 2)
  angles = positions / np.power(10000.0, pair_base / model_size)
  table = np.where(dims % 2 == 0, np.sin(angles), np.cos(angles))

  return table[np.newaxis, ...]

# Keras layer wrapper so the padding mask can be computed inside a functional model.
class PaddingMaskLayer(Layer):
  """Turn a tensor of token ids into a boolean padding mask.

  The mask is True where the id is non-zero and False where it is 0, with an
  extra axis inserted at position 1.
  """

  def call(self, input):
      is_real_token = tf.math.not_equal(input, 0)
      return tf.expand_dims(is_real_token, axis=1)

# --- Encoder block: masked self-attention followed by a feed-forward network ---
def encoder_block(x, num_heads, d_model, d_ff, encoder_padding_mask, dropout=0.1):
    """Apply one encoder block (attention -> add & norm -> FFN -> add & norm) to `x`.

    Args:
        x: Input sequence tensor; used as both query and value of the attention.
           Its last dimension must match `d_model` for the residual sums to work.
        num_heads: Number of attention heads.
        d_model: Units of the feed-forward output projection; also passed as the
            per-head `key_dim` of the attention layer.
        d_ff: Hidden units of the feed-forward sub-layer.
        encoder_padding_mask: Boolean tensor forwarded as `attention_mask`.
        dropout: Dropout rate applied after the first layer normalisation.

    Returns:
        The layer-normalised sum of the feed-forward output and its input.
    """
    # Keras convention for `attention_mask`: True marks positions that MAY be
    # attended to; False positions are excluded from attention.
    self_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
    attended = self_attention(x, x, attention_mask=encoder_padding_mask)

    # Residual connection + normalisation, then dropout.
    residual = LayerNormalization()(attended + x)
    residual = Dropout(dropout)(residual)

    # Position-wise feed-forward network: expand to d_ff, project back to d_model.
    hidden = Dense(d_ff, activation='relu')(residual)
    projected = Dense(d_model)(hidden)

    return LayerNormalization()(projected + residual)


# Hyperparameters
vocab_size = len(tokenizer.word_index) + 1  # +1 leaves room for id 0, which PaddingMaskLayer treats as padding
max_len = X_train.shape[1]
d_model = 256
num_heads = 4
d_ff = 1028  # NOTE(review): possibly meant 1024 — confirm before changing
num_layers = 1
# Derive the output width from the label set instead of hard-coding it, so the
# classifier always matches the integer labels produced from `classes`.
num_classes = len(classes)

# --- Model Construction ---

# Encoder inputs: one row of `max_len` integer token ids per sample.
encoder_inputs = Input(shape=(max_len,), name="encoder_input", dtype=tf.int32)
# Boolean mask (True = real token) built inside the graph by the custom layer.
enc_padding_mask_tensor = PaddingMaskLayer()(encoder_inputs)

enc_embed = Embedding(
    vocab_size, d_model
)(encoder_inputs)

# positional_encoding returns float64; cast it so the sum stays in the
# embedding's float32 instead of being promoted.
enc_embed += positional_encoding(max_len, d_model).astype("float32")

x = enc_embed
for _ in range(num_layers):
    # Every block receives the same padding-mask tensor.
    x = encoder_block(x, num_heads, d_model, d_ff, enc_padding_mask_tensor)

encoder_output = x

# NOTE(review): Flatten feeds every position, padded ones included, to the
# classifier; a mask-aware pooling may be preferable — confirm intent.
x = Flatten()(encoder_output)
x = Dropout(0.2)(x)

outputs = Dense(num_classes, activation="softmax")(
    x
)

model = Model(
    inputs=encoder_inputs,
    outputs=outputs
)

# Integer class ids are used as targets, hence the sparse cross-entropy loss.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

In [2]:
# Quick fit on the first 1000 samples only (no validation split).
model.fit(x=X_train[:1000], y=Y_train[:1000], epochs=10)

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 2s/step - accuracy: 0.2888 - loss: 13.3474
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 2s/step - accuracy: 0.3199 - loss: 3.4438
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 2s/step - accuracy: 0.3271 - loss: 2.3960
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 2s/step - accuracy: 0.3188 - loss: 1.9062
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 2s/step - accuracy: 0.3170 - loss: 1.7902
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 2s/step - accuracy: 0.3692 - loss: 1.8371
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 2s/step - accuracy: 0.7234 - loss: 0.6734
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 2s/step - accuracy: 0.8649 - loss: 0.3889
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7ea9cb09ffb0>

In [3]:
from sklearn.metrics import accuracy_score,classification_report

# Spot-check a single sample that lies outside the 1000-row slice used for fitting.
single_probs = model.predict(X_train[1001][np.newaxis, :])
pred = np.argmax(single_probs, axis=1)
print(Y_train[1001], pred)

# Accuracy on the same 1000 rows the model was fitted on: this is a training
# score, not a held-out estimate.
train_probs = model.predict(X_train[:1000])
pred = np.argmax(train_probs, axis=1)
print(accuracy_score(Y_train[:1000], pred))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347ms/step
1 [1]
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 545ms/step
0.985


In [4]:
def padding_mask(inputs):
  """Return a boolean mask that is True where `inputs` is non-zero.

  A new axis is inserted at position 1 of the result.
  """
  return tf.expand_dims(tf.math.not_equal(inputs, 0), axis=1)
def causal_mask(inputs):
  """Combine a lower-triangular (look-back only) mask with the padding mask.

  Entry [b, i, j] of the result is True only when j <= i and inputs[b, j] is
  non-zero.
  """
  length = tf.shape(inputs)[1]
  all_true = tf.ones((length, length), dtype=tf.bool)
  # band_part(-1, 0) keeps the diagonal and everything below it.
  lower_triangle = tf.expand_dims(tf.linalg.band_part(all_true, -1, 0), axis=0)
  return tf.logical_and(lower_triangle, padding_mask(inputs))

# Demo: two zero-padded sequences with 4 and 6 real tokens respectively.
demo_tokens = np.array([
    [1, 2, 4, 7, 0, 0, 0],
    [2, 6, 8, 5, 3, 8, 0],
])
causal_mask(demo_tokens)

<tf.Tensor: shape=(2, 7, 7), dtype=bool, numpy=
array([[[ True, False, False, False, False, False, False],
        [ True,  True, False, False, False, False, False],
        [ True,  True,  True, False, False, False, False],
        [ True,  True,  True,  True, False, False, False],
        [ True,  True,  True,  True, False, False, False],
        [ True,  True,  True,  True, False, False, False],
        [ True,  True,  True,  True, False, False, False]],

       [[ True, False, False, False, False, False, False],
        [ True,  True, False, False, False, False, False],
        [ True,  True,  True, False, False, False, False],
        [ True,  True,  True,  True, False, False, False],
        [ True,  True,  True,  True,  True, False, False],
        [ True,  True,  True,  True,  True,  True, False],
        [ True,  True,  True,  True,  True,  True, False]]])>