In [None]:
import tensorflow as tf### models
import numpy as np### math computations
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import (Dense,Flatten,SimpleRNN,InputLayer,Conv1D,Bidirectional,GRU,LSTM,BatchNormalization,Dropout,Input,GlobalMaxPooling1D,Embedding,TextVectorization,LayerNormalization,MultiHeadAttention)
from tensorflow.keras.losses import BinaryCrossentropy,CategoricalCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy,TopKCategoricalAccuracy, CategoricalAccuracy, SparseCategoricalAccuracy
from tensorflow.keras.optimizers import Adam
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [None]:
# Load the labelled tweets and keep only the sentiment label and the tweet text.
# NOTE(review): read_csv consumes the first line of the file as a header before
# the columns are renamed below -- confirm the CSV really has a header row,
# otherwise the first record is silently lost.
df = pd.read_csv('../content/twitter_training.csv')
df.columns = ['1','2','Sentiment','Sentence']
df = df.drop(columns=['1','2'])

# One-hot encode the labels once; the column order defines the integer class ids.
one_hot = pd.get_dummies(df['Sentiment'])
classes = list(one_hot)
Y = np.array(one_hot)

# Lower-case every text entry. Entries that are not strings (e.g. missing values)
# are left untouched here and filtered out, together with their labels, below.
sentences = [s.lower() if isinstance(s, str) else s for s in df['Sentence']]
is_text = np.array([isinstance(s, str) for s in sentences], dtype=bool)
sen = [s for s, keep in zip(sentences, is_text) if keep]

# Integer class id per kept sentence (index of the hot column).
Y_train = np.argmax(Y[is_text], axis=1)

# Map words to integer ids and right-pad every sequence to the longest one.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sen)
seq = tokenizer.texts_to_sequences(sen)
X_train = pad_sequences(seq , padding = 'post')
seq_len = X_train.shape[1]
# +1 because id 0 is not assigned to any word (it is the padding value).
vocab_size = len(tokenizer.word_index) + 1

assert len(X_train) == len(Y_train), "features and labels are misaligned"

In [None]:

def positional_encoding(seq_len , model_size):
  """Build the sinusoidal positional-encoding table.

  For position `pos` and channel `i`:
    even i: sin(pos / 10000 ** (i / model_size))
    odd  i: cos(pos / 10000 ** ((i - 1) / model_size))

  Args:
    seq_len: number of positions to encode.
    model_size: number of channels per position.

  Returns:
    A float64 numpy array of shape (1, seq_len, model_size). The leading axis
    of size 1 lets the table broadcast over a batch dimension. For
    `seq_len == 0` an empty (1, 0, model_size) table is returned.
  """
  positions = np.arange(seq_len, dtype=np.float64)[:, np.newaxis]
  channels = np.arange(model_size)
  # Each sin/cos pair shares one frequency, indexed by the even member of the
  # pair: i for even channels, i - 1 for odd channels.
  pair_index = channels - (channels % 2)
  angles = positions / np.power(10000.0, pair_index / model_size)
  table = np.where(channels % 2 == 0, np.sin(angles), np.cos(angles))
  return table[np.newaxis, :, :]

class Embeddings(Layer):
  """Token embedding with a precomputed positional-encoding table added on top.

  Also offers `compute_masks`, which derives an attention mask from raw token
  ids by treating id 0 as "not a real token".
  """

  def __init__(self , vocab_size , seq_len , model_size):
    """Create the embedding table and precompute the positional table.

    Args:
      vocab_size: forwarded to `Embedding` as `input_dim`.
      seq_len: number of positions for which encodings are precomputed.
      model_size: embedding width (`output_dim`) and positional-table width.
    """
    super().__init__()
    self.emb = Embedding(input_dim=vocab_size, output_dim=model_size)
    # Computed once here; has shape (1, seq_len, model_size).
    self.pos_encoding = positional_encoding(seq_len, model_size)

  def call(self,input):
    """Embed the token ids and add the positional table to the result."""
    token_vectors = self.emb(input)
    return self.pos_encoding + token_vectors

  def compute_masks(self,input):
    """Build a square attention mask from token ids.

    Args:
      input: rank-2 tensor of token ids, indexed as (batch, position).

    Returns:
      An int32 tensor of shape (batch, T, T), with T the number of positions.
      Entry [b, q, k] is 1 when token k of sample b is non-zero and 0
      otherwise; every query row q carries the same values.
    """
    is_token = tf.math.not_equal(input , 0)
    row_mask = tf.cast(is_token[:, tf.newaxis, :], tf.int32)
    num_positions = tf.shape(row_mask)[2]
    # Copy the single (1, T) row T times so the mask is T x T per sample.
    square_mask = tf.repeat(row_mask, num_positions, axis=1)
    return square_mask

class TransformerEncoder(Layer):
  """One encoder block: self-attention followed by a two-layer feed-forward net.

  Each of the two sub-blocks is followed by a residual addition and a
  `LayerNormalization`.
  """

  def __init__(self, num_heads , emb_dim , dense_dim):
    """Create the sub-layers.

    Args:
      num_heads: number of attention heads.
      emb_dim: output width of the feed-forward net; also passed as `key_dim`.
      dense_dim: hidden width of the feed-forward net.
    """
    super().__init__()
    self.layernorm_1 = LayerNormalization()
    self.layernorm_2 = LayerNormalization()
    feed_forward_layers = [
        Dense(dense_dim, activation='relu'),
        Dense(emb_dim),
    ]
    self.dense = tf.keras.Sequential(feed_forward_layers)
    # NOTE(review): in Keras `key_dim` is the size of EACH head, so every head
    # is emb_dim wide here rather than emb_dim // num_heads -- confirm intended.
    self.attn = MultiHeadAttention(num_heads=num_heads, key_dim=emb_dim)

  def call(self,inputs , mask):
    """Run the block.

    Args:
      inputs: tensor used as query, key and value of the self-attention.
      mask: forwarded unchanged as `attention_mask` to the attention layer.

    Returns:
      The normalised output of the feed-forward sub-block.
    """
    attended = self.attn(
        query=inputs,
        key=inputs,
        value=inputs,
        attention_mask=mask,
    )
    # Residual connection around attention, then normalise.
    normed_attention = self.layernorm_1(attended + inputs)

    projected = self.dense(normed_attention)

    # Residual connection around the feed-forward net, then normalise.
    return self.layernorm_2(projected + normed_attention)



In [None]:
# Hyperparameters shared by the model-building and training cells.
EMBEDDING_DIM = 256  # embedding / encoder width
D_FF = 1024          # hidden width of the encoder feed-forward net
NUM_HEADS = 4        # attention heads per encoder block
NUM_LAYERS = 1       # number of stacked encoder blocks
NUM_EPOCHS = 20      # epoch budget for training

In [None]:
# Assemble the classifier:
# token ids -> embeddings + positions -> encoder stack -> flatten -> softmax.
# NOTE(review): the input length is declared as unknown, but the positional
# table inside `Embeddings` is fixed at `seq_len`, so inputs are expected to be
# padded to exactly `seq_len` tokens -- confirm.
encoder_input = Input(shape=(None,))
emb = Embeddings(vocab_size, seq_len, EMBEDDING_DIM)
x = emb(encoder_input)
# Marks the non-zero (non-padding) token positions for the attention layers.
padding_mask = emb.compute_masks(encoder_input)

for _ in range(NUM_LAYERS):
  x = TransformerEncoder(NUM_HEADS, EMBEDDING_DIM, D_FF)(x, padding_mask)

x = Flatten()(x)
x = Dropout(0.5)(x)
# One output unit per label class found in the data, instead of a hard-coded 4,
# so the head always matches the integer ids stored in Y_train.
output = Dense(len(classes), activation="softmax")(x)

transformer = tf.keras.Model(encoder_input, output)
transformer.summary()

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_6 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 tf.math.not_equal_5 (TFOpL  (None, None)                 0         ['input_6[0][0]']             
 ambda)                                                                                           
                                                                                                  
 tf.__operators__.getitem_1  (None, 1, None)              0         ['tf.math.not_equal_5[0][0]'] 
 0 (SlicingOpLambda)                                                                              
                                                                                            

## Training

In [None]:
# Y_train holds integer class ids, hence the sparse variant of the
# categorical cross-entropy loss.
transformer.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the first 1000 examples only.
# NOTE(review): NUM_EPOCHS is defined as 20 in the configuration cell, but this
# call hard-codes 10 epochs -- confirm which value is intended.
history = transformer.fit(
    x=X_train[:1000],
    y=Y_train[:1000],
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
