Building the model

In [1]:
#Imports
import numpy as np
import joblib
import tensorflow as tf

2023-07-30 23:55:30.407592: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
#Load the data
# Each .npz archive is opened in a `with` block so its file handle is closed
# as soon as the arrays have been read out of it (indexing an NpzFile loads the
# array into memory, so the arrays stay valid after the archive is closed).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load archives produced by this project's own
# preprocessing step. Drop the flag if the arrays are plain numeric dtypes.

#Train data
with np.load('../../../../../../data/processed/tweets_data/spanish/tweets_train_data.npz',  allow_pickle=True) as train_npz:
    train_inputs, train_targets = train_npz['inputs'], train_npz['targets']

#Test data
with np.load('../../../../../../data/processed/tweets_data/spanish/tweets_test_data.npz',  allow_pickle=True) as test_npz:
    test_inputs, test_targets = test_npz['inputs'], test_npz['targets']

In [3]:
#Load the tokenizer
# Location of the serialized tokenizer, relative to this notebook.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load tokenizer exports produced by this project.
# The loaded object is expected to expose `vocab_size`, which the
# hyperparameters cell reads.
TOKENIZER_PATH = '../../../../../../exports/sentiment_analysis/tokenizers/spanish/tokenizer.pkl'
tokenizer = joblib.load(TOKENIZER_PATH)

In [4]:
#Model hyperparameters
# Used as the Embedding layer's input_dim; read from the loaded tokenizer
# rather than hard-coded. The trailing number is presumably the value observed
# for the current tokenizer -- verify if the tokenizer is regenerated.
VOCAB_SIZE = tokenizer.vocab_size # 65540

# Embedding output_dim.
EMB_DIM = 200
# Units of the LSTM wrapped by the Bidirectional layer.
LSTM_UNITS = 100
# Width of the hidden Dense (relu) layer.
FFN_UNITS = 256

# Rate shared by all three Dropout layers in the model.
DROPOUT_RATE = 0.2

# Batch size passed to both model.fit and model.evaluate.
BATCH_SIZE = 32
# Number of epochs passed to model.fit.
NB_EPOCHS = 5

In [11]:
#Building Model
# Layer stack, in order:
#   embedding -> dropout -> bidirectional LSTM -> dropout
#   -> dense (relu) -> dropout -> dense (softmax, 4 outputs)
model = tf.keras.Sequential()

# Token ids -> EMB_DIM-sized vectors; mask_zero=True treats id 0 as a masked value.
model.add(tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=EMB_DIM, mask_zero=True))
model.add(tf.keras.layers.Dropout(DROPOUT_RATE))

# Recurrent encoder: one LSTM run in both directions over the sequence.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(LSTM_UNITS)))
model.add(tf.keras.layers.Dropout(DROPOUT_RATE))

# Feed-forward head ending in a 4-way softmax.
model.add(tf.keras.layers.Dense(FFN_UNITS, activation='relu'))
model.add(tf.keras.layers.Dropout(DROPOUT_RATE))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [12]:
#Compiling Model
# Loss, optimizer and metric are given by their Keras string identifiers;
# the "sparse" loss/metric variants are paired with the softmax output layer.
compile_options = {
    "optimizer": "adam",
    "loss": "sparse_categorical_crossentropy",
    "metrics": ["sparse_categorical_accuracy"],
}
model.compile(**compile_options)

In [13]:
# Checkpointing: track the model in a tf.train.Checkpoint and let a
# CheckpointManager keep at most the 5 most recent checkpoints on disk.
checkpoint_path = "../../../../../../checkpoints/sentiment_analysis/rnn/spanish"
ckpt = tf.train.Checkpoint(model)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# Resume from the newest checkpoint when the manager reports one.
latest_checkpoint = ckpt_manager.latest_checkpoint
if latest_checkpoint:
    ckpt.restore(latest_checkpoint)
    print("Last checkpoint restored!!")

In [15]:
#Train model
# Save a checkpoint at the end of every epoch instead of only once after the
# whole run: an interrupted training session (epochs are long) can then resume
# from the last finished epoch via the restore logic in the checkpoint cell.
# The checkpoint written after the final epoch replaces the former trailing
# ckpt_manager.save() call.
save_checkpoint_cb = tf.keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch, logs: ckpt_manager.save()
)

model.fit(train_inputs,
          train_targets,
          batch_size=BATCH_SIZE,
          epochs=NB_EPOCHS,
          callbacks=[save_checkpoint_cb])

# Display the path of the most recent checkpoint as the cell's output.
ckpt_manager.latest_checkpoint

Epoch 1/5
 128/3318 [>.............................] - ETA: 24:04 - loss: 0.4392 - sparse_categorical_accuracy: 0.8491

KeyboardInterrupt: 

In [21]:
#Validate model
# Score the model on the held-out test arrays, using the same batch size as
# training, and print whatever evaluate() returns for the compiled loss/metric.
results = model.evaluate(
    x=test_inputs,
    y=test_targets,
    batch_size=BATCH_SIZE,
)
print(results)

<src.models.DCNN.DCNN at 0x1298dd090>

In [None]:
#Save the trained model
# model.save serializes the whole model rather than only its weights; the
# destination is kept in a named constant, like TOKENIZER_PATH.
MODEL_EXPORT_PATH = '../../../../../../models/sentiment_analysis/rnn/spanish/sentiment_analysis_spanish.h5'
model.save(MODEL_EXPORT_PATH)