In [66]:
import tensorflow as tf
from keras.layers import MultiHeadAttention, LayerNormalization, Dropout, Layer
from keras.layers import Embedding, Input, GlobalAveragePooling1D, Dense, Flatten
from keras.datasets import imdb
from keras.models import Sequential, Model
from keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import warnings

# NumPy 2.0 removed the top-level alias `np.VisibleDeprecationWarning`; the
# class lives in `numpy.exceptions` (available since NumPy 1.25). Resolve it
# from whichever location this NumPy version provides so the cell runs on
# both old and new releases.
try:
    from numpy.exceptions import VisibleDeprecationWarning
except ImportError:  # NumPy < 1.25 has no numpy.exceptions module
    VisibleDeprecationWarning = np.VisibleDeprecationWarning

# Silence VisibleDeprecationWarning for the rest of the session.
warnings.filterwarnings("ignore", category=VisibleDeprecationWarning)

In [67]:
def transformer_block(embed_dim, num_heads, ff_dim, dropout_rate):
    """Build one Transformer encoder block and return a function that applies it.

    The layers are created once, when this factory is called, and are captured
    by the returned closure, so every invocation of the closure reuses the
    same layer instances.

    Args:
        embed_dim: Passed as ``key_dim`` to the attention layer and used as the
            output width of the feed-forward network, so the block's output
            can be added to its input.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) layer of the feed-forward network.
        dropout_rate: Rate used by both dropout layers.

    Returns:
        A callable ``call(inputs, training=None)`` that applies self-attention
        followed by the feed-forward network, each wrapped in dropout, a
        residual connection and layer normalisation.
    """
    att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
    ffn = Sequential(
        [Dense(ff_dim, activation="relu"),
         Dense(embed_dim), ]
    )
    layernorm1 = LayerNormalization(epsilon=1e-6)
    layernorm2 = LayerNormalization(epsilon=1e-6)
    dropout1 = Dropout(dropout_rate)
    dropout2 = Dropout(dropout_rate)

    def call(inputs, training=None):
        """Apply the block to ``inputs``.

        ``training`` is forwarded to both dropout layers. It defaults to
        ``None`` so callers are not forced to hard-code a mode; an explicit
        ``True``/``False`` is still honoured.
        """
        # Self-attention: the same tensor is used as query and value.
        attn_output = att(inputs, inputs)
        attn_output = dropout1(attn_output, training=training)
        # First residual connection + normalisation.
        out1 = layernorm1(inputs + attn_output)
        ffn_output = ffn(out1)
        ffn_output = dropout2(ffn_output, training=training)
        # Second residual connection + normalisation.
        return layernorm2(out1 + ffn_output)

    return call

In [68]:
# Load training data

# Columns used as numeric input features
columns = ['BERT_sentiment_score', 'normalised_word_count', 'pub_day', 'pub_hour']

# Drop incomplete rows ONCE, on the frame itself, so that the feature matrix,
# the target vector and the topic column encoded in a later cell all refer to
# exactly the same rows. Dropping NaNs on the feature slice only would leave
# y_train (and the topics) with a different number of rows than x_train.
train_df = (
    pd.read_csv('../data/cleaned_train.csv')
    .dropna(subset=columns + ['topic', 'n_comments'])
    .reset_index(drop=True)
)

# Feature matrix and regression target, taken from the same filtered rows
x_train = train_df[columns].to_numpy()
y_train = train_df['n_comments'].to_numpy()

In [69]:
# Load test data

# Drop incomplete rows on the frame itself (features and topic together) so
# the feature matrix and the topic column encoded in a later cell stay
# row-aligned; resetting the index makes position i of x_test correspond to
# row i of test_df.
test_df = (
    pd.read_csv('../data/cleaned_test.csv')
    .dropna(subset=columns + ['topic'])
    .reset_index(drop=True)
)

x_test = test_df[columns].to_numpy()

In [70]:
def label_encode(raw_data):
    """Encode topic labels as integers using a mapping shared by train and test.

    A fresh LabelEncoder is fitted on the 'topic' column of the module-level
    ``train_df`` and ``test_df`` combined, so the same topic receives the same
    integer regardless of which split ``raw_data`` comes from.

    Args:
        raw_data: pandas Series of topic labels. Missing values are dropped
            before encoding, so the result can be shorter than the input.

    Returns:
        1-D NumPy array with the integer code of each non-missing label.
    """
    # Fit on the union of train and test topics for a consistent mapping.
    # Missing topics are excluded: they are dropped before transforming anyway,
    # so they never need a class of their own.
    all_topics = pd.concat([train_df['topic'], test_df['topic']], axis=0).dropna()

    le = LabelEncoder()
    # Only the fitted mapping is needed here; the encoded combined column
    # would be discarded, so fit() is used instead of fit_transform().
    le.fit(all_topics)

    return np.array(le.transform(raw_data.dropna()))

In [71]:
# Encode the training topics as integer labels (single 'topic encoded' column)
train_topic_encoded = pd.DataFrame(label_encode(train_df['topic']), columns=['topic encoded'])
# Append the encoded topic to the base features. Slicing x_train back to the
# len(columns) base feature columns keeps this cell idempotent: re-running it
# replaces the topic column instead of stacking a duplicate one.
x_train = np.concatenate((x_train[:, :len(columns)], train_topic_encoded), axis=1)

In [72]:
# Encode the test topics as integer labels (single 'topic encoded' column)
test_topic_encoded = pd.DataFrame(label_encode(test_df['topic']), columns=['topic encoded'])
# Append the encoded topic to the base features. Slicing x_test back to the
# len(columns) base feature columns keeps this cell idempotent: re-running it
# replaces the topic column instead of stacking a duplicate one.
x_test = np.concatenate([x_test[:, :len(columns)], test_topic_encoded], axis=1)

In [73]:
# Early stopping: watch the validation MAE and halt training once it has gone
# `patience` epochs without improving.
early_stopping = EarlyStopping(
    monitor='val_mae',
    patience=10,                # number of epochs to wait for improvement
    restore_best_weights=True,  # restore the weights of the best epoch
    verbose=1,
)

In [74]:
# Define model architecture
max_seq_length = x_train.shape[1]
# NOTE(review): the Embedding layer below looks up integer indices in
# [0, vocab_size). x_train mixes columns that look continuous (e.g.
# 'normalised_word_count') with integer codes, and vocab_size is derived from
# the training data only, so larger values in x_test would be out of range.
# Confirm that embedding these features as token ids is intended.
vocab_size = int(np.max(x_train) + 1)
embedding_dim = 32
num_heads = 2
ff_dim = 32
dropout_rate = 0.1

inputs = Input(shape=(max_seq_length,))
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_seq_length)(inputs)
transformer_block_fn = transformer_block(embed_dim=embedding_dim,
                                         num_heads=num_heads, ff_dim=ff_dim,
                                         dropout_rate=dropout_rate)
# training=None leaves the train/inference decision to Keras: dropout is
# active during fit() and disabled during predict(). Hard-coding
# training=True here would keep the block's dropout active at inference time
# and make predictions non-deterministic.
# The result is stored under its own name so the `transformer_block` factory
# is not overwritten and this cell can be re-run.
transformer_output = transformer_block_fn(embedding_layer, training=None)
pooling_layer = GlobalAveragePooling1D()(transformer_output)
dropout_layer = Dropout(rate=dropout_rate)(pooling_layer)
# Single linear unit: the model predicts one real-valued target (regression)
outputs = Dense(units=1, activation='linear')(dropout_layer)

model = Model(inputs=inputs, outputs=outputs)

# Compile the model: mean squared error loss, mean absolute error as metric
model.compile(optimizer=Adam(), loss='mse', metrics=['mae'])

# Train the model, holding out the last 20% of the data for validation
model.fit(x_train, y_train, batch_size=64, epochs=100, validation_split=0.2,
          callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 29: early stopping


<keras.callbacks.History at 0x27fbeb24790>

In [75]:
# Persist the fitted model to disk in HDF5 format
model.save(filepath='../models/transformer_regression_model.h5')

In [76]:
# Run the trained model on the test feature matrix
y_pred = model.predict(x=x_test)



In [80]:
# Spot-check a single prediction: the entry at position 12 of y_pred.
# x_test was built after dropping rows with missing values, so this position
# refers to the 13th retained test row, not necessarily row 12 of the CSV.
y_pred[12]

array([286.4306], dtype=float32)