In [59]:
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Bidirectional, LSTM, Concatenate, Dropout, Dense, Layer, Multiply, TimeDistributed, GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard
from keras_tuner import HyperModel, RandomSearch, BayesianOptimization, Hyperband
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras import models, layers
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer

In [3]:
url_data = 'https://raw.githubusercontent.com/TeodorRusKvi/Tekstanalyse/main/git_NLP_data/'

df = pd.read_csv(url_data + 'new_df.csv')

df['All_text'] = df['All_text'].replace(['U.S.', 'U.S.A.'], ['US', 'USA'], regex=True)
df['Processed'] = df['Processed'].fillna(0)
df['Processed'] = df['Processed'].astype(str)
df['All_text'] = df['All_text'].fillna(0)
df['All_text'] = df['All_text'].astype(str)

# df.to_csv('new_df.csv', index=False)

# Making the relevant columns to lists
all_texts = (df['All_text'].to_list())
texts = df['Processed'].to_list()

# Setting the wanted text for further modelling
corpus = texts

tokenizer = Tokenizer(oov_token='<OOV>') # Hyperparameters = num_words=vocab_size, oov_token=oov_tok
tokenizer.fit_on_texts(corpus)

sequences = tokenizer.texts_to_sequences(corpus)

word_index = tokenizer.word_index
print(f'Found {len(word_index)} unique tokens.\n\nFirst 10 is listen below:')
print(dict(list(word_index.items())[0:10]))

Found 22234 unique tokens.

First 10 is listen below:
{'<OOV>': 1, 'people': 2, 'like': 3, 'work': 4, 'right': 5, 'trump': 6, 'think': 7, 'state': 8, 'government': 9, 'party': 10}


In [66]:
# Last inn 'X_train_LSTM' fra en CSV-fil
X_train_LSTM = pd.read_csv(url_data+'X_train.csv')
# Konverter hele DataFrame til et NumPy array
X_train_LSTM = X_train_LSTM.to_numpy()

# Last inn 'y_train_LSTM' fra en CSV-fil
y_train_LSTM = pd.read_csv(url_data+'y_train_processed.csv')
# Konverter hele DataFrame til et NumPy array
y_train_LSTM = y_train_LSTM.to_numpy()

# Last inn 'y_train_LSTM' fra en CSV-fil
embeddings_GloVe = pd.read_csv(url_data+'embeddings_glove.csv')
# Konverter hele DataFrame til et NumPy array
embeddings_GloVe = embeddings_GloVe.to_numpy()


# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_train_LSTM, y_train_LSTM, test_size=0.2, random_state=42)

HTTPError: HTTP Error 404: Not Found

In [None]:
y_train_int = y_train.astype(int)
y_train_one_hot = y_train_int.values  # Convert pandas DataFrame to a numpy array



In [5]:
# # Load the TensorBoard notebook extension
# %load_ext tensorboard

from tensorboard import notebook
notebook.list() # View open TensorBoard instances

Known TensorBoard instances:
  - port 6008: logdir logs/fit (started 1 day, 22:26:00 ago; pid 1956)
  - port 6006: logdir logs/hparam_tuning (started 5 days, 23:13:13 ago; pid 23884)


In [6]:
# Define hyperparameters with TensorBoard HParams API
HP_FILTERS = hp.HParam('filters', hp.Discrete([32, 35]))
HP_NUM_UNITS2 = hp.HParam('num_units2', hp.Discrete([64, 128, 152]))
HP_DROPOUT2 = hp.HParam('dropout2', hp.RealInterval(0.2, 0.6))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.RealInterval(0.001, 0.01))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd', 'rmsprop']))

In [7]:
# %tensorboard --logdir=logs/fit

In [None]:
# Convert the boolean DataFrame to int (0s and 1s)
y_train_int = y_train.astype(int)

# Convert to one-hot encoding if necessary (this is just a format conversion, as the data seems to be one-hot already)
y_train_one_hot = y_train_int.values


In [62]:
from keras.layers import Layer
import keras.backend as K
from tensorflow.keras.layers import AdditiveAttention, Attention


class Attention(Layer): #BahdanauAttention
    def __init__(self,**kwargs):
        super(Attention,self).__init__(**kwargs)

    def build(self,input_shape):
        self.W=self.add_weight(name="att_weight",shape=(input_shape[-1],1),initializer="normal")
        self.b=self.add_weight(name="att_bias",shape=(input_shape[1],1),initializer="zeros")        
        super(Attention, self).build(input_shape)

    def call(self,x):
        et=K.squeeze(K.tanh(K.dot(x,self.W)+self.b),axis=-1)
        at=K.softmax(et)
        at=K.expand_dims(at,axis=-1)
        output=x*at
        return K.sum(output,axis=1)

    def compute_output_shape(self,input_shape):
        return (input_shape[0],input_shape[-1])

    def get_config(self):
        return super(Attention,self).get_config()


class BahdanauAttention(Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, query, values):
        # query hidden state shape == (batch_size, hidden size)
        # query_with_time_axis shape == (batch_size, 1, hidden size)
        # values shape == (batch_size, max_len, hidden size)
        query_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_len, 1)
        # we get 1 at the last axis because we are applying score to self.V
        # the shape of the tensor before applying self.V is (batch_size, max_len, units)
        score = self.V(tf.nn.tanh(
            self.W1(query_with_time_axis) + self.W2(values)))

        # attention_weights shape == (batch_size, max_len, 1)
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

from tensorflow.keras.initializers import Constant
from tensorflow.keras.layers import Layer, Conv1D, Softmax, Dense

class SelfAttention(Layer):
    def __init__(self, channels):
        super(SelfAttention, self).__init__()
        # Divide channels for query and key
        self.query = Dense(channels // 8, use_bias=False, kernel_initializer='he_normal')
        self.key = Dense(channels // 8, use_bias=False, kernel_initializer='he_normal')
        self.value = Dense(channels, use_bias=False, kernel_initializer='he_normal')
        
        # Initialize gamma as a trainable parameter
        self.gamma = self.add_weight(name='gamma', shape=[1], initializer=Constant(0.0), trainable=True)

    def call(self, x):
        # Shape of x is (batch, width, channels)
        # Learn query, key, and value vectors
        f = self.query(x)  # Shape (batch, width, channels/8)
        g = self.key(x)    # Shape (batch, width, channels/8)
        h = self.value(x)  # Shape (batch, width, channels)
        
        # Transpose and multiply to get the attention scores
        s = tf.matmul(f, g, transpose_b=True)  # Shape (batch, width, width)
        beta = Softmax(axis=-1)(s)  # Softmax over last dimension to get attention weights
        
        # Apply attention weights to value vector
        o = tf.matmul(beta, h)  # Shape (batch, width, channels)
        # Apply gamma and add input (residual connection)
        return self.gamma * o + x

In [63]:
class TextClassifierHyperModel(HyperModel):
    def __init__(self, input_shape, embeddings_GloVe, num_classes, parallel_blocks):#, include_attention_weights=False):
        self.input_shape = input_shape
        self.embeddings_GloVe = embeddings_GloVe
        self.num_classes = num_classes
        self.parallel_blocks = parallel_blocks
        #self.include_attention_weights = include_attention_weights


    def build(self, hp):
        sequence_input = Input(shape=(self.input_shape,), dtype='int32')
        embedded_sequences = Embedding(input_dim=self.embeddings_GloVe.shape[0],
                                       output_dim=self.embeddings_GloVe.shape[1],
                                       weights=[self.embeddings_GloVe],
                                       embeddings_regularizer=keras.regularizers.l2(hp.Float('dropout_rate', 
                                                                                             min_value=0.0, 
                                                                                             max_value=0.3, 
                                                                                             step=0.1)),
                                       trainable=False)(sequence_input)

        conv_blocks = []
        lstm_blocks = []

        for _ in range(hp.Int('blocks', 1, self.parallel_blocks)):
            conv = Conv1D(
                filters=hp.Int('conv_filter_units', min_value=30, max_value=40, step=2),
                kernel_size=hp.Int('conv_kernel_size', min_value=1, max_value=5, step=1),
                activation='relu',
                padding='same',
                strides=1)(embedded_sequences)
            
            # Apply a TimeDistributed Dense layer to each timestep of the convolutional block's output
            conv_dense = TimeDistributed(Dense(hp.Int('lstm_units', min_value=20, max_value=100, step=10), activation='relu'))(conv)
            conv_blocks.append(conv_dense)

            lstm = Bidirectional(LSTM(
                units=hp.Int('lstm_units', min_value=100, max_value=150, step=10),
                return_sequences=True,
                dropout=hp.Float('dropout_rate', min_value=0.0, max_value=0.3, step=0.1),
                recurrent_dropout=hp.Float('dropout_rate', min_value=0.0, max_value=0.3, step=0.1)
            ))(conv_dense)  # Pass the output of the Dense layer into LSTM
            lstm_blocks.append(lstm)

        combined = Concatenate()(conv_blocks + lstm_blocks)

        # Apply the custom attention mechanism
        attention_layer = AdditiveAttention(use_scale=True)
        attention_output, attention_weights = attention_layer([combined, combined], return_attention_scores=True)
        context_vector = GlobalAveragePooling1D()(attention_output)

        dense = Dense(units=hp.Int('dense_units', min_value=80, max_value=100, step=5), activation='relu')(context_vector)
        dropout = Dropout(hp.Float('dropout_rate', min_value=0.0, max_value=0.3, step=0.1))(dense)
        output = Dense(self.num_classes, activation='sigmoid')(dropout)

        model = Model(inputs=sequence_input, outputs=output)
        optimizer_name = hp.Choice('optimizer', ['Adam'])
        learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')
        optimizer = getattr(tf.keras.optimizers, optimizer_name)(learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
        return model

# Note: When actually using this model for training in tuner, make sure to remove attention_weights from outputs.
# Hyperparameters and settings
input_length = 20
num_classes = 2
parallel_blocks = 4
log_dir = 'logs/fit/' + datetime.now().strftime("%d-%m-%Y %H-%M-%S")

# Use this for training (without attention weights in outputs)
hypermodel_for_training = TextClassifierHyperModel(input_length, embeddings_GloVe, num_classes, parallel_blocks)#, include_attention_weights=False)


# # Use this for visualization (with attention weights in outputs)
# hypermodel_for_visualization = TextClassifierHyperModel(input_length, embeddings_GloVe, num_classes, parallel_blocks, include_attention_weights=True)

# tuner = RandomSearch(
#     hypermodel,
#     objective='val_accuracy',
#     max_trials=20,
#     executions_per_trial=1,
#     directory=log_dir,
#     project_name='TextClassification'
# )

tuner = BayesianOptimization(
    hypermodel_for_training,
    objective='val_accuracy',
    max_trials=10,  # Set the maximum number of trials (model configurations to test)
    executions_per_trial=1,  # Number of models that should be built and fit for each trial
    directory=log_dir,
    project_name='TextClassification'
)

# tuner = Hyperband(
#     hypermodel_for_training,
#     objective='val_accuracy',
#     max_epochs=10,  # Maximum number of epochs a model can train
#     factor=3,       # Reduction factor for epochs and models in each bracket
#     directory=log_dir,
#     project_name='TextClassification'
# )


tensorboard_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=0,  # No histogram computation, set to 1 or higher to compute histograms every '1' epoch or specified frequency
    update_freq='epoch'  # Log metrics and histograms every epoch (default), not every batch
)


# Assume X_train, y_train, X_val, y_val are defined
tuner.search(X_train, y_train, 
             epochs=10, 
             validation_data=(X_test, y_test),
             callbacks=[tensorboard_callback])

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
0.1               |0.1               |dropout_rate
3                 |3                 |blocks
34                |34                |conv_filter_units
5                 |5                 |conv_kernel_size
70                |70                |lstm_units
80                |80                |dense_units
Adam              |Adam              |optimizer
0.00019352        |0.00019352        |learning_rate

Epoch 1/10


KeyboardInterrupt: 

In [16]:
model.summary()

NameError: name 'model' is not defined