In [17]:
import sys
sys.path.append('models')

# from google.colab import drive
# drive.mount('/content/drive')
# sys.path.append('/content/drive/MyDrive/Dissertation/models')
# sys.path.append('/content/drive/MyDrive/Dissertation')

In [18]:
# !pip install dask_ml
# !pip install scikeras
# import nltk
# nltk.download('stopwords')
# import nltk
# nltk.download('punkt')

In [19]:
# importing libraries
import joblib
import os
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from collections import defaultdict
import matplotlib.pyplot as plt

# from models.bert import BERT

# Load the TextPreprocessor class (assumed to be defined already)
from textpreprocessor import TextPreprocessor

import warnings
warnings.filterwarnings("ignore")

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Set memory growth
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

Num GPUs Available:  1


In [20]:
NUM_SAMPLE = 10000
TEST_RATIO=0.2
BATCH_SIZE=32
EPOCHS = 5
MAX_WORD_COUNT = 5000
MAX_LENGTH = 100
OUTPUT_RESULT_DIR = "Output/result"
OUTPUT_MODELS_DIR = "Output/models"
USE_TEST_DATA = True

os.makedirs(OUTPUT_RESULT_DIR, exist_ok=True)
os.makedirs(OUTPUT_MODELS_DIR, exist_ok=True)

In [21]:
# Define a function to plot training history
def plot_training_history(history, title="Model Training History"):
    # Extract values from history
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(1, len(acc) + 1)

    # Plot training and validation accuracy
    plt.figure(figsize=(14, 5))

    # Accuracy plot
    plt.subplot(1, 2, 1)
    plt.plot(epochs, acc, 'b', label='Training Accuracy')
    plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
    plt.title(f"{title} - Accuracy")
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss plot
    plt.subplot(1, 2, 2)
    plt.plot(epochs, loss, 'b', label='Training Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.title(f"{title} - Loss")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

# Function to plot training history from defaultdict data
def plot_training_history_from_dict(history, title="Model Training History"):
    # Extract values from the dictionary
    acc = history['accuracy']
    val_acc = history['val_accuracy']
    loss = history['loss']
    val_loss = history['val_loss']

    # Set up epoch range
    epochs = range(1, len(acc) + 1)

    # Plot training and validation accuracy
    plt.figure(figsize=(14, 5))

    # Accuracy plot
    plt.subplot(1, 2, 1)
    plt.plot(epochs, acc, 'b', label='Training Accuracy')
    plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
    plt.title(f"{title} - Accuracy")
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss plot
    plt.subplot(1, 2, 2)
    plt.plot(epochs, loss, 'b', label='Training Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.title(f"{title} - Loss")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

# Dictionary to store the results
results = {
    'Model': [],
    'Training-Time':[],
    'Accuracy': [],
    'Precision (Class 0)': [],
    'Precision (Class 1)': [],
    'Recall (Class 0)': [],
    'Recall (Class 1)': [],
    'F1-Score (Class 0)': [],
    'F1-Score (Class 1)': []
}

# Function to calculate accuracy and classification report
def _evaluate_model(training_time, model_name, y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred) * 100
    report = classification_report(y_test, y_pred, output_dict=True)

    # Store the results
    results['Model'].append(model_name)
    results['Training-Time'].append(training_time)
    results['Accuracy'].append(accuracy)
    results['Precision (Class 0)'].append(report['0']['precision'])
    results['Precision (Class 1)'].append(report['1']['precision'])
    results['Recall (Class 0)'].append(report['0']['recall'])
    results['Recall (Class 1)'].append(report['1']['recall'])
    results['F1-Score (Class 0)'].append(report['0']['f1-score'])
    results['F1-Score (Class 1)'].append(report['1']['f1-score'])

def _predict_model(model,X):
    y_pred_prob = model.predict(X)
    return [1 if prob > 0.5 else 0 for prob in y_pred_prob]

def evaluate_model_class(model_class, X_test, y_test):
    y_pred = _predict_model(model_class.model, X_test)
    y_pred_random = _predict_model(model_class.random_search_cv.best_estimator_, X_test)
    y_pred_grid = _predict_model(model_class.grid_search_cv.best_estimator_, X_test)
    y_pred_best = _predict_model(model_class.best_model, X_test)

    _evaluate_model(model_class.training_time, model_class.model_name, y_test, y_pred)
    _evaluate_model(model_class.random_search_time,  model_class.model_name + '_random_search', y_test, y_pred_random)
    _evaluate_model(model_class.grid_search_time,  model_class.model_name + '_grid_search', y_test, y_pred_grid)
    _evaluate_model(model_class.best_training_time,  model_class.model_name + '_best', y_test, y_pred_best)

    df_results = pd.DataFrame(results)
    df_results.to_excel(os.path.join(OUTPUT_RESULT_DIR,'Model_Compare.xlsx'))

def evaluate_xgboost_model_class(model_class, X_test, y_test):
    y_pred_xgb = _predict_model(model_class.model, model_class.convert_to_dmatrix(X_test, y_test))
    y_pred_xgb_random = _predict_model(model_class.random_search_cv.best_estimator_, X_test.toarray())
    y_pred_xgb_grid = _predict_model(model_class.grid_search_cv.best_estimator_, X_test.toarray())
    y_pred_xgb_best = _predict_model(model_class.best_model, model_class.convert_to_dmatrix(X_test, y_test))

    # Call the function with your actual predictions (replace placeholders with your data)
    _evaluate_model(model_class.training_time, model_class.model_name, y_test, y_pred_xgb)
    _evaluate_model(model_class.random_search_time,  model_class.model_name + '_random_search', y_test, y_pred_xgb_random)
    _evaluate_model(model_class.grid_search_time,  model_class.model_name + '_grid_search', y_test, y_pred_xgb_grid)
    _evaluate_model(model_class.best_training_time,  model_class.model_name + '_best', y_test, y_pred_xgb_best)

    df_results = pd.DataFrame(results)
    df_results.to_excel(os.path.join(OUTPUT_RESULT_DIR,'Model_Compare.xlsx'), index=False)

def compare_models_accuracy_and_get_best_params(models, X_test, y_test):
    best_accuracy = 0
    best_params = None
    best_model_name = None

    for model_name, model_class in models.items():
        # Get model's parameters (either from random search CV or from original model)
        if model_name!='Original':  # Check if it has random_search_cv
            y_pred_prob = model_class.predict(X_test)
            params = model_class.best_params_
        else:
            y_pred_prob = model_class.predict(X_test)
            params = model_class.get_params()

        # Convert probabilities to binary predictions
        pred = [1 if prob > 0.5 else 0 for prob in y_pred_prob]

        # Calculate accuracy
        accuracy = accuracy_score(y_test, pred) * 100
        print(f'{model_name} Accuracy: {accuracy}')

        # Compare and keep track of the model with the highest accuracy
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = params
            best_model_name = model_name

    print(f'Best Model: {best_model_name} with Accuracy: {best_accuracy}')
    print(f'Best Parameters: {best_params}')
    return best_model_name, best_params


00. Text Pre-Processing

In [22]:
# Initialize the Text Preprocessor
processor = TextPreprocessor(MAX_WORD_COUNT, MAX_LENGTH)

if USE_TEST_DATA:
    INPUT_DIR = f"Output/proto_models_rev2_{NUM_SAMPLE}"
    # Load data
    df_train = pd.read_csv(os.path.join(INPUT_DIR, 'train_cleaned.csv'))
    df_test = pd.read_csv(os.path.join(INPUT_DIR, 'test_cleaned.csv'))
    # df_test = processor.load_data()
    X_train = df_train['review']
    X_test = df_test['review']
    y_train = df_train['polarity']
    y_test = df_test['polarity']
    # X_train_seq_padded = pickle.load(os.path.join(INPUT_DIR, 'X_train_pad.pkl'))
    # X_test_seq_padded = pickle.load(os.path.join(INPUT_DIR, 'X_test_pad.pkl'))
else:
    # Load data
    df_train, df_test = processor.parallel_load_data()

    df_train_step1 = processor.remove_stopwords(df_train.copy())
    df_test_step1 = processor.remove_stopwords(df_test.copy())

    print('----------TRAIN DATA----------')
    df_train_step2 = processor.filter_by_length_of_sentence(df_train_step1.copy(),50)
    print('----------TEST DATA----------')
    df_test_step2 = processor.filter_by_length_of_sentence(df_test_step1.copy(),50)

    df_train_step3 = processor.sampling_data(df_train_step2, NUM_SAMPLE)
    df_test_step3 = processor.sampling_data(df_test_step2, int(NUM_SAMPLE*TEST_RATIO))

    # Preprocess data
    df_train_step3 = processor.map_polarity(df_train_step3.copy())
    df_test_step3 = processor.map_polarity(df_test_step3.copy())

    # Split data
    X_train, y_train = processor.split_data(df_train_step3)
    X_test, y_test = processor.split_data(df_test_step3)
    
    INPUT_DIR = f"Output/proto_models_rev2_{NUM_SAMPLE}"
    os.makedirs(INPUT_DIR, exist_ok=True)
    # Save data
    df_train_step3.to_csv(os.path.join(INPUT_DIR, 'train_cleaned.csv'), index=False)
    df_test_step3.to_csv(os.path.join(INPUT_DIR, 'test_cleaned.csv'), index=False)

X_train_tfidf, X_test_tfidf = processor.vectorize_text(X_train, X_test)
X_train_pad, X_test_pad = processor.tokenization_and_padding(X_train, X_test)

MAX_LENGTH = processor.max_length

In [23]:
# Importing necessary libraries
from transformers import BertTokenizer, TFBertForSequenceClassification, AdamWeightDecay, TFBertModel
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling1D

from tensorflow.keras.models import Model
from keras_tuner import HyperParameters, RandomSearch
import tensorflow as tf
import numpy as np
import tensorflow
import time

class BertLayer(tensorflow.keras.layers.Layer):
    def __init__(self, bert_model):
        super(BertLayer, self).__init__()
        self.bert_model = bert_model
    def call(self, inputs):
        input_ids, attention_mask = inputs
        bert_output = self.bert_model(input_ids=input_ids, attention_mask = attention_mask)
        bert_output = bert_output.last_hidden_state
        return bert_output

class BERT():
    def __init__(self, max_length, epochs=15, batch_size=64, verbose=1):
        self.max_length = max_length
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose=verbose
        self.tokenizer_bert = BertTokenizer.from_pretrained('bert-base-uncased')

    def __make_tokens(self, data):
        return self.tokenizer_bert(
            text=list(data),
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='tf',
            return_token_type_ids=False,
            return_attention_mask=True
        )
        
    # Convert to TensorFlow Tensor
    def create_tf_dataset(X, y, batch_size=32):
        input_ids = tf.convert_to_tensor(X["input_ids"])
        attention_mask = tf.convert_to_tensor(X["attention_mask"])
        y = tf.convert_to_tensor(y)

        dataset = tf.data.Dataset.from_tensor_slices(({"input_ids": input_ids, "attention_mask": attention_mask}, y))
        dataset = dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
        return dataset

    def train_model(self, X_train, y_train, X_test, y_test):
        X_train_tokens = self.__make_tokens(X_train)
        X_test_tokens = self.__make_tokens(X_test)

        # Convert datasets
        train_dataset = self.create_tf_dataset(X_train_tokens, y_train)
        val_dataset = self.create_tf_dataset(X_test_tokens, y_test)

        # Define a Keras-based model using TFBertModel
        input_ids = Input(shape=(self.max_length,), dtype='int32', name="input_ids")
        attention_mask = Input(shape=(self.max_length,), dtype='int32', name="attention_mask")


        bert_model = TFBertModel.from_pretrained("bert-base-uncased")
        bert_model.trainable = False
        
        bert_layer  = BertLayer(bert_model=bert_model)
        bert_output=bert_layer([input_ids, attention_mask])
        pooled_output = GlobalAveragePooling1D()(bert_output.last_hidden_state)

        dense = Dense(64, activation='relu')(pooled_output)
        dropout = Dropout(0.3)(dense)
        output = Dense(2, activation='softmax')(dropout)

        self.model = Model(inputs=[input_ids, attention_mask], outputs=output)

        # Adam optimizer with a learning rate of 2e-5
        optimizer = Adam(learning_rate=2e-5)

        # Compile the model with a standard loss function
        loss_fn = SparseCategoricalCrossentropy(from_logits=False)
        self.model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

        # Adding EarlyStopping callback
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

        # Model summary
        self.model.summary()

        # Train the model
        start_time = time.time()
        self.history = self.model.fit(
            {'input_ids': X_train_tokens['input_ids'], 'attention_mask': X_train_tokens['attention_mask']},
            y_train,
            validation_data=({'input_ids': X_test_tokens['input_ids'], 'attention_mask': X_test_tokens['attention_mask']}, y_test),
            epochs=self.epochs,
            batch_size=self.batch_size,
            callbacks=[early_stopping]  # Add EarlyStopping callback here
        )

        self.training_time = time.time() - start_time

In [24]:
import keras
def build_bert_model():
    input_ids = Input(shape=(100,), dtype=tf.int32, name="input_ids")
    attention_mask = Input(shape=(100,), dtype=tf.int32, name="attention_mask")

    class BertLayer(keras.layers.Layer):
        def __init__(self, bert_model):
            super(BertLayer, self).__init__()
            self.bert_model = bert_model

        def call(self, inputs):
            input_ids, attention_mask = inputs
            bert_output= self.bert_model(input_ids=input_ids, attention_mask=attention_mask)
            bert_output = bert_output.last_hidden_state
            return bert_output

    bert_model = TFBertModel.from_pretrained("bert-base-uncased")
    bert_model.trainable = False

    bert_layer = BertLayer(bert_model)
    bert_output = bert_layer([input_ids, attention_mask])

    pooled_output = GlobalAveragePooling1D()(bert_output)
    dense = Dense(units=32, activation='relu')(pooled_output)
    output = Dense(2, activation='softmax')(dense)

    model = Model(inputs=[input_ids, attention_mask], outputs=output)

    learning_rate = 2e-5
    optimizer = Adam(learning_rate=learning_rate, weight_decay=0.01)
    model.compile(optimizer=optimizer, loss=SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"])

    return model

In [25]:

# Convert to TensorFlow Tensor
def create_tf_dataset(X, y, batch_size=32):
    input_ids = tf.convert_to_tensor(X["input_ids"])
    attention_mask = tf.convert_to_tensor(X["attention_mask"])
    y = tf.convert_to_tensor(y)

    dataset = tf.data.Dataset.from_tensor_slices(({"input_ids": input_ids, "attention_mask": attention_mask}, y))
    dataset = dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    return dataset

tokenizer_bert = BertTokenizer.from_pretrained('bert-base-uncased')

def __make_tokens(data):
    return tokenizer_bert(
        text=list(data),
        add_special_tokens=True,
        max_length=100,
        padding='max_length',
        truncation=True,
        return_tensors='tf',
        return_token_type_ids=False,
        return_attention_mask=True
    )

X_train_tokens = __make_tokens(X_train)
X_test_tokens = __make_tokens(X_test)

train_dataset = create_tf_dataset(X_train_tokens, y_train)
val_dataset = create_tf_dataset(X_test_tokens, y_test)


I0000 00:00:1729707118.993584  558596 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1729707118.993633  558596 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1729707118.993647  558596 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1729707119.107929  558596 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1729707119.107998  558596 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-10-23

In [26]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [27]:
modelss = build_bert_model()

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [40]:
modelss.fit(X_train_tokens, y_test, epochs=3, callbacks=early_stopping)

ValueError: Unrecognized data type: x={'input_ids': <tf.Tensor: shape=(10000, 100), dtype=int32, numpy=
array([[  101,  2128,  3676, ...,     0,     0,     0],
       [  101,  3114,  6100, ...,     0,     0,     0],
       [  101,  2204,  2143, ...,     0,     0,     0],
       ...,
       [  101,  4067,  2172, ...,     0,     0,     0],
       [  101, 24005,  2081, ...,     0,     0,     0],
       [  101,  9785, 17233, ...,     0,     0,     0]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(10000, 100), dtype=int32, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]], dtype=int32)>} (of type <class 'transformers.tokenization_utils_base.BatchEncoding'>)

In [39]:
# 예측 확률을 얻은 후, 가장 높은 확률을 가진 클래스 선택
y_pred_prob = modelss.predict(val_dataset)

# 각 샘플에서 가장 높은 확률을 가진 클래스를 예측값으로 변환
y_pred = np.argmax(y_pred_prob, axis=1)

# 정확도 계산
accuracy = accuracy_score(y_test, y_pred) * 100
print(accuracy)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step
54.55


In [None]:

# Function to calculate accuracy and classification report
def _evaluate_model(training_time, model_name, y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred) * 100
    report = classification_report(y_test, y_pred, output_dict=True)

    # Store the results
    results['Model'].append(model_name)
    results['Training-Time'].append(training_time)
    results['Accuracy'].append(accuracy)
    results['Precision (Class 0)'].append(report['0']['precision'])
    results['Precision (Class 1)'].append(report['1']['precision'])
    results['Recall (Class 0)'].append(report['0']['recall'])
    results['Recall (Class 1)'].append(report['1']['recall'])
    results['F1-Score (Class 0)'].append(report['0']['f1-score'])
    results['F1-Score (Class 1)'].append(report['1']['f1-score'])

def _predict_model(model,X):
    y_pred_prob = model.predict(X)
    return [1 if prob > 0.5 else 0 for prob in y_pred_prob]

def evaluate_model_class(model_class, X_test, y_test):
    y_pred = _predict_model(model_class.model, X_test)
    y_pred_random = _predict_model(model_class.random_search_cv.best_estimator_, X_test)
    y_pred_grid = _predict_model(model_class.grid_search_cv.best_estimator_, X_test)
    y_pred_best = _predict_model(model_class.best_model, X_test)

    _evaluate_model(model_class.training_time, model_class.model_name, y_test, y_pred)
    _evaluate_model(model_class.random_search_time,  model_class.model_name + '_random_search', y_test, y_pred_random)
    _evaluate_model(model_class.grid_search_time,  model_class.model_name + '_grid_search', y_test, y_pred_grid)
    _evaluate_model(model_class.best_training_time,  model_class.model_name + '_best', y_test, y_pred_best)

    df_results = pd.DataFrame(results)
    df_results.to_excel(os.path.join(OUTPUT_RESULT_DIR,'Model_Compare.xlsx'))

def evaluate_xgboost_model_class(model_class, X_test, y_test):
    y_pred_xgb = _predict_model(model_class.model, model_class.convert_to_dmatrix(X_test, y_test))
    y_pred_xgb_random = _predict_model(model_class.random_search_cv.best_estimator_, X_test.toarray())
    y_pred_xgb_grid = _predict_model(model_class.grid_search_cv.best_estimator_, X_test.toarray())
    y_pred_xgb_best = _predict_model(model_class.best_model, model_class.convert_to_dmatrix(X_test, y_test))

    # Call the function with your actual predictions (replace placeholders with your data)
    _evaluate_model(model_class.training_time, model_class.model_name, y_test, y_pred_xgb)
    _evaluate_model(model_class.random_search_time,  model_class.model_name + '_random_search', y_test, y_pred_xgb_random)
    _evaluate_model(model_class.grid_search_time,  model_class.model_name + '_grid_search', y_test, y_pred_xgb_grid)
    _evaluate_model(model_class.best_training_time,  model_class.model_name + '_best', y_test, y_pred_xgb_best)

    df_results = pd.DataFrame(results)
    df_results.to_excel(os.path.join(OUTPUT_RESULT_DIR,'Model_Compare.xlsx'), index=False)


In [69]:
# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenizing the datasets
X_train_tokens = tokenizer(
    text=list(X_train),
    add_special_tokens=True,
    max_length=100,
    padding='max_length',
    truncation=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True
)

X_test_tokens = tokenizer(
    text=list(X_test),
    add_special_tokens=True,
    max_length=100,
    padding='max_length',
    truncation=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True
)


# # Define BERT Model
# bert_model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# # Use Hugging Face's AdamWeightDecay optimizer
# optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
# # Compile the model using a standard loss function
# loss_fn = SparseCategoricalCrossentropy(from_logits=True)
# bert_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
# # Model Summary
# bert_model.summary()

# BERT Model Definition
def build_bert_model(hp):
    input_ids = Input(shape=(100,), dtype=tf.int32, name="input_ids")
    attention_mask = Input(shape=(100,), dtype=tf.int32, name="attention_mask")

    class BertLayer(keras.layers.Layer):
        def __init__(self, bert_model):
            super(BertLayer, self).__init__()
            self.bert_model = bert_model

        def call(self, inputs):
            input_ids, attention_mask = inputs
            bert_output= self.bert_model(input_ids=input_ids, attention_mask=attention_mask)
            bert_output = bert_output.last_hidden_state
            return bert_output

    bert_model = TFBertModel.from_pretrained("bert-base-uncased")
    bert_model.trainable = False

    bert_layer = BertLayer(bert_model)
    bert_output = bert_layer([input_ids, attention_mask])

    pooled_output = GlobalAveragePooling1D()(bert_output)
    dense = Dense(units=hp.Int("units", min_value=32, max_value=128, step=32), activation='relu')(pooled_output)
    output = Dense(2, activation='softmax')(dense)

    model = Model(inputs=[input_ids, attention_mask], outputs=output)

    learning_rate = hp.Choice("learning_rate", values=[1e-5, 2e-5, 3e-5])
    optimizer = Adam(learning_rate=learning_rate, weight_decay=0.01)
    model.compile(optimizer=optimizer, loss=SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"])

    return model

# Convert to TensorFlow Tensor
def create_tf_dataset(X, y, batch_size=32):
    input_ids = tf.convert_to_tensor(X["input_ids"])
    attention_mask = tf.convert_to_tensor(X["attention_mask"])
    y = tf.convert_to_tensor(y)

    dataset = tf.data.Dataset.from_tensor_slices(({"input_ids": input_ids, "attention_mask": attention_mask}, y))
    dataset = dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    return dataset

# Convert datasets
train_dataset = create_tf_dataset(X_train_tokens, y_train)
val_dataset = create_tf_dataset(X_test_tokens, y_test)

# Hyperparameter tuner
tuner = RandomSearch(
    build_bert_model,
    objective="val_accuracy",
    max_trials=5,
    executions_per_trial=1,
    directory="bert_tuning",
    project_name="bert_sentiment_analysis"
)


# Perform the hyperparameter tuning search
tuner.search(
    train_dataset,
    validation_data=val_dataset,
    epochs=1
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
128               |128               |units
1e-05             |1e-05             |learning_rate



Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

[1m  5/313[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 37ms/step - accuracy: 0.5926 - loss: 0.7025

I0000 00:00:1729707052.646527  540404 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m312/313[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 35ms/step - accuracy: 0.5134 - loss: 0.6989

W0000 00:00:1729707063.862759  540405 assert_op.cc:38] Ignoring Assert operator functional_1/bert_layer_1/tf_bert_model/bert/embeddings/assert_less/Assert/Assert






: 

: 