#### Environment Setup

In [None]:
import os

workspace_dir = '/content/spam-detection'
branch = 'feature/bert-training'
current_dir = os.getcwd()
if not os.path.exists(workspace_dir) and current_dir != workspace_dir:
    !git clone https://github.com/RationalEar/spam-detection.git
    os.chdir(workspace_dir)
    !git checkout $branch
    !ls -al
    !pip install -q transformers==4.48.0 scikit-learn pandas numpy
    !pip install -q torch --index-url https://download.pytorch.org/whl/cu126
    !pip install captum --no-deps --ignore-installed
else:
    os.chdir(workspace_dir)
    !git pull origin $branch

In [None]:
### If running on Google Colab, mount Google Drive
# The google.colab package only exists inside a Colab runtime, so guard the
# import: elsewhere the cell reports that it skipped the mount instead of
# failing with ModuleNotFoundError.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
    print("google.colab is not available; skipping Google Drive mount.")
else:
    IN_COLAB = True
    drive.mount('/content/drive')

In [None]:
import pandas as pd
from utils.functions import set_seed
from utils.constants import DATA_PATH, MODEL_SAVE_PATH

# Seed once, up front, so repeated runs of the notebook start from the same
# state (presumably set_seed seeds the relevant RNGs — confirm in utils.functions).
SEED = 42
set_seed(SEED)

#### Load the preprocessed data

In [None]:
# Load the train/validation/test splits from the processed-data folder.
# Paths are passed directly to read_pickle so pandas opens and closes each
# file itself; wrapping them in a bare open() left the handles unclosed.
# NOTE(review): unpickling can execute arbitrary code — only load pickle
# files that were produced by this project's own preprocessing.
processed_dir = f"{DATA_PATH}/data/processed"
train_df = pd.read_pickle(f"{processed_dir}/train.pkl")
val_df = pd.read_pickle(f"{processed_dir}/val.pkl")
test_df = pd.read_pickle(f"{processed_dir}/test.pkl")
train_df.head()

In [None]:
# Derive the vocabulary lookups from the training split's text column only.
# NOTE(review): neither mapping is referenced by the other cells visible in
# this notebook — confirm whether this step is still needed for the BERT run.
from utils.functions import build_vocab

train_texts = train_df['text']
word2idx, idx2word = build_vocab(train_texts)

#### BERT Model Configuration Verification

The BERT model is configured with the following specifications:
- **Base Model**: `bert-base-uncased` (12 transformer layers, hidden size 768)
- **Classification Head**: Added linear layer for spam detection
- **Explainability**: Integrated gradients and attention analysis
- **Regularization**:
  - Dropout: p=0.2
  - Label smoothing: ε=0.1  
  - Gradient clipping: max norm=1.0
- **Optimization**:
  - Layer-wise learning rate decay (lower layers get lower LR)
  - Base learning rate: 2e-5
  - AdamW optimizer with weight decay
- **Training**: End-to-end fine-tuning on spam detection task

In [None]:
# Settings forwarded to train_model for the BERT run.
max_len = 200        # forwarded to train_model as max_len
embedding_dim = 768  # matches the BERT hidden size

# BERT ships with its own pre-trained embeddings, so no external vectors
# (e.g. GloVe) are supplied.
pretrained_embeddings = None

#### Train the BERT model

In [None]:
from training.trainer import train_model

# Timestamp taken immediately before training; the matching end_time below
# lets a later cell report the elapsed wall-clock time.
start_time = pd.Timestamp.now()

# Keyword settings for the BERT run, gathered in one place for readability.
bert_train_kwargs = dict(
    embedding_dim=embedding_dim,
    pretrained_embeddings=pretrained_embeddings,
    model_save_path=MODEL_SAVE_PATH,
    max_len=max_len,
    evaluate=True,
)

# Train on the three splits; evaluate=True is passed through to train_model
# (presumably triggering evaluation after training — confirm in training.trainer).
model = train_model('bert', train_df, val_df, test_df, **bert_train_kwargs)

end_time = pd.Timestamp.now()

In [None]:
# Elapsed wall-clock time between the two pd.Timestamp.now() readings taken
# around the train_model call in the training cell. Both start_time and
# end_time must come from the same run of that cell for this to be meaningful.
training_time = end_time - start_time
print(f"Training completed in: {training_time}")