# BERT for Diplomacy Deception Detection

This notebook fine-tunes a pre-trained `bert-base-uncased` model to detect deception in Diplomacy game messages.
Each message is classified as either truthful or deceptive.

## Steps:
1. Load Data
2. Tokenization with `BertTokenizer`
3. Model Definition (`TFBertForSequenceClassification`)
4. Training with Class Weights
5. Evaluation
6. Save the Fine-Tuned Model

In [None]:
# Install the backwards-compatible `tf-keras` package into the kernel's
# environment; `transformers` asks for it when Keras 3 is the installed Keras.
%pip install tf-keras

import pandas as pd
import numpy as np
import os
# Must be set BEFORE `import tensorflow` below: the flag is read when
# TensorFlow is loaded and makes `tf.keras` resolve to the legacy tf-keras
# package installed above. Do not reorder these lines.
os.environ["TF_USE_LEGACY_KERAS"] = "1"
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import class_weight
import matplotlib.pyplot as plt
import seaborn as sns

# Seed the NumPy and TensorFlow random number generators so repeated runs
# start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)

Collecting transformersNote: you may need to restart the kernel to use updated packages.

  Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Using cached huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Using cached safetensors-0.7.0-cp38-abi3-win_amd64.whl.metadata (4.2 kB)
Using cached transformers-4.57.1-py3-none-any.whl (12.0 MB)
Using cached huggingface_hub-0.36.0-py3-none-any.whl (566 kB)
Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl (2.7 MB)
Using cached safetensors-0.7.0-cp38-abi3-win_amd64.whl (341 kB)
Installing collected packages: safetensors, huggingface-hub, tokenizers, transformers

   ---------- ----------------------------- 1/4 [huggingface-hub]
   ---------- ----------------------------- 1/4

ValueError: Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.

In [None]:
# Project root: two directory levels above the current working directory.
_cwd_parent = os.path.dirname(os.getcwd())
base_path = os.path.dirname(_cwd_parent)

# Processed Diplomacy splits are read from <root>/data/processed/diplomacy.
data_path = os.path.join(base_path, *("data", "processed", "diplomacy"))

print(f"Data Path: {data_path}")

In [None]:
# Resolve the three split files first, then read them in train/val/test order.
train_file = os.path.join(data_path, "train_final.parquet")
val_file = os.path.join(data_path, "val_final.parquet")
test_file = os.path.join(data_path, "test_final.parquet")

train_df = pd.read_parquet(train_file)
val_df = pd.read_parquet(val_file)
test_df = pd.read_parquet(test_file)

# Quick sanity check on the size of each split.
print(f"Train shape: {train_df.shape}")
print(f"Val shape: {val_df.shape}")
print(f"Test shape: {test_df.shape}")

## Tokenization
We use the BERT tokenizer to convert text into input IDs and attention masks.

In [None]:
# Fixed sequence length: every message is padded or truncated to this many tokens.
MAX_LEN = 128
# Tokenizer loaded for the same checkpoint name that the model uses.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def encode_examples(df, limit=-1):
    """Tokenize the messages in ``df`` into fixed-length BERT inputs.

    Relies on the notebook-level ``tokenizer`` and ``MAX_LEN`` globals.

    Args:
        df: DataFrame with a ``message_text`` column (text to encode) and a
            ``target`` column (labels).
        limit: If greater than 0, only the first ``limit`` rows are encoded
            (useful for quick smoke tests). Any other value encodes all rows.

    Returns:
        Tuple ``(input_ids, attention_masks, labels)``. The first two are
        int32 arrays of shape ``(n_rows, MAX_LEN)``; ``labels`` is
        ``df['target'].values`` for the encoded rows.
    """
    # Use a subset for testing if limit > 0
    if limit > 0:
        df = df.iloc[:limit]

    labels = df['target'].values

    # The tokenizer only accepts strings: a missing message is encoded as an
    # empty string instead of aborting the whole run.
    texts = ['' if pd.isna(text) else str(text) for text in df['message_text']]

    # Nothing to tokenize: return correctly shaped empty arrays so callers can
    # still rely on the (n_rows, MAX_LEN) layout.
    if not texts:
        empty = np.zeros((0, MAX_LEN), dtype=np.int32)
        return empty, empty.copy(), labels

    # One batched call replaces a per-row `encode_plus` loop that built a
    # TensorFlow tensor for every message and converted each back to NumPy.
    encoded = tokenizer(
        texts,
        add_special_tokens=True,
        max_length=MAX_LEN,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='np',
    )

    # Cast to int32 to keep the dtype the per-row TensorFlow path produced.
    input_ids = np.asarray(encoded['input_ids'], dtype=np.int32)
    attention_masks = np.asarray(encoded['attention_mask'], dtype=np.int32)
    return input_ids, attention_masks, labels

# Tokenize each split; every call yields (input ids, attention masks, labels).
print("Encoding data...")
train_encoded = encode_examples(train_df)
val_encoded = encode_examples(val_df)
test_encoded = encode_examples(test_df)

# Unpack into the names the later cells use.
X_train_ids, X_train_mask, y_train = train_encoded
X_val_ids, X_val_mask, y_val = val_encoded
X_test_ids, X_test_mask, y_test = test_encoded

print("Encoding complete.")

## Class Weights

In [None]:
# 'balanced' weights each class inversely to its frequency in y_train, so the
# rarer class contributes more to the loss.
class_labels = np.unique(y_train)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=y_train
)

# Key each weight by its actual label value rather than by its position in
# the array: enumerate() is only correct when the labels happen to be exactly
# 0..k-1. Plain int/float keep the mapping (and its printout) free of NumPy
# scalar types.
class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(class_labels, class_weights)
}
print(f"Class Weights: {class_weights_dict}")

## Model Definition

In [None]:
# Single-logit head: the model emits one raw score per message, which is
# turned into a probability with a sigmoid at evaluation time.
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1, use_safetensors=False)

optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08)
# from_logits=True because the model outputs raw scores, not probabilities.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# The metric also receives raw logits, so the decision boundary is 0.0
# (sigmoid(0) == 0.5). The default threshold of 0.5 would correspond to a
# probability of about 0.62 and make the logged accuracy disagree with the
# 0.5 probability cut-off used for the final predictions.
metric = tf.keras.metrics.BinaryAccuracy('accuracy', threshold=0.0)

model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
model.summary()

In [None]:
# Train
history = model.fit(
    [X_train_ids, X_train_mask], y_train,
    epochs=3,
    batch_size=16,
    validation_data=([X_val_ids, X_val_mask], y_val),
    class_weight=class_weights_dict
)

## Evaluation

In [None]:
# Predictions
y_pred_logits = model.predict([X_test_ids, X_test_mask]).logits
y_pred_prob = tf.sigmoid(y_pred_logits).numpy().flatten()
y_pred = (y_pred_prob > 0.5).astype(int)

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Truth', 'Deception']))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Truth', 'Deception'], yticklabels=['Truth', 'Deception'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Save Model
model.save_pretrained(os.path.join(base_path, "models", "bert_diplomacy"))
print("Model saved.")