# BERT for Diplomacy Deception Detection

This notebook fine-tunes a pre-trained `bert-base-uncased` model to detect deception in Diplomacy game messages, 
framed as binary classification (truth vs. deception).

## Steps:
1. Load Data
2. Tokenization with `BertTokenizer`
3. Model Definition (`TFBertForSequenceClassification`)
4. Training with Class Weights
5. Evaluation

In [1]:
import pandas as pd
import numpy as np
import os
import time
os.environ["TF_USE_LEGACY_KERAS"] = "1"
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import class_weight
import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy and TensorFlow with one shared value so runs are repeatable
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

Note: you may need to restart the kernel to use updated packages.



In [2]:
# Resolve project directories relative to the current working directory
working_dir = os.getcwd()
# The project root is taken to be two directory levels above the working directory
base_path = os.path.dirname(os.path.dirname(working_dir))
data_path = os.path.join(base_path, "data", "processed", "diplomacy")

print(f"Data Path: {data_path}")

Data Path: c:\work environment\Projects\amazon-spam-review\data\processed\diplomacy


In [3]:
# Read the three pre-processed splits (train / validation / test) from data_path
split_files = (
    "train_processed.parquet",
    "val_processed.parquet",
    "test_processed.parquet",
)
train_df, val_df, test_df = [
    pd.read_parquet(os.path.join(data_path, file_name)) for file_name in split_files
]

# Report the (rows, columns) of each split as a quick sanity check
print(f"Train shape: {train_df.shape}")
print(f"Val shape: {val_df.shape}")
print(f"Test shape: {test_df.shape}")

Train shape: (13132, 13)
Val shape: (1416, 13)
Test shape: (2741, 13)


## Tokenization
We use the BERT tokenizer to convert text into input IDs and attention masks.

In [4]:
# Every message is padded or truncated to this many tokens when encoded
MAX_LEN = 128
# Tokenizer loaded from the same checkpoint name the model cell uses
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def encode_examples(df, limit=-1):
    """Tokenize the ``message_text`` column of ``df`` for BERT.

    Args:
        df: DataFrame with a ``message_text`` column (the texts) and a
            ``target`` column (the labels).
        limit: If greater than 0, only the first ``limit`` rows are encoded
            (useful for quick tests). Any other value encodes every row.

    Returns:
        Tuple ``(input_ids, attention_masks, labels)``. The first two are
        int32 NumPy arrays of shape ``(n_rows, MAX_LEN)``; ``labels`` is
        ``df['target'].values`` for the encoded rows.
    """
    # Use a subset for testing if limit > 0
    if limit > 0:
        df = df.iloc[:limit]

    texts = df['message_text'].tolist()
    labels = df['target'].values

    # Nothing to tokenize: return correctly shaped empty arrays rather than
    # handing the tokenizer an empty batch.
    if not texts:
        empty = np.zeros((0, MAX_LEN), dtype=np.int32)
        return empty, empty.copy(), labels

    # One batched call that returns NumPy directly. This avoids building one
    # TensorFlow tensor per message and converting the whole list back to
    # NumPy afterwards, and replaces the deprecated `encode_plus`.
    encoded = tokenizer(
        texts,
        add_special_tokens=True,
        max_length=MAX_LEN,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='np',
    )

    # Keep int32, the dtype the previous TF-tensor-based path produced.
    input_ids = encoded['input_ids'].astype(np.int32)
    attention_masks = encoded['attention_mask'].astype(np.int32)

    return input_ids, attention_masks, labels

# Encode data: tokenize each split into (input ids, attention mask, labels)
print("Encoding data...")
(
    (X_train_ids, X_train_mask, y_train),
    (X_val_ids, X_val_mask, y_val),
    (X_test_ids, X_test_mask, y_test),
) = [encode_examples(split_df) for split_df in (train_df, val_df, test_df)]

print("Encoding complete.")

Encoding data...
Encoding complete.


In [None]:
# CHECK FOR EXISTING MODEL
# Look for a fine-tuned model written by an earlier run. The path is built
# from `base_path` directly because `model_dir` is only assigned further down
# the notebook and is therefore undefined here on a fresh top-to-bottom run.
model_path = os.path.join(base_path, "models", "deceptency", "bert_diplomacy")

# The tokenizer is deliberately not reloaded: the notebook only ever calls
# `model.save_pretrained` on this directory (no tokenizer files are written),
# and the data has already been encoded with the base tokenizer.
skip_training = False
if os.path.isdir(model_path):
    try:
        # Load with the TF class: the rest of the notebook uses the Keras
        # `fit` / `predict(...).logits` API on `model`.
        model = TFBertForSequenceClassification.from_pretrained(model_path)
        skip_training = True
    except OSError as load_error:
        # Directory exists but does not hold a loadable model; fall back to training.
        print(f'Could not load model from {model_path}: {load_error}')

if skip_training:
    print(f'Model loaded from {model_path}')
    print('Skipping training and proceeding to evaluation...')
else:
    print('No existing model found. Will train a new model.')


# Train
# NOTE(review): this block is a verbatim copy of the training cell that appears
# after the model is built and compiled further down. At this position, on a
# fresh top-to-bottom run, `class_weights_dict` has not been assigned yet, and
# the call is made regardless of the value of `skip_training`. It looks like a
# leftover that should be removed -- confirm before deleting.
start_time = time.time()
history = model.fit(
    [X_train_ids, X_train_mask], y_train,
    epochs=3,
    batch_size=16,
    validation_data=([X_val_ids, X_val_mask], y_val),
    class_weight=class_weights_dict
)
end_time = time.time()
# Wall-clock duration of the fit call, converted from seconds to minutes
training_time_minutes = (end_time - start_time) / 60
print(f"Training time: {training_time_minutes:.2f} minutes")

In [5]:
# Balanced class weights computed from the training labels
class_weights = class_weight.compute_class_weight(
    y=y_train,
    classes=np.unique(y_train),
    class_weight='balanced',
)
# Key each weight by its position in the unique-label array
class_weights_dict = {
    position: weight for position, weight in enumerate(class_weights)
}
print(f"Class Weights: {class_weights_dict}")

Class Weights: {0: np.float64(0.5235627142971055), 1: np.float64(11.109983079526227)}


# Save Model
# NOTE(review): on a top-to-bottom run, `model`, `y_pred` and
# `training_time_minutes` are first assigned in later cells, so this block
# appears to be out of order. It also overlaps with the final save cell, which
# writes the model to the same directory but the results to a different CSV
# file. Confirm whether this block should be removed.
# NOTE(review): here `model_dir` includes the trailing "bert_diplomacy"
# component, while the final cell assigns `model_dir` to the parent
# ".../models/deceptency" directory -- the name means two different things.
model_dir = os.path.join(base_path, "models", "deceptency", "bert_diplomacy")
os.makedirs(model_dir, exist_ok=True)
model.save_pretrained(model_dir)
print("Model saved.")

# Save Results to CSV
# output_dict=True returns the report as a nested dict instead of a string
report = classification_report(y_test, y_pred, output_dict=True)

# One row of summary metrics; report['0'] holds the metrics for label 0
# (the class the evaluation cell labels 'Truth').
result_data = {
    'category': 'Deceptency_BERT',
    'best_cv_f1_score': 'N/A',
    'test_accuracy': report['accuracy'],
    'test_f1_truth': report['0']['f1-score'],
    'test_precision_truth': report['0']['precision'],
    'best_params': '{"epochs": 3, "batch_size": 16, "lr": 2e-5}',
    'training_time_minutes': training_time_minutes
}

results_file = os.path.join(base_path, "reports", "model_results_deceptency_bert.csv")
result_df = pd.DataFrame([result_data])
# Append to the CSV; write the header row only when the file does not exist yet
header = not os.path.exists(results_file)
result_df.to_csv(results_file, mode='a', header=header, index=False)

print(f"Results saved to {results_file}")

In [6]:
# Reset Keras global state before building the network
tf.keras.backend.clear_session()

# Pre-trained BERT with a single-logit classification head
model = TFBertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    use_safetensors=False,
    num_labels=1,
)

# Training configuration: the loss is told it receives raw logits
metric = tf.keras.metrics.BinaryAccuracy('accuracy')
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(epsilon=1e-08, learning_rate=2e-5)

model.compile(loss=loss, metrics=[metric], optimizer=optimizer)
model.summary()




TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_37 (Dropout)        multiple                  0 (unused)
                                                                 
 classifier (Dense)          multiple                  769       
                                                                 
Total params: 109483009 (417.64 MB)
Trainable params: 109483009 (417.64 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [7]:
# Train
# Model inputs are passed as [input ids, attention mask] pairs
train_inputs = [X_train_ids, X_train_mask]
val_inputs = [X_val_ids, X_val_mask]

start_time = time.time()
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    class_weight=class_weights_dict,
    batch_size=16,
    epochs=3,
)
end_time = time.time()

# Wall-clock duration of the fit call, in minutes
elapsed_seconds = end_time - start_time
training_time_minutes = elapsed_seconds / 60
print(f"Training time: {training_time_minutes:.2f} minutes")

Epoch 1/3


KeyboardInterrupt: 

## Evaluation

In [None]:
# Predictions: logits -> sigmoid probabilities -> hard labels at a 0.5 cut-off
y_pred_logits = model.predict([X_test_ids, X_test_mask]).logits
y_pred_prob = tf.sigmoid(y_pred_logits).numpy().flatten()
y_pred = (y_pred_prob > 0.5).astype(int)

class_names = ['Truth', 'Deception']

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=class_names))

# Confusion Matrix drawn on an explicit Axes
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Save Model (only if newly trained)
if not skip_training:
    # NOTE(review): the model is written to two locations
    # (models/bert_diplomacy and models/deceptency/bert_diplomacy). Both are
    # kept because other consumers of the first path cannot be ruled out from
    # this notebook alone -- confirm whether the first save is still needed.
    model.save_pretrained(os.path.join(base_path, "models", "bert_diplomacy"))
    print("Model saved.")

    # --- SAVE MODEL AND REPORT ---
    model_dir = os.path.join(base_path, "models", "deceptency")
    os.makedirs(model_dir, exist_ok=True)
    model.save_pretrained(os.path.join(model_dir, "bert_diplomacy"))
    print("Model saved.")

    # Metrics: recompute test-set predictions so this cell does not depend on
    # the evaluation cell having been run. `classification_report` comes from
    # the notebook's top import cell.
    y_pred_logits = model.predict([X_test_ids, X_test_mask]).logits
    y_pred_prob = tf.sigmoid(y_pred_logits).numpy().flatten()
    y_pred = (y_pred_prob > 0.5).astype(int)
    report = classification_report(y_test, y_pred, output_dict=True)

    # One summary row; report['0'] / report['1'] hold the per-label metrics.
    result_data = {
        'category': 'Deceptency_bert_diplomacy',
        'best_cv_f1_score': 'N/A',
        'best_params': 'N/A',
        'test_accuracy': report['accuracy'],
        'test_f1_truth': report['0']['f1-score'],
        'test_precision_truth': report['0']['precision'],
        'test_f1_deception': report['1']['f1-score'],
        'test_precision_deception': report['1']['precision'],
        'training_time_minutes': training_time_minutes
    }

    results_file = os.path.join(base_path, "reports", "model_results_deceptency_bert_diplomacy.csv")
    # Make sure the reports directory exists; otherwise to_csv raises.
    os.makedirs(os.path.dirname(results_file), exist_ok=True)
    result_df = pd.DataFrame([result_data])
    # Append to the CSV; write the header row only when the file is new.
    header = not os.path.exists(results_file)
    result_df.to_csv(results_file, mode='a', header=header, index=False)
    print(f"Results saved to {results_file}")
else:
    print('Model was loaded, no need to save')
