# Depression Detection with BERT: Complete Tutorial

This notebook demonstrates the complete workflow for depression detection from DAIC-WOZ interview transcripts using BERT-based modeling.

## Overview
1. Data Loading and Preprocessing
2. Model Training
3. Model Evaluation
4. SHAP Explainability
5. Fairness Auditing
6. Save Model and Results

In [None]:
# Import required libraries
import sys
sys.path.append('../src')

import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt

from data.data_processor import DAICWOZDataProcessor, load_sample_data
from models.bert_model import DepressionDetectionModel
from explainability.shap_explainer import ModelExplainer
from fairness.fairness_auditor import FairnessAuditor, create_synthetic_sensitive_attributes
from utils.evaluation import ModelEvaluator
from utils.visualization import plot_label_distribution

# Seed NumPy and PyTorch from one constant so repeated runs draw the same
# random numbers.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

print("Libraries imported successfully!")

## 1. Data Loading and Preprocessing

In [None]:
# Fetch the bundled demo corpus: a collection of transcripts and the
# matching labels, returned together by the project helper.
print("Loading sample data...")
(transcripts, labels) = load_sample_data()

# Sanity-check the corpus size and the per-class counts.
# np.bincount needs non-negative integer labels.
print(f"Loaded {len(transcripts)} transcripts")
print(f"Labels distribution: {np.bincount(labels)}")

In [None]:
# Turn the raw transcripts and labels into the processor's structured dataset.
data_root = "../data"
processor = DAICWOZDataProcessor(data_dir=data_root)
df = processor.create_dataset(transcripts, labels)

# The bare expression on the last line is what the notebook renders as output.
print("Dataset preview:")
df.head()

In [None]:
# Visualize label distribution
# Passes the raw values of the `label` column to the project's plotting
# helper, with the chart title given explicitly.
plot_label_distribution(df['label'].values, title="Depression Label Distribution")

In [None]:
# Partition the dataset into train / validation / test frames (60 / 20 / 20).
split_fractions = {"train_size": 0.6, "val_size": 0.2, "test_size": 0.2}
train_df, val_df, test_df = processor.split_dataset(df, **split_fractions)

# Report the size of each partition.
print(f"Train set: {len(train_df)} samples")
print(f"Validation set: {len(val_df)} samples")
print(f"Test set: {len(test_df)} samples")

## 2. Model Training

In [None]:
# Build the classifier wrapper around the pretrained checkpoint named below,
# with a 512-token maximum input length.
print("Initializing BERT model...")
model_config = dict(model_name='bert-base-uncased', max_length=512)
model = DepressionDetectionModel(**model_config)

# Show which device the wrapper reports for the model.
print(f"Model device: {model.device}")

In [None]:
# Build one loader per split with identical settings. The generator is
# consumed in order, so the training loader is created before the
# validation loader.
train_loader, val_loader = (
    model.prepare_data(
        texts=split_df['cleaned_transcript'].tolist(),
        labels=split_df['label'].tolist(),
        batch_size=8,
    )
    for split_df in (train_df, val_df)
)

print("Data loaders prepared")

In [None]:
# Training setup: loss, optimizer, and per-epoch history containers.
import torch.nn as nn
import torch.optim as optim

num_epochs = 3
optimizer = optim.AdamW(model.model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

# One entry is appended to each list per epoch.
train_losses, val_losses, val_accuracies = [], [], []

# Simplified demonstration loop: one training pass over train_loader, then
# one evaluation pass over val_loader, for each epoch.
print("Starting training...")
for epoch in range(num_epochs):
    # Optimisation pass over the training loader; returns that pass's loss.
    train_loss = model.train_step(train_loader, optimizer, criterion)
    train_losses.append(train_loss)

    # Held-out pass; returns the validation loss and accuracy.
    val_loss, val_acc = model.evaluate(val_loader, criterion)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    # Progress report, using 1-based epoch numbering.
    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"  Train Loss: {train_loss:.4f}")
    print(f"  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

print("Training complete!")

## 3. Model Evaluation

In [None]:
# Run the trained model over the held-out test split.
test_labels = test_df['label'].values
test_texts = test_df['cleaned_transcript'].tolist()

# predict() returns a pair: the predictions and the associated probabilities.
predictions, probabilities = model.predict(test_texts, batch_size=8)

print(f"Predictions shape: {predictions.shape}")
print(f"Probabilities shape: {probabilities.shape}")

In [None]:
# Calculate evaluation metrics
# The evaluator receives the true labels, the predictions, and the
# probabilities, and returns a mapping that is iterated with .items() below.
evaluator = ModelEvaluator()
metrics = evaluator.calculate_metrics(test_labels, predictions, probabilities)

print("\nTest Set Metrics:")
# NOTE(review): the `15s` / `.4f` format specs require every key to be a
# string and every value to be a real number. A non-scalar or None entry
# would raise here; confirm against calculate_metrics.
for metric, value in metrics.items():
    print(f"{metric:15s}: {value:.4f}")

In [None]:
# Confusion matrix
# Plotted from the true test labels and the model's predictions, using the
# evaluator instance created in the metrics cell.
evaluator.plot_confusion_matrix(test_labels, predictions)

In [None]:
# ROC curve
# NOTE(review): the full `probabilities` array from model.predict is passed,
# not a single column. Presumably the evaluator selects the positive-class
# scores itself; confirm against plot_roc_curve.
evaluator.plot_roc_curve(test_labels, probabilities)

## 4. SHAP Explainability

In [None]:
# Initialize explainer
# The explainer wraps the underlying model object, its tokenizer, and the
# device reported by the model wrapper.
explainer = ModelExplainer(
    model=model.model,
    tokenizer=model.tokenizer,
    device=model.device
)

# Initialize with background samples: at most 10 training transcripts, or
# fewer when the training split is smaller. random_state is pinned to 42,
# the same value as the notebook's global seed, so the background set (and
# the explanations derived from it) is identical on every run of this cell.
# It no longer depends on how much of NumPy's global random state earlier
# cells have consumed.
n_background = min(10, len(train_df))
background_texts = (
    train_df['cleaned_transcript']
    .sample(n=n_background, random_state=42)
    .tolist()
)
explainer.initialize_explainer(background_texts)

print("Explainer initialized")

In [None]:
# Explain a sample prediction
# Uses the first test transcript; this raises IndexError if the test split
# is empty.
sample_text = test_texts[0]
# Only the first 200 characters are echoed, to keep the cell output short.
print(f"Sample text: {sample_text[:200]}...")

# show_plot=True is forwarded to the explainer (presumably renders the plot
# inline; confirm). The returned values are kept in `shap_values`.
shap_values = explainer.explain_prediction(sample_text, show_plot=True)

In [None]:
# Aggregate explanations over the first five test transcripts and keep the
# 15 top-ranked features.
explained_texts = test_texts[:5]
batch_shap = explainer.explain_batch(explained_texts)
importance_df = explainer.get_feature_importance(batch_shap, top_k=15)

# The bare expression on the last line is what the notebook renders as output.
print("\nTop Important Features:")
importance_df

## 5. Fairness Auditing

In [None]:
# Demonstration only: generate synthetic sensitive attributes, sized to the
# number of test labels.
n_test_samples = len(test_labels)
sensitive_attrs = create_synthetic_sensitive_attributes(n_test_samples)

# For each attribute, show its distinct values and how often each occurs.
print("Sensitive attributes created:")
for attr_name, values in sensitive_attrs.items():
    print(f"  {attr_name}: {np.unique(values, return_counts=True)}")

In [None]:
# Audit the test-set predictions against a single sensitive attribute
# (gender), with the threshold fixed at 0.1.
auditor = FairnessAuditor()
audited_attributes = {'gender': sensitive_attrs['gender']}
audit_results = auditor.audit_model(
    y_true=test_labels,
    y_pred=predictions,
    sensitive_attributes=audited_attributes,
    threshold=0.1,
)

# Print the three sections of the audit report in a fixed order.
print("\nFairness Audit Results:")
for heading, result_key in (
    ("Overall Metrics:", 'overall_metrics'),
    ("\nFairness Metrics:", 'fairness_metrics'),
    ("\nFairness Assessment:", 'fairness_assessment'),
):
    print(heading, audit_results[result_key])

In [None]:
# Plot group comparison
# NOTE(review): only the attribute name is passed, so the auditor presumably
# reuses results stored by the preceding audit_model call. Run the audit
# cell first; confirm against FairnessAuditor.
auditor.plot_group_comparison('gender')

## 6. Save Model and Results

In [None]:
# Persist the trained model under the project's models directory.
model_output_path = '../models/trained_model'
model.save_model(model_output_path)
print("Model saved successfully!")

In [None]:
# Save evaluation results
from pathlib import Path

# One row per test sample: the true label, the predicted label, and the
# value in column 1 of the probability array, stored as the depression
# probability.
results_df = pd.DataFrame({
    'true_label': test_labels,
    'predicted_label': predictions,
    'probability_depression': probabilities[:, 1]
})

# DataFrame.to_csv does not create missing directories and raises OSError
# when the target folder is absent (e.g. on a fresh checkout), so create
# the output folder first.
results_path = Path('../data/processed/test_results.csv')
results_path.parent.mkdir(parents=True, exist_ok=True)

results_df.to_csv(results_path, index=False)
print("Results saved!")

## Conclusion

This notebook demonstrated:
- Loading and preprocessing DAIC-WOZ transcripts
- Training a BERT-based depression detection model
- Evaluating model performance
- Explaining predictions using SHAP
- Auditing model fairness across demographic groups
- Saving the trained model and the test-set results

For more details, refer to the project documentation.