In [1]:
import pandas as pd
import numpy as np
import os
import json
from profanity_check import predict, predict_prob
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from torch.utils.data import Dataset, DataLoader
import torch
from transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def create_profanity_dataset():
    """
    Creates a profanity detection dataset from JSON files in All_conversations folder
    Returns a pandas DataFrame with columns: ['sentence', 'profanity']
    """
    data = []
    
    # Process each JSON file in All_conversations
    for filename in os.listdir('All_Conversations'):
        if filename.endswith('.json'):
            with open(f'All_conversations/{filename}', 'r', encoding='utf-8') as f:
                conversation = json.load(f)
                
                for turn in conversation:
                    if 'text' in turn:
                        text = turn['text'].strip()
                        if text:  # Only process non-empty text
                            is_profane = predict([text])[0]
                            data.append({
                                'sentence': text,
                                'profanity': is_profane
                            })
    
    # Create DataFrame
    df = pd.DataFrame(data)
    return df

# Create and save the dataset
profanity_df = create_profanity_dataset()
profanity_df.to_csv('profanity_dataset.csv', index=False)

# Print summary
print(f"Created dataset with {len(profanity_df)} sentences")
print("Profanity distribution:")
print(profanity_df['profanity'].value_counts())
print("\nSample data:")
print(profanity_df.head())

Created dataset with 3187 sentences
Profanity distribution:
profanity
0    3080
1     107
Name: count, dtype: int64

Sample data:
                                            sentence  profanity
0  Hello, is this Mr. Johnson? This is Lisa calli...          0
1  I'm sorry, but I think you have the wrong pers...          0
2  Oh, I apologize for the confusion, Sarah. I'm ...          0
3  I don't have any account with Definite Bank. Y...          0
4  Thank you for letting me know. I will make a n...          0


In [3]:
# Load your existing dataset
df = pd.read_csv('profanity_dataset.csv')

# Split while stratifying by the 'profanity' label to maintain class balance
train_df, test_df = train_test_split(
    df,
    test_size=0.3,       
    random_state=42,        
    stratify=df['profanity'] 
)

# Verify the splits
print("Training set distribution:")
print(train_df['profanity'].value_counts())
print("\nTest set distribution:")
print(test_df['profanity'].value_counts())

# Save the splits
train_df.to_csv('train_dataset.csv', index=False)
test_df.to_csv('test_dataset.csv', index=False)

Training set distribution:
profanity
0    2155
1      75
Name: count, dtype: int64

Test set distribution:
profanity
0    925
1     32
Name: count, dtype: int64


In [4]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class ProfanityDataset(Dataset):
    def __init__(self, texts, labels):
        # Convert to numpy arrays to avoid pandas indexing issues
        self.texts = texts.values if hasattr(texts, 'values') else texts
        self.labels = labels.values if hasattr(labels, 'values') else labels
        
    def __len__(self):
        return len(self.labels)
        
    def __getitem__(self, idx):
        encoding = tokenizer(
            str(self.texts[idx]),  # Ensure string type
            max_length=128,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

# Create datasets
train_dataset = ProfanityDataset(train_df['sentence'], train_df['profanity'])
test_dataset = ProfanityDataset(test_df['sentence'], test_df['profanity'])

In [23]:
# 1. Handle Class Imbalance
class_counts = train_df['profanity'].value_counts()
class_weights = torch.tensor([
    1.0,  # weight for clean (majority class)
    class_counts[0]/class_counts[1]  # weight for profane
], dtype=torch.float32)

# 2. Custom Model with Weighted Loss
class WeightedBERT(BertForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights)

model = WeightedBERT.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False
)

# 3. Metrics Calculation
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='binary', pos_label=1
    )
    return {
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'false_positives': ((preds == 1) & (labels == 0)).sum(),
        'false_negatives': ((preds == 0) & (labels == 1)).sum()
    }

# 4. Training Configuration
training_args = TrainingArguments(
    output_dir='./bert_profanity_results',
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    learning_rate=3e-5,
    evaluation_strategy='steps',
    eval_steps=50,
    save_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    greater_is_better=True,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=50,
    fp16=torch.cuda.is_available()
)

# 5. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# 6. Start Training
print("Starting training...")
trainer.train()

# 7. Save Model
model.save_pretrained('./bert_profanity_model')
tokenizer.save_pretrained('./bert_profanity_model')

# 8. Evaluate Final Model
results = trainer.evaluate()
print("\nFinal Evaluation Results:")
print(f"F1 Score: {results['eval_f1']:.4f}")
print(f"Precision: {results['eval_precision']:.4f}")
print(f"Recall: {results['eval_recall']:.4f}")

Some weights of WeightedBERT were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'loss_fct.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training...


Step,Training Loss,Validation Loss,F1,Precision,Recall,False Positives,False Negatives
50,0.3021,0.077858,0.0,0.0,0.0,0,32
100,0.0657,0.091098,0.590164,0.62069,0.5625,11,14
150,0.0501,0.025223,0.885714,0.815789,0.96875,7,1
200,0.0202,0.019852,0.909091,0.882353,0.9375,4,2


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Final Evaluation Results:
F1 Score: 0.9091
Precision: 0.8824
Recall: 0.9375


In [20]:
print(f"Total samples in dataset: {len(train_dataset)}")

Total samples in dataset: 2230


In [7]:
# 1. accounting imbalanced dataset
class_counts = train_df['profanity'].value_counts()
class_weights = torch.tensor([
    1.0,  # weight for clean (majority class)
    class_counts[0]/class_counts[1]  # weight for profane
], dtype=torch.float32)

class WeightedBERTPartial(BertForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights)

# 2. Load Weighted BERT Model directly (with custom loss)
model2 = WeightedBERTPartial.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False
)

# 3. Freeze All Layers First
for param in model2.bert.parameters():
    param.requires_grad = False

# 4. Unfreeze Last N Layers (let's do last 2 transformer layers)
unfreeze_layers = 2
for layer in model2.bert.encoder.layer[-unfreeze_layers:]:
    for param in layer.parameters():
        param.requires_grad = True

# 5. Always unfreeze the classifier head
for param in model2.classifier.parameters():
    param.requires_grad = True

# 6. Verify trainable parameters
print("\nTrainable parameters:")
for name, param in model2.named_parameters():
    if param.requires_grad:
        print(name)

# 7. Metrics Calculation
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='binary', pos_label=1
    )
    return {
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'false_positives': ((preds == 1) & (labels == 0)).sum(),
        'false_negatives': ((preds == 0) & (labels == 1)).sum()
    }

# 8. Training Configuration (same as before)
training_args = TrainingArguments(
    output_dir='./bert_profanity_partial_results',
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    learning_rate=3e-5,  # Might want to use a slightly higher LR here
    evaluation_strategy='steps',
    eval_steps=50,
    save_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    greater_is_better=True,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir='./logs_partial',
    logging_steps=50,
    fp16=torch.cuda.is_available()
)

# 9. Initialize Trainer
trainer_partial = Trainer(
    model=model2,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# 10. Start Training
print("Starting partial fine-tuning (last layers only)...")
trainer_partial.train()

# 11. Save Model
model2.save_pretrained('./bert_profanity_partial_model')
tokenizer.save_pretrained('./bert_profanity_partial_model')

# 12. Evaluate Final Model
results_partial = trainer_partial.evaluate()
print("\nFinal Evaluation Results (Partial Fine-Tuning):")
print(f"F1 Score: {results_partial['eval_f1']:.4f}")
print(f"Precision: {results_partial['eval_precision']:.4f}")
print(f"Recall: {results_partial['eval_recall']:.4f}")


Some weights of WeightedBERTPartial were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'loss_fct.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Trainable parameters:
bert.encoder.layer.10.attention.self.query.weight
bert.encoder.layer.10.attention.self.query.bias
bert.encoder.layer.10.attention.self.key.weight
bert.encoder.layer.10.attention.self.key.bias
bert.encoder.layer.10.attention.self.value.weight
bert.encoder.layer.10.attention.self.value.bias
bert.encoder.layer.10.attention.output.dense.weight
bert.encoder.layer.10.attention.output.dense.bias
bert.encoder.layer.10.attention.output.LayerNorm.weight
bert.encoder.layer.10.attention.output.LayerNorm.bias
bert.encoder.layer.10.intermediate.dense.weight
bert.encoder.layer.10.intermediate.dense.bias
bert.encoder.layer.10.output.dense.weight
bert.encoder.layer.10.output.dense.bias
bert.encoder.layer.10.output.LayerNorm.weight
bert.encoder.layer.10.output.LayerNorm.bias
bert.encoder.layer.11.attention.self.query.weight
bert.encoder.layer.11.attention.self.query.bias
bert.encoder.layer.11.attention.self.key.weight
bert.encoder.layer.11.attention.self.key.bias
bert.encoder.laye

Step,Training Loss,Validation Loss,F1,Precision,Recall,False Positives,False Negatives
50,0.3053,0.1578,0.0,0.0,0.0,0,32
100,0.1053,0.057733,0.702703,0.619048,0.8125,16,6
150,0.0471,0.051403,0.714286,0.657895,0.78125,13,7
200,0.0495,0.048369,0.727273,0.705882,0.75,10,8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Final Evaluation Results (Partial Fine-Tuning):
F1 Score: 0.7273
Precision: 0.7059
Recall: 0.7500
