# Toxic Comment Classification

## Multi-Label Classification using Bidirectional LSTM

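This notebook explores the data and presents the results of a deep learning approach to toxic comment classification: a BiLSTM evaluated with 10-fold cross-validation. The model itself is trained by the `train_models.py` script (see Section 3).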

In [None]:
import json
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
sns.set_style('whitegrid')

## 1. Load Data

We use the complete dataset with 159,571 Wikipedia comments.

In [None]:
# Read the complete training set from disk
train_df = pd.read_csv('../data/train.csv')

# Dataset size, plus every column after the first two
# (presumably the id and comment-text columns; confirm against the CSV)
n_samples = train_df.shape[0]
trailing_columns = train_df.columns[2:].tolist()
print(f"Training samples: {n_samples}")
print(f"Label columns: {trailing_columns}")

# The frame preview is the cell's displayed output
print("\nFirst few rows:")
train_df.head()

## 2. Exploratory Data Analysis

### 2.1 Label Distribution

In [None]:
# The six toxicity label columns and the number of comments carrying each one,
# ordered from most to least frequent
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
label_counts = train_df[label_cols].sum().sort_values(ascending=False)

# Bar chart of the per-label counts
fig, ax = plt.subplots(figsize=(10, 6))
label_counts.plot(kind='bar', color='steelblue', ax=ax)
ax.set_title('Distribution of Toxicity Labels', fontsize=14, fontweight='bold')
ax.set_xlabel('Label', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
fig.savefig('../report/figures/label_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

# Text summary, listed in the order the labels are declared above
print("\nLabel Statistics:")
n_comments = len(train_df)
for label in label_cols:
    count = label_counts[label]
    percentage = (count / n_comments) * 100
    print(f"{label:15s}: {count:6d} ({percentage:5.2f}%)")

### 2.2 Multi-Label Distribution

Analyze how many labels each comment has.

In [None]:
# Row-wise label totals, then the number of comments sharing each total
label_counts_per_comment = train_df[label_cols].sum(axis=1)
multi_label_dist = label_counts_per_comment.value_counts().sort_index()

# Bar chart: x = number of labels on a comment, y = number of such comments
fig, ax = plt.subplots(figsize=(10, 6))
multi_label_dist.plot(kind='bar', color='coral', ax=ax)
ax.set_title('Number of Labels per Comment', fontsize=14, fontweight='bold')
ax.set_xlabel('Number of Labels', fontsize=12)
ax.set_ylabel('Number of Comments', fontsize=12)
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
fig.savefig('../report/figures/multilabel_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

# Same distribution as text, with each bucket's share of all comments
print("\nMulti-Label Statistics:")
n_comments = len(train_df)
for num_labels, count in multi_label_dist.items():
    percentage = (count / n_comments) * 100
    print(f"{num_labels} label(s): {count:6d} comments ({percentage:5.2f}%)")

### 2.3 Label Correlation

Analyze correlations between different toxicity types.

In [None]:
# Pairwise correlation between the label columns
correlation_matrix = train_df[label_cols].corr()

# Annotated heatmap of the full matrix
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    square=True,
    cbar_kws={'label': 'Correlation'},
    ax=ax,
)
ax.set_title('Correlation Between Toxicity Labels', fontsize=14, fontweight='bold')
fig.tight_layout()
fig.savefig('../report/figures/label_correlation.png', dpi=300, bbox_inches='tight')
plt.show()

# Each unordered label pair once (upper triangle of the matrix),
# ranked by the magnitude of its correlation; the top five are printed
print("\nStrongest Correlations:")
n_labels = len(label_cols)
corr_pairs = [
    (label_cols[row], label_cols[col], correlation_matrix.iloc[row, col])
    for row in range(n_labels)
    for col in range(row + 1, n_labels)
]
corr_pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)
for first, second, value in corr_pairs[:5]:
    print(f"{first} - {second}: {value:.3f}")

### 2.4 Text Length Analysis

In [None]:
# Per-comment length in characters and in whitespace-separated tokens
train_df['comment_length'] = train_df['comment_text'].str.len()
train_df['word_count'] = train_df['comment_text'].str.split().str.len()

# Character lengths split by the 'toxic' label
toxic_lengths = train_df[train_df['toxic'] == 1]['comment_length']
non_toxic_lengths = train_df[train_df['toxic'] == 0]['comment_length']

# Both panels show only comments up to this many characters
MAX_CHARS_SHOWN = 1000
group_names = ['Non-toxic', 'Toxic']
length_groups = [non_toxic_lengths, toxic_lengths]

fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: overlaid histograms. The bins are restricted to the displayed
# range so that all 50 of them fall inside the visible window; binning the
# full data range would leave only a fraction of the bins on screen.
hist_ax.hist(length_groups, bins=50, range=(0, MAX_CHARS_SHOWN), label=group_names,
             color=['green', 'red'], alpha=0.6)
hist_ax.set_xlabel('Character Count', fontsize=12)
hist_ax.set_ylabel('Frequency', fontsize=12)
hist_ax.set_title('Comment Length Distribution', fontsize=14, fontweight='bold')
hist_ax.legend()
hist_ax.set_xlim(0, MAX_CHARS_SHOWN)

# Right panel: box plots. Tick labels are set on the axis instead of through
# boxplot's `labels=` keyword, which newer Matplotlib releases renamed to
# `tick_labels`; boxes are drawn at the default positions 1..N.
box_ax.boxplot(length_groups)
box_ax.set_xticks(range(1, len(group_names) + 1))
box_ax.set_xticklabels(group_names)
box_ax.set_ylabel('Character Count', fontsize=12)
box_ax.set_title('Comment Length Comparison', fontsize=14, fontweight='bold')
box_ax.set_ylim(0, MAX_CHARS_SHOWN)

fig.tight_layout()
fig.savefig('../report/figures/length_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Means are computed over all comments, not just the displayed range
print("\nLength Statistics:")
print(f"Non-toxic comments - Mean length: {non_toxic_lengths.mean():.1f} chars")
print(f"Toxic comments     - Mean length: {toxic_lengths.mean():.1f} chars")

### 2.5 Word Frequency Analysis

In [None]:
def top_words(comments, n=20):
    """Return the ``n`` most frequent lowercase, whitespace-separated tokens.

    Tokens are counted one comment at a time, so the corpus is never joined
    into a single large string. Counts and tie order match counting the
    space-joined text in one pass.
    """
    token_counts = Counter()
    for text in comments.astype(str):
        token_counts.update(text.lower().split())
    return token_counts.most_common(n)


def plot_word_bars(ax, words, counts, color, title):
    """Draw a horizontal bar chart of ``counts`` labelled with ``words`` on ``ax``."""
    positions = range(len(words))
    ax.barh(positions, counts, color=color, alpha=0.6)
    ax.set_yticks(positions)
    ax.set_yticklabels(words)
    ax.set_xlabel('Frequency', fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.invert_yaxis()  # first (most frequent) word at the top


# Most common words in toxic comments (all of them)
toxic_word_freq = top_words(train_df[train_df['toxic'] == 1]['comment_text'])

# Most common words in non-toxic comments.
# NOTE(review): only the first 10,000 non-toxic comments are counted, whereas
# every toxic comment is counted above, so raw frequencies are not directly
# comparable between the two panels.
non_toxic_word_freq = top_words(train_df[train_df['toxic'] == 0]['comment_text'].head(10000))

# Visualize the two rankings side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

words1, counts1 = zip(*toxic_word_freq)
plot_word_bars(ax1, words1, counts1, color='red', title='Top 20 Words in Toxic Comments')

words2, counts2 = zip(*non_toxic_word_freq)
plot_word_bars(ax2, words2, counts2, color='green', title='Top 20 Words in Non-Toxic Comments')

plt.tight_layout()
plt.savefig('../report/figures/word_frequency.png', dpi=300, bbox_inches='tight')
plt.show()

## 3. BiLSTM Model Training

The BiLSTM model is trained using the `train_models.py` script with 10-fold cross-validation.

**Model Configuration:**
- Vocabulary size: 3,000 tokens
- Sequence length: 100
- Embedding dimension: 32
- LSTM units: 32 per direction (64 total)
- Dense layers: 2 × 64 units with ReLU + Dropout(0.5)
- Output: 6 sigmoid units (multi-label)

**Training Configuration:**
- 10-fold stratified cross-validation
- 5 epochs per fold
- Batch size: 32
- Optimizer: Adam (lr=0.001)
- Loss: Binary cross-entropy

**To train the model:**
```bash
python train_models.py
```

**Training time:** ~75 minutes on CPU (10 folds × 7.5 min each)

## 4. Load Trained Model and Results

In [None]:
# Load model metadata.
# The file is opened explicitly as UTF-8 so that parsing does not depend on
# the platform's default text encoding.
with open('../models/bilstm_toxic_classifier_metadata.json', 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Print the three metadata fields this notebook reports
print("Model Information:")
print(f"  Model name: {metadata['model_name']}")
print(f"  Trained: {metadata['is_trained']}")
print(f"  Saved: {metadata['save_timestamp']}")

## 5. Cross-Validation Results

The model was evaluated using stratified 10-fold cross-validation on the complete dataset.

**Results:**

| Metric | Mean | Std Dev |
|--------|------|----------|
| **F1-Score (Macro)** | 0.6877 | 0.0143 |
| **Precision (Macro)** | 0.7783 | 0.0236 |
| **Recall (Macro)** | 0.6282 | 0.0246 |
| **Hamming Loss** | 0.0190 | 0.0007 |
| **ROC-AUC (Macro)** | 0.9658 | 0.0016 |
| **Accuracy** | 0.9172 | 0.0030 |

**Key Observations:**
- Excellent ROC-AUC (0.9658) indicates strong ranking ability
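- High macro-averaged precision (0.7783): averaged over the six labels, about 78% of positive predictions are correct
- Moderate macro-averaged recall (0.6282): averaged over the six labels, about 63% of true positives are detected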
- Very low Hamming Loss (0.0190) shows few label prediction errors
- Low standard deviations across all metrics indicate a stable, robust model

## 6. Model Performance Visualization

In [None]:
# Visualize CV results.
# Means and standard deviations are entered by hand and must be kept in sync
# with the results table in Section 5.
metrics = ['F1-Score', 'Precision', 'Recall', 'ROC-AUC']
means = [0.6877, 0.7783, 0.6282, 0.9658]
stds = [0.0143, 0.0236, 0.0246, 0.0016]

# Vertical gap between the top of an error bar and its value label
LABEL_OFFSET = 0.02

fig, ax = plt.subplots(figsize=(10, 6))
x_pos = np.arange(len(metrics))
ax.bar(x_pos, means, yerr=stds, align='center', alpha=0.7,
       color='steelblue', capsize=10, error_kw={'linewidth': 2})
ax.set_ylabel('Score', fontsize=12)
ax.set_xlabel('Metric', fontsize=12)
ax.set_title('BiLSTM 10-Fold Cross-Validation Results', fontsize=14, fontweight='bold')
ax.set_xticks(x_pos)
ax.set_xticklabels(metrics)
# Leave headroom above 1.0: the value label of the tallest bar starts just
# below 1.0 and would otherwise spill over the top edge of the axes.
ax.set_ylim([0, 1.1])
ax.grid(axis='y', alpha=0.3)

# Add value labels above each error bar
for i, (mean, std) in enumerate(zip(means, stds)):
    ax.text(i, mean + std + LABEL_OFFSET, f'{mean:.4f}±{std:.4f}',
            ha='center', va='bottom', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig('../report/figures/cv_results.png', dpi=300, bbox_inches='tight')
plt.show()

## 7. Conclusion

This project successfully implements a Bidirectional LSTM for multi-label toxic comment classification:

**Key Achievements:**
- ✓ Trained on complete dataset (159,571 samples)
- ✓ Robust 10-fold cross-validation methodology
- ✓ Strong performance (F1: 0.69, ROC-AUC: 0.97)
- ✓ Low variance across folds (stable model)
- ✓ Efficient architecture (1.5MB model size)

**Model Characteristics:**
- **High Precision:** Conservative model, minimizes false positives
- **Moderate Recall:** Catches the majority (about 63%, macro-averaged) of toxic content
- **Excellent Ranking:** ROC-AUC of 0.97 shows strong discrimination ability

**Future Improvements:**
- Experiment with transformer architectures (BERT, RoBERTa)
- Implement attention mechanisms for interpretability
- Ensemble methods combining multiple models
- Fine-tune on domain-specific data