In [None]:
!pip install -U "numpy<2.0" "matplotlib==3.8.2" scikit-learn pandas "transformers==4.44.2" seaborn tqdm "torch==2.4.0" --force-reinstall --no-cache-dir

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Force single GPU usage
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_DISABLED"] = "true"
import torch
import pandas as pd
import numpy as np
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy and PyTorch so repeated runs start from the same RNG state
np.random.seed(42)
torch.manual_seed(42)

# Prefer the GPU when one is visible, otherwise fall back to the CPU
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
print(f"Using device: {device}")

# Report the runtime environment
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if device.type == "cuda":
    gpu_count = torch.cuda.device_count()
    gpu_name = torch.cuda.get_device_name(0)
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"CUDA device count: {gpu_count}")
    print(f"GPU: {gpu_name}")
    print(f"GPU Memory: {gpu_memory_gb:.1f} GB")

In [None]:
# Configuration: model choice, inference limits, dataset locations, label
# names, and the per-label prompt templates used by the inference cells.
MODEL_NAME = "google/flan-t5-base"

# Model configuration for inference
MAX_LENGTH = 512  # Maximum input length for T5 (tokenizer truncates prompts to this many tokens)
GENERATION_MAX_LENGTH = 10  # Passed as max_new_tokens to model.generate in the inference cells
BATCH_SIZE = 8  # Batch size for inference
# NOTE(review): BATCH_SIZE is not referenced by the cells visible in this
# notebook; predict_toxicity_for_text generates one prompt at a time.

# Adjust this path to point to your local dataset location
TRAIN_DATA_PATH = "/kaggle/input/jigsaw-toxic-comment-classification-preprocessed/train_preprocessed.csv"  # Rebound to the train-split CSV by the data-splitting cell
# NOTE(review): OUTPUT_DIR ends with "/" and later cells build paths as
# f"{OUTPUT_DIR}/name", so the resulting paths contain a double slash.
OUTPUT_DIR = "/kaggle/working/"
TEXT_COLUMN = "comment_text"  # Column whose value is substituted into the prompts
LABEL_COLUMNS = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
NUM_LABELS = len(LABEL_COLUMNS)

# Few-shot prompt per toxicity category, keyed by the names in LABEL_COLUMNS.
# Each template holds a definition, five example comments, and a yes/no
# question; the {text} placeholder is filled with str.format(text=...) in
# predict_toxicity_for_text.
PROMPT_TEMPLATES = {
    "toxic": """A toxic comment is a rude, disrespectful, or unreasonable utterance that is likely to make people leave a discussion.

Examples of toxic comments:
1. "Your opinion is completely off-base and pointless—you don't bring anything of value to this conversation."
2. "Honestly, I can't believe you're still typing—this is just mind-numbing fluff."
3. "This entire thread is embarrassing; you clearly don't know what you're talking about."
4. "Can you please stop wasting everyone's time with this garbage?"
5. "Reading your posts is exhausting; maybe you should rethink posting at all."

Based on the definition and examples above, is this comment toxic? Answer yes or no.
Comment: {text}""",

    "severe_toxic": """A severe_toxic comment exhibits very hateful or aggressive language—far beyond mere profanity—such that it is highly likely to drive participants away or cause them to abandon sharing their perspective.

Examples of severely toxic comments:
1. "You are an absolute disgrace—no one wants to hear your toxic drivel; just disappear."
2. "I sincerely hope you get banned and never show your pathetic face here again."
3. "Your comments are the vile rubbish of the internet, and I want you gone."
4. "Nobody cares about your warped opinions, and you'd be doing us all a favor shutting up forever."
5. "This is filth, pure filth—and it's poisoning everyone's time; stop posting or get banned."

Based on the definition and examples above, is this comment severely toxic? Answer yes or no.
Comment: {text}""",

    "obscene": """An obscene comment contains vulgar or profane language (e.g., explicit curse words or sexual references) intended to shock or offend.

Examples of obscene comments:
1. "You're a fucking clown, and your posts are full of utter bullshit."
2. "Go fuck yourself—you don't have a clue what you're talking about."
3. "That was some pathetic horny rambling—get out of here with that smut."
4. "Your comment is pure disgusting crap—full of stupid fucking lies."
5. "You piece of shit, stop spewing your obscene drivel at us."

Based on the definition and examples above, is this comment obscene? Answer yes or no.
Comment: {text}""",

    "threat": """A threat is a statement expressing an intention to inflict pain, injury, damage, or other hostile action against an individual or group, or to coerce them by implying future harm.

Examples of threatening comments:
1. "If you don't shut up, I'm going to make sure you regret it."
2. "Try posting that again and I'll find out where you live."
3. "You better delete that now or you'll pay the price."
4. "Keep talking like that and I'll come after you personally."
5. "One more smart comment from you and I guarantee you'll be sorry."

Based on the definition and examples above, does this comment contain a threat? Answer yes or no.
Comment: {text}""",

    "insult": """An insult is an abusive, inflammatory, or negative remark directed at a person or group, intended to demean, belittle, or provoke.

Examples of insulting comments:
1. "You're the most clueless person I've seen online—embarrassing."
2. "Nice rant, genius—you'd fail a kindergarten test."
3. "That idea was so stupid I can't believe you even wrote it."
4. "You're pathetic—someone as ignorant as you shouldn't be posting in public."
5. "Thanks for your insight, Captain Obvious—you're really helpful (not)."

Based on the definition and examples above, is this comment insulting? Answer yes or no.
Comment: {text}""",

    "identity_hate": """An identity_hate comment targets a person or group based on a protected characteristic (e.g., race, religion, gender identity, sexual orientation), conveying negative or hateful sentiments toward that identity.

Examples of identity-based hate comments:
1. "Of course you'd say that—that's typical of your religion, always acting like that."
2. "People from your country are worthless and should be ashamed."
3. "Why are gays always so annoying? Just go away."
4. "You're only smart for a woman—too bad you're just a girl."
5. "I can't stand black people—they ruin everything they touch."

Based on the definition and examples above, does this comment contain identity-based hate? Answer yes or no.
Comment: {text}"""
}

# Echo the active configuration; `device` comes from the setup cell
print(f"Model: {MODEL_NAME}")
print(f"Using device: {device}")
print(f"Label columns: {LABEL_COLUMNS}")
print(f"Number of labels: {NUM_LABELS}")



In [None]:
# Read the preprocessed corpus and confirm it carries the text and label columns
print("Loading preprocessed training data...")
try:
    full_train_df = pd.read_csv(TRAIN_DATA_PATH)
except FileNotFoundError:
    # Point the user at the configuration cell, then let the error propagate
    print(f" Training data not found at: {TRAIN_DATA_PATH}")
    print("Please update paths in the configuration section")
    raise
else:
    print(f" Training data loaded successfully: {full_train_df.shape}")

# Show which columns the file provides
print("\nOriginal training data columns:")
print(full_train_df.columns.tolist())

# Collect every required column that the file does not provide
required_columns = [TEXT_COLUMN, *LABEL_COLUMNS]
missing_columns = list(
    filter(lambda name: name not in full_train_df.columns, required_columns)
)

if not missing_columns:
    print("\n All required columns found in training data")
else:
    print(f"\n Missing required columns in training data: {missing_columns}")
    print(f"Available columns: {full_train_df.columns.tolist()}")
    print("\nPlease ensure your preprocessed data has the following columns:")
    print(f"- {TEXT_COLUMN} (the processed text)")
    print(f"- {', '.join(LABEL_COLUMNS)} (label columns)")
    raise ValueError("Missing required columns")

# Quick look at the raw rows
print("\nFirst few rows of original training data:")
print(full_train_df.head())

# Positive counts per label in the full corpus
print("\nLabel distribution in original training data:")
label_stats = full_train_df[LABEL_COLUMNS].sum()
print(label_stats)

# The same counts expressed as a share of all rows
print("\nPercentage of positive labels:")
label_percentages = (label_stats / len(full_train_df)) * 100
print(label_percentages)

# Side-by-side bar charts: raw counts on the left, percentages on the right
plt.figure(figsize=(12, 6))
distribution_panels = (
    (label_stats, 'Label Counts in Original Training Data', 'Count'),
    (label_percentages, 'Label Percentages in Original Training Data', 'Percentage (%)'),
)
for panel_number, (panel_series, panel_title, panel_ylabel) in enumerate(distribution_panels, start=1):
    plt.subplot(1, 2, panel_number)
    panel_series.plot(kind='bar')
    plt.title(panel_title)
    plt.xlabel('Label')
    plt.ylabel(panel_ylabel)
    plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

# Carve the corpus into train/validation/test partitions (70%/15%/15%)
section_rule = "=" * 50
print("\n" + section_rule)
print("SPLITTING DATA INTO TRAIN/VALIDATION/TEST")
print(section_rule)

# Both splits are stratified on the first label column only
stratify_column = LABEL_COLUMNS[0]

# Step 1: hold out 15% of all rows as the test set
train_val_df, test_df = train_test_split(
    full_train_df,
    stratify=full_train_df[stratify_column],
    random_state=42,
    test_size=0.15,
)

# Step 2: take 17.6% of the remainder (~15% of the original) for validation
train_df, val_df = train_test_split(
    train_val_df,
    stratify=train_val_df[stratify_column],
    random_state=42,
    test_size=0.176,
)

# Report each partition's size and its share of the full corpus
total_rows = len(full_train_df)
print(f"Data split completed:")
for split_title, split_frame in (("Training", train_df), ("Validation", val_df), ("Test", test_df)):
    print(f"- {split_title} set: {len(split_frame):,} samples ({len(split_frame)/total_rows*100:.1f}%)")
print(f"- Total: {sum(map(len, (train_df, val_df, test_df))):,} samples")

# Build the label-free copy of the test split that inference will read
test_df_no_labels = test_df[[TEXT_COLUMN]].copy()
if 'id' in test_df.columns:
    test_df_no_labels['id'] = test_df['id'].copy()
else:
    # No identifier in the data: number the test rows 0..n-1 in both frames
    row_ids = range(len(test_df))
    test_df_no_labels['id'] = row_ids
    test_df['id'] = row_ids

# Destination files for every partition
train_split_path = f"{OUTPUT_DIR}/train_split.csv"
val_split_path = f"{OUTPUT_DIR}/val_split.csv"
test_split_with_labels_path = f"{OUTPUT_DIR}/test_split_with_labels.csv"
test_split_no_labels_path = f"{OUTPUT_DIR}/test_split_no_labels.csv"

# Write each frame without its pandas index
frames_to_save = (
    (train_df, train_split_path),
    (val_df, val_split_path),
    (test_df, test_split_with_labels_path),
    (test_df_no_labels, test_split_no_labels_path),
)
for frame_to_save, destination_path in frames_to_save:
    frame_to_save.to_csv(destination_path, index=False)

print(f"\n Split datasets saved:")
saved_outputs = (
    ("Training data", train_split_path),
    ("Validation data", val_split_path),
    ("Test data (with labels)", test_split_with_labels_path),
    ("Test data (without labels)", test_split_no_labels_path),
)
for output_description, output_path in saved_outputs:
    print(f"- {output_description}: {output_path}")

# Tabulate how often each label is positive in every partition
print(f"\n LABEL DISTRIBUTION COMPARISON:")
print("-" * 60)

splits_info = dict(
    Original=full_train_df,
    Train=train_df,
    Validation=val_df,
    Test=test_df,
)


def _summarize_split(split_name, split_df):
    """Return one table row: split name, size, and per-label count/percentage."""
    summary = {'Split': split_name, 'Size': len(split_df)}
    for label in LABEL_COLUMNS:
        positives = split_df[label].sum()
        summary[f'{label}_count'] = positives
        summary[f'{label}_pct'] = (positives / len(split_df)) * 100
    return summary


comparison_data = [
    _summarize_split(split_name, split_df)
    for split_name, split_df in splits_info.items()
]

comparison_df = pd.DataFrame(comparison_data)
print("\nLabel counts and percentages by split:")
print(comparison_df.to_string(index=False))

# Draw a 2x2 figure comparing the partitions
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Label Distribution Comparison Across Splits', fontsize=16)

# Shared styling: one colour per split, label names wrapped at underscores
split_colors = ['skyblue', 'lightgreen', 'lightcoral']
wrapped_labels = [label.replace('_', '\n') for label in LABEL_COLUMNS]
label_positions = range(len(LABEL_COLUMNS))

# Panel (0, 0): number of samples in each split, annotated above each bar
splits = ['Train', 'Validation', 'Test']
sizes = [len(train_df), len(val_df), len(test_df)]
size_ax = axes[0, 0]
size_ax.bar(splits, sizes, color=split_colors)
size_ax.set_title('Sample Counts by Split')
size_ax.set_ylabel('Number of Samples')
for bar_index, bar_height in enumerate(sizes):
    size_ax.text(bar_index, bar_height + max(sizes)*0.01, f'{bar_height:,}', ha='center', va='bottom')

# Panel (0, 1): positive-label percentage per split as grouped bars
label_pcts = {
    split_key: [(split_frame[label].sum() / len(split_frame)) * 100 for label in LABEL_COLUMNS]
    for split_key, split_frame in zip(splits, (train_df, val_df, test_df))
}

x = np.arange(len(LABEL_COLUMNS))
width = 0.25

pct_ax = axes[0, 1]
for bar_offset, split_key, split_color in zip((-width, 0, width), splits, split_colors):
    pct_ax.bar(x + bar_offset, label_pcts[split_key], width, label=split_key, color=split_color)

pct_ax.set_title('Label Percentages by Split')
pct_ax.set_xlabel('Labels')
pct_ax.set_ylabel('Percentage (%)')
pct_ax.set_xticks(x)
pct_ax.set_xticklabels(wrapped_labels, rotation=45)
pct_ax.legend()

# Panels (1, 0) and (1, 1): raw positive counts for the train and test splits
train_label_counts = [train_df[label].sum() for label in LABEL_COLUMNS]
test_label_counts = [test_df[label].sum() for label in LABEL_COLUMNS]
count_panels = (
    (axes[1, 0], train_label_counts, 'skyblue', 'Training Set Label Counts'),
    (axes[1, 1], test_label_counts, 'lightcoral', 'Test Set Label Counts'),
)
for count_ax, panel_counts, panel_color, panel_title in count_panels:
    count_ax.bar(label_positions, panel_counts, color=panel_color)
    count_ax.set_title(panel_title)
    count_ax.set_xlabel('Labels')
    count_ax.set_ylabel('Count')
    count_ax.set_xticks(label_positions)
    count_ax.set_xticklabels(wrapped_labels, rotation=45)

# Persist the figure next to the split CSVs, then display it
plt.tight_layout()
plt.savefig(f"{OUTPUT_DIR}/data_split_comparison.png", dpi=300, bbox_inches='tight')
plt.show()

# Point the path constants at the freshly written split files
print(f"\n Updating paths for inference process...")
TRAIN_DATA_PATH = train_split_path
VAL_DATA_PATH = val_split_path
TEST_DATA_PATH = test_split_no_labels_path  # label-free file used for prediction
TEST_WITH_LABELS_PATH = test_split_with_labels_path  # labelled file kept for evaluation

print(f"Updated paths:")
updated_paths = (
    ("TRAIN_DATA_PATH", TRAIN_DATA_PATH),
    ("VAL_DATA_PATH", VAL_DATA_PATH),
    ("TEST_DATA_PATH", TEST_DATA_PATH),
    ("TEST_WITH_LABELS_PATH", TEST_WITH_LABELS_PATH),
)
for constant_name, constant_value in updated_paths:
    print(f"- {constant_name}: {constant_value}")

# Working copies for the rest of the notebook; test_df now refers to the
# label-free frame
train_df_split = train_df.copy()
val_df_split = val_df.copy()
test_df = test_df_no_labels.copy()

print(f"\n Data splitting completed successfully!")
print(f"Final dataset sizes:")
final_sizes = (
    ("Training", train_df_split),
    ("Validation", val_df_split),
    ("Test", test_df),
)
for size_title, sized_frame in final_sizes:
    print(f"- {size_title}: {len(sized_frame):,} samples")
print(f"Ready to proceed with inference on the full test data.")

In [None]:
# Load the Flan-T5 tokenizer and weights named by MODEL_NAME
print("Loading Flan-T5 model and tokenizer...")
print(f"Model: {MODEL_NAME}")

# Tokenizer
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
print(f"Tokenizer loaded. Vocab size: {len(tokenizer)}")

# Model: float16 with automatic device placement on GPU, float32 on CPU
if torch.cuda.is_available():
    model = T5ForConditionalGeneration.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
    )
else:
    model = T5ForConditionalGeneration.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map=None,
    )
    # Without a device map the model is moved to the chosen device explicitly
    model = model.to(device)

first_parameter = next(model.parameters())
print(f"Model loaded on device: {first_parameter.device}")
print(f"Model dtype: {first_parameter.dtype}")

# Smoke test: run one short prompt through the model before the full pass
test_prompt = "Is this comment toxic? Answer yes or no.\nComment: Hello world"
test_inputs = tokenizer(
    test_prompt,
    return_tensors="pt",
    truncation=True,
    max_length=MAX_LENGTH,
)
if torch.cuda.is_available():
    # Move every encoded tensor onto the device that hosts the model
    test_inputs = {name: tensor.to(model.device) for name, tensor in test_inputs.items()}

# Greedy decoding with gradient tracking disabled
with torch.no_grad():
    test_outputs = model.generate(
        **test_inputs,
        max_new_tokens=GENERATION_MAX_LENGTH,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

test_response = tokenizer.decode(test_outputs[0], skip_special_tokens=True)
print(f"Test prompt: {test_prompt}")
print(f"Test response: {test_response}")

print("Flan-T5 model loaded successfully and ready for inference!")

In [None]:
# Run Flan-T5 inference over the held-out test split
print("Generating predictions on test set using Flan-T5...")

# Re-read the label-free test file written by the data-splitting cell
print(f"Loading test data from: {TEST_DATA_PATH}")
test_df = pd.read_csv(filepath_or_buffer=TEST_DATA_PATH)
print(f"Test data loaded: {test_df.shape}")

def parse_response(response):
    """Convert a generated answer into a binary prediction.

    Args:
        response: Decoded model output, expected to contain "yes" or "no".

    Returns:
        1 if the response contains "yes" as a whole word (case-insensitive),
        otherwise 0. An explicit "no", an unclear answer, an empty string,
        and a non-string input all map to 0.
    """
    import re

    if not isinstance(response, str):
        return 0

    # Match "yes" on word boundaries so that words merely containing those
    # letters ("eyes", "yesterday") are not counted as a positive answer.
    if re.search(r"\byes\b", response.lower().strip()):
        return 1

    # "no" and anything unclear both default to the negative class
    return 0

def predict_toxicity_for_text(text, label_category):
    """Ask the model whether `text` belongs to `label_category`.

    The category's template from PROMPT_TEMPLATES is filled with the text,
    tokenized (truncated to MAX_LENGTH), and decoded greedily; the decoded
    answer is mapped to 0/1 by parse_response.
    """
    filled_prompt = PROMPT_TEMPLATES[label_category].format(text=text)

    # Encode the prompt and, on GPU, move the tensors next to the model
    encoded = tokenizer(
        filled_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_LENGTH,
    )
    if torch.cuda.is_available():
        encoded = {name: tensor.to(model.device) for name, tensor in encoded.items()}

    # Deterministic generation without gradient tracking
    generation_options = {
        "max_new_tokens": GENERATION_MAX_LENGTH,
        "do_sample": False,
        "pad_token_id": tokenizer.eos_token_id,
    }
    with torch.no_grad():
        generated = model.generate(**encoded, **generation_options)

    answer = tokenizer.decode(generated[0], skip_special_tokens=True)
    return parse_response(answer)

# Initialize predictions array (rows = test samples, columns = LABEL_COLUMNS)
num_samples = len(test_df)
num_labels = len(LABEL_COLUMNS)
predictions = np.zeros((num_samples, num_labels), dtype=int)

print(f"Starting prediction for {num_samples} samples across {num_labels} categories...")
print("This may take some time...")

# Count failed generations so that silent fallbacks to 0 are visible afterwards
failed_predictions = 0

# Generate predictions for each sample and each label.
# enumerate() yields the row position, which is what the predictions array
# needs; index labels from iterrows() only coincide with positions for a
# default RangeIndex.
for i, raw_text in enumerate(tqdm(test_df[TEXT_COLUMN], total=num_samples, desc="Predicting")):
    # read_csv parses empty cells as NaN (a float), so comments that were
    # empty after preprocessing are not strings here; coerce before len().
    if isinstance(raw_text, str):
        text = raw_text
    elif pd.isna(raw_text):
        text = ""
    else:
        text = str(raw_text)

    # Truncate text if too long to avoid memory issues
    if len(text) > 1000:
        text = text[:1000] + "..."

    for j, label in enumerate(LABEL_COLUMNS):
        try:
            predictions[i, j] = predict_toxicity_for_text(text, label)
        except Exception as e:
            # Best effort: report the failure and keep the default of 0
            failed_predictions += 1
            print(f"Error predicting for sample {i}, label {label}: {e}")
            predictions[i, j] = 0  # Default to 0 on error

print(f"Prediction completed!")
if failed_predictions:
    print(f"Warning: {failed_predictions} predictions failed and were defaulted to 0")
print(f"Predictions shape: {predictions.shape}")
print(f"Predictions are binary: {np.all(np.isin(predictions, [0, 1]))}")

# Assemble the per-label predictions next to their sample ids
submission_df = test_df[['id']].copy()
for label_name, label_predictions in zip(LABEL_COLUMNS, predictions.T):
    submission_df[label_name] = label_predictions

# Write the predictions to disk
submission_filename = f"{OUTPUT_DIR}/test_predictions_binary.csv"
submission_df.to_csv(submission_filename, index=False)
print(f"Binary test predictions saved to {submission_filename}")

# Preview the first rows
print("\nSample binary test predictions:")
print(submission_df.head(10))

# Count and share of positive predictions for every label
print("\nPrediction Statistics:")
positives_per_label = predictions.sum(axis=0)
for label_name, positive_count in zip(LABEL_COLUMNS, positives_per_label):
    positive_percentage = (positive_count / num_samples) * 100
    print(f"{label_name.replace('_', ' ').title():<15}: {positive_count:4d} positive ({positive_percentage:.1f}%)")

print(f"\nPrediction generation completed successfully!")

In [None]:
# Evaluate the saved binary predictions against the labelled test split
print("\n" + "="*80)
print("EVALUATION ON SPLIT TEST SET WITH GROUND TRUTH LABELS")
print("="*80)

# Load the test set with labels for evaluation
print(f" Loading test set with labels from: {TEST_WITH_LABELS_PATH}")
test_with_labels_df = pd.read_csv(TEST_WITH_LABELS_PATH)
print(f" Test set with labels loaded: {test_with_labels_df.shape}")

# Load binary predictions
predictions_file_path = f"{OUTPUT_DIR}/test_predictions_binary.csv"

if os.path.exists(predictions_file_path):
    print(f" Loading binary predictions from: {predictions_file_path}")
    pred_df = pd.read_csv(predictions_file_path)
    print(f" Binary predictions loaded: {pred_df.shape}")

    # Row-wise metrics are only meaningful when both files list the same
    # samples in the same order. Length is always checked; order is checked
    # through the 'id' column whenever both files carry one.
    same_length = len(test_with_labels_df) == len(pred_df)
    same_order = True
    if same_length and 'id' in test_with_labels_df.columns and 'id' in pred_df.columns:
        same_order = bool(np.array_equal(
            test_with_labels_df['id'].astype(str).to_numpy(),
            pred_df['id'].astype(str).to_numpy(),
        ))

    if same_length and same_order:
        # Extract true labels and binary predictions as (samples, labels) arrays
        y_true = test_with_labels_df[LABEL_COLUMNS].values.astype(int)
        y_pred_binary = pred_df[LABEL_COLUMNS].values.astype(int)

        print(f" Evaluation data shape: {y_true.shape}")
        print(f" Predictions are binary: {np.all(np.isin(y_pred_binary, [0, 1]))}")
        print(f" Labels are binary: {np.all(np.isin(y_true, [0, 1]))}")

        # Calculate comprehensive metrics
        print("\n SPLIT TEST SET EVALUATION RESULTS:")
        print("-" * 60)

        # Per-label metrics
        print("Per-Label Performance:")
        for i, label in enumerate(LABEL_COLUMNS):
            true_labels = y_true[:, i]
            pred_labels = y_pred_binary[:, i]

            # Precision/recall/F1 of the positive class for this label
            precision, recall, f1, _ = precision_recall_fscore_support(
                true_labels, pred_labels, average='binary', zero_division=0
            )

            # Accuracy for this label
            accuracy = np.mean(true_labels == pred_labels)

            # Support: number of positive ground-truth samples
            support = np.sum(true_labels)

            print(f"{label.replace('_', ' ').title():<15}: P={precision:.3f} R={recall:.3f} F1={f1:.3f} Acc={accuracy:.3f} (Support: {support})")

        # Aggregate metrics
        print(f"\n AGGREGATE PERFORMANCE:")
        print("-" * 50)

        # Macro averages: unweighted mean of the per-label scores
        macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
            y_true, y_pred_binary, average='macro', zero_division=0
        )

        # Micro averages: pool true/false positives and false negatives over
        # all labels. The 2-D indicator matrices must be passed as-is;
        # flattening them first yields a single binary target, for which
        # 'micro' also counts class 0 and collapses to plain accuracy.
        micro_precision, micro_recall, micro_f1, _ = precision_recall_fscore_support(
            y_true, y_pred_binary, average='micro', zero_division=0
        )

        # Exact match accuracy (all labels must be correct)
        exact_match = np.mean(np.all(y_pred_binary == y_true, axis=1))

        # Hamming loss: fraction of individual label decisions that are wrong
        hamming_loss = np.mean(y_pred_binary != y_true)

        # Label-wise accuracy
        label_accuracies = [np.mean(y_true[:, i] == y_pred_binary[:, i]) for i in range(len(LABEL_COLUMNS))]
        mean_label_accuracy = np.mean(label_accuracies)

        print(f"Macro Average    : P={macro_precision:.3f} R={macro_recall:.3f} F1={macro_f1:.3f}")
        print(f"Micro Average    : P={micro_precision:.3f} R={micro_recall:.3f} F1={micro_f1:.3f}")
        print(f"Exact Match Acc  : {exact_match:.3f}")
        print(f"Mean Label Acc   : {mean_label_accuracy:.3f}")
        print(f"Hamming Loss     : {hamming_loss:.3f}")

        print(f"\n Binary evaluation completed successfully!")
        print(f" Exact Match Accuracy: {exact_match:.3f}")
        print(f" Macro F1 Score: {macro_f1:.3f}")
        print(f" Evaluated on {len(y_true)} test samples")

    elif not same_length:
        print(f" Length mismatch: Test labels ({len(test_with_labels_df)}) vs Predictions ({len(pred_df)})")
    else:
        print(" Row order mismatch: 'id' values in the predictions file do not line up with the labelled test set")
else:
    print(f" Binary predictions file not found: {predictions_file_path}")