Toxicity Detection LLM Model (T5 Based)

In [1]:
# Import Necessary Libraries
import pandas as pd
import numpy as np
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset

In [3]:
# Import dataset
chatlogs = pd.read_csv('chatlogs.csv')
chatlogs.head()

Unnamed: 0.1,Unnamed: 0,message,association_to_offender,time,case_total_reports,allied_report_count,enemy_report_count,most_common_report_reason,chatlog_id,champion_name
0,0,gold 2 zed,enemy,00:00:21,8,0,2,Negative Attitude,1,Udyr
1,1,IIII,enemy,00:00:27,8,0,2,Negative Attitude,1,Riven
2,2,nice premade lie :o,enemy,00:00:27,8,0,2,Negative Attitude,1,Udyr
3,3,ISI,enemy,00:00:28,8,0,2,Negative Attitude,1,Riven
4,4,smiteless pls,enemy,00:00:43,8,0,2,Negative Attitude,1,Udyr


Data Cleaning Process
- Removing NaNs, keeping only the necessary rows

In [4]:
# Check for NaN values in the columns used to create player_id
missing_values = chatlogs[['message', 'chatlog_id', 'champion_name', 'association_to_offender']].isnull().sum()
print("Missing values per column:\n", missing_values)

# Drop rows with NaN values if they are not needed
chatlogs = chatlogs.dropna(subset=['chatlog_id', 'champion_name', 'association_to_offender'])


Missing values per column:
 message                     29
chatlog_id                   0
champion_name              104
association_to_offender    104
dtype: int64


In [5]:
# Filter the chatlogs to only include games that have one 'offender'
games_with_offenders = chatlogs[chatlogs['association_to_offender'] == 'offender']


chatlogs_filtered = chatlogs[chatlogs['chatlog_id'].isin(games_with_offenders['chatlog_id'])]

# Check the shape of the new filtered dataset
print(chatlogs_filtered.shape)


(1691001, 10)


Data Processing
- Modifying and editing dataset; creating labels and features

In [7]:
# Ensure all entries in the 'messages' column are strings
chatlogs_filtered = chatlogs_filtered.copy()
chatlogs_filtered['message'] = chatlogs_filtered['message'].fillna("").astype(str)

# Create a unique player ID per game based on 'champion_name' and 'association_to_offender'
chatlogs_filtered['player_id'] = (
    chatlogs_filtered.groupby(['chatlog_id', 'champion_name', 'association_to_offender'])
    .ngroup()  # Assigns a unique integer group number
)

# Example label assignment based on a condition
chatlogs_filtered['label'] = chatlogs_filtered['association_to_offender'].apply(
    lambda x: 1 if x == 'offender' else 0
)

# Drop unnecessary columns
chatlogs_filtered = chatlogs_filtered.drop(columns=["champion_name", "association_to_offender"])

chatlogs_filtered.head()

Unnamed: 0.1,Unnamed: 0,message,association_to_offender,time,case_total_reports,allied_report_count,enemy_report_count,most_common_report_reason,chatlog_id,champion_name,player_id,label
0,0,gold 2 zed,enemy,00:00:21,8,0,2,Negative Attitude,1,Udyr,6,0
1,1,IIII,enemy,00:00:27,8,0,2,Negative Attitude,1,Riven,5,0
2,2,nice premade lie :o,enemy,00:00:27,8,0,2,Negative Attitude,1,Udyr,6,0
3,3,ISI,enemy,00:00:28,8,0,2,Negative Attitude,1,Riven,5,0
4,4,smiteless pls,enemy,00:00:43,8,0,2,Negative Attitude,1,Udyr,6,0


In [9]:
# Input Dataset: Group chatlogs by game
input_data = (
    chatlogs_filtered
    .sort_values(by=['chatlog_id', 'time'])  # Sort by game and timestamp
    .groupby('chatlog_id')
    .agg({
        'player_id': list,  # Collect all player IDs in the sequence
        'message': list     # Collect all messages in the sequence
    })
    .reset_index()
)

input_data.head()

Unnamed: 0,chatlog_id,player_id,message
0,1,"[6, 5, 6, 5, 6, 6, 6, 6, 6, 5, 4, 6, 5, 6, 5, ...","[gold 2 zed, IIII, nice premade lie :o, ISI, s..."
1,2,"[16, 11, 12, 17, 18, 17, 17, 18, 15, 18, 12, 1...","[xepis skype, ok, skype, soraka mid op, so whe..."
2,3,"[25, 25, 25, 25, 23, 23, 23, 27, 27, 24, 26, 2...","[invade, GO, THEY DONT HAVE JUNGLE, FEAR FIDDL..."
3,4,"[36, 36, 36, 36, 29, 34, 38, 36, 38, 38, 34, 3...","[can u babysit us, i m really bad at early, bu..."
4,5,"[45, 45, 41, 39, 45, 45, 45, 45, 39, 41, 41, 4...","[smitless, if possible, y, so little bushes ^^..."


In [10]:
# Target Dataset: Get the offender for each game
target_data = (
    chatlogs_filtered[chatlogs_filtered['label'] == 1]  # Filter rows where label == 1 (offender)
    .groupby('chatlog_id')
    .agg({
        'player_id': 'first'  # Assuming there is one offender per game
    })
    .reset_index()
)

target_data.head()


Unnamed: 0,chatlog_id,player_id
0,1,7
1,2,16
2,3,25
3,4,37
4,5,39


In [11]:
# Combine player_id and message for each game
input_data['text'] = input_data.apply(
    lambda row: ' '.join([f"Player {p}: {m}" for p, m in zip(row['player_id'], row['message'])]),
    axis=1
)

input_data.head()

# Merge with the target dataset to create labels
training_data = input_data.merge(target_data, on='chatlog_id', how='inner')

training_data.rename(columns={'player_id_y': 'offender'}, inplace=True)
training_data.head()

Unnamed: 0,chatlog_id,player_id_x,message,text,offender
0,1,"[6, 5, 6, 5, 6, 6, 6, 6, 6, 5, 4, 6, 5, 6, 5, ...","[gold 2 zed, IIII, nice premade lie :o, ISI, s...",Player 6: gold 2 zed Player 5: IIII Player 6: ...,7
1,2,"[16, 11, 12, 17, 18, 17, 17, 18, 15, 18, 12, 1...","[xepis skype, ok, skype, soraka mid op, so whe...",Player 16: xepis skype Player 11: ok Player 12...,16
2,3,"[25, 25, 25, 25, 23, 23, 23, 27, 27, 24, 26, 2...","[invade, GO, THEY DONT HAVE JUNGLE, FEAR FIDDL...",Player 25: invade Player 25: GO Player 25: THE...,25
3,4,"[36, 36, 36, 36, 29, 34, 38, 36, 38, 38, 34, 3...","[can u babysit us, i m really bad at early, bu...",Player 36: can u babysit us Player 36: i m rea...,37
4,5,"[45, 45, 41, 39, 45, 45, 45, 45, 39, 41, 41, 4...","[smitless, if possible, y, so little bushes ^^...",Player 45: smitless Player 45: if possible Pla...,39


In [12]:
# Calculate the length of each chatlog in words
input_data['text_length'] = input_data['text'].apply(lambda x: len(x.split()))

# Find the longest chatlog
longest_chatlog = input_data.loc[input_data['text_length'].idxmax()]

# Display the longest chatlog details
longest_chatlog[['chatlog_id', 'text_length', 'text']]

training_data.drop(['player_id_x', 'message'], axis=1, inplace=True)

In [13]:
# Calculate minimum, maximum, and average chat log lengths
min_length = input_data['text_length'].min()
max_length = input_data['text_length'].max()
avg_length = input_data['text_length'].mean()
quantile_90 = input_data['text_length'].quantile(0.90)
min_length, max_length, avg_length, quantile_90


(3, 3832, 827.9148058740697, 1500.0)

In [14]:
# Convert labels to strings
training_data['offender'] = training_data['offender'].astype(str)

# Adding custom prompt and labels
training_data['text'] = training_data['text'].apply(lambda x: f"Identify the most toxic player in this multiplayer game chat based on offensive language, negative behavior, spamming, or inappropriate names. Indicate the player’s number from the chat. Text:{x}")
labels = training_data['offender'].values

training_data.shape

(9942, 3)

## Model Training
- Use T5-small because it can understand context and generate unique outputs per game

In [38]:
# Split data
train_texts, val_texts, train_labels, val_labels = train_test_split(
    training_data['text'], training_data['offender'], test_size=0.2, random_state=42
)

val_texts, test_texts, val_labels, test_labels = train_test_split(
    val_texts, val_labels, test_size=0.5, random_state=42
)


In [16]:
# Change inputs data type to fit model

train_texts = train_texts.tolist() if not isinstance(train_texts, list) else train_texts
val_texts = val_texts.tolist() if not isinstance(val_texts, list) else val_texts
train_labels = train_labels.tolist() if not isinstance(train_labels, list) else train_labels
val_labels = val_labels.tolist() if not isinstance(val_labels, list) else val_labels
test_texts = test_texts.tolist() if not isinstance(test_texts, list) else test_texts
test_labels = test_labels.tolist() if not isinstance(test_labels, list) else test_labels

In [23]:
# Initialize Tokenizer and Model t5-small

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Initializing T5 model...")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(device)

Initializing T5 model...


In [18]:
def tokenize_data(texts, labels):
    encodings = tokenizer(list(texts), padding=True, truncation=True, max_length=1800, return_tensors="pt") # Chose 1800 so it can keep at least 85% of the games' full gamechat
    targets = tokenizer(list(labels), padding=True, truncation=True, max_length=10, return_tensors="pt")
    return encodings.input_ids, encodings.attention_mask, targets.input_ids

# Tokenize Data
print("Tokenizing data...")
# Tokenize the dataset
train_inputs, train_masks, train_labels = tokenize_data(train_texts, train_labels)
val_inputs, val_masks, val_labels = tokenize_data(val_texts, val_labels)
test_inputs, test_masks, test_labels = tokenize_data(test_texts, test_labels)
print("Finished...")

Tokenizing data...
Finished...


In [19]:
# Create DataLoaders
train_dataset = TensorDataset(train_inputs, train_masks, train_labels)
val_dataset = TensorDataset(val_inputs, val_masks, val_labels)
test_dataset = TensorDataset(test_inputs, test_masks, test_labels)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)
test_loader = DataLoader(test_dataset, batch_size=8)

In [24]:
# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Training Loop
print("Starting training on the dataset...")
epochs = 8

# Set Early Stopping Parameters
best_val_loss = float('inf')
patience = 2  # Stop after this many epochs without improvement
wait = 0

for epoch in range(epochs):
    model.train()
    total_loss = 0

    # Training Phase
    for batch in train_loader:
        input_ids, attention_mask, labels = [x.to(device) for x in batch]

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1} - Training Loss: {avg_train_loss:.4f}")

    # Validation Phase
    model.eval()
    val_loss = 0
    correct_preds = 0

    with torch.no_grad():
        for batch in val_loader:
            input_ids, attention_mask, labels = [x.to(device) for x in batch]

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            val_loss += outputs.loss.item()

            pred_ids = model.generate(input_ids=input_ids, attention_mask=attention_mask)
            preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
            true_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

            correct_preds += sum(p == t for p, t in zip(preds, true_labels))

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = correct_preds / len(val_loader.dataset)
    print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

     # Save Best Model
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        wait = 0  # Reset patience
        print(f"Saving model with Validation Loss: {best_val_loss:.4f}")
        model.save_pretrained("./t5_offender_model")
        tokenizer.save_pretrained("./t5_offender_model")
    else:
        wait += 1
        if wait >= patience:
            print(f"No improvement for {patience} epochs. Early stopping...")
            break



Starting training on the dataset...
Epoch 1 - Training Loss: 0.6437




Validation Loss: 0.5492, Accuracy: 0.2777
Saving model with Validation Loss: 0.5492
Epoch 2 - Training Loss: 0.5254




Validation Loss: 0.5135, Accuracy: 0.3008
Saving model with Validation Loss: 0.5135
Epoch 3 - Training Loss: 0.4905




Validation Loss: 0.5223, Accuracy: 0.3018
Epoch 4 - Training Loss: 0.4634
Validation Loss: 0.5174, Accuracy: 0.3189
No improvement for 2 epochs. Early stopping...


In [35]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
def evaluate_model(model, test_loader, tokenizer, device):
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids, attention_mask, labels = [x.to(device) for x in batch]

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            total_loss += outputs.loss.item()

            # Generate predictions
            pred_ids = model.generate(input_ids=input_ids, attention_mask=attention_mask)
            preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
            true_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

            all_preds.extend(preds)
            all_labels.extend(true_labels)

    avg_test_loss = total_loss / len(test_loader)
    test_accuracy = accuracy_score(all_labels, all_preds)

    print("\n--- Test Set Results ---")
    print(f"Test Loss: {avg_test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

    return avg_test_loss, test_accuracy


In [39]:

test_texts = test_texts.tolist() if not isinstance(test_texts, list) else test_texts
test_labels = test_labels.tolist() if not isinstance(test_labels, list) else test_labels

test_inputs, test_masks, test_labels = tokenize_data(test_texts, test_labels)

test_dataset = TensorDataset(test_inputs, test_masks, test_labels)
test_loader = DataLoader(test_dataset, batch_size=8)

In [40]:
# Run Evaluation
evaluate_model(model, test_loader, tokenizer, device)




--- Test Set Results ---
Test Loss: 0.4816
Test Accuracy: 0.3668


(0.4815534555912018, 0.36683417085427134)