# IndoBERT Toxicity Classification Model

## Import Library

In [85]:
import numpy as np
import pandas as pd
import re
import pandas as pd
import torch
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


## Target Dataset

Hate Speech, Abusive Speech, SARA, Pornography, Radicalism, Defamation

In [61]:
toxic_df = pd.DataFrame(columns=["Text", "Hate Speech", "Abusive Speech", "SARA", "Pornography", "Radicalism", "Defamation"])
toxic_df

Unnamed: 0,Text,Hate Speech,Abusive Speech,SARA,Pornography,Radicalism,Defamation


## Dataset : Netifier

In [62]:
netifier_path = "./data/netifier"

In [63]:
netifier_train_path = netifier_path + "/processed_train.csv"
netifier_test_path = netifier_path + "/processed_test.csv"

In [64]:
netifier_train = pd.read_csv(netifier_train_path)
netifier_test = pd.read_csv(netifier_test_path)

In [65]:
netifier_train.head()

Unnamed: 0,original_text,source,pornografi,sara,radikalisme,pencemaran_nama_baik,processed_text
0,[QUOTE=jessepinkman16;5a50ac34d89b093f368b456e...,kaskus,0,0,0,1,jabar memang provinsi barokah boleh juga dan n...
1,"@verosvante kita2 aja nitizen yang pada kepo,t...",instagram,0,0,0,0,kita saja nitizen yang pada penasaran toh kelu...
2,"""#SidangAhok smg sipenista agama n ateknya mat...",twitter,0,1,1,1,sidangahok semoga sipenista agama dan ateknya ...
3,@bolususulembang.jkt barusan baca undang2 ini....,instagram,0,0,0,0,jakarta barusan baca undang ini tetap dibedaka...
4,bikin anak mulu lu nof \nkaga mikir apa kasian...,kaskus,0,0,0,0,buat anak melulu kamu nof nkaga mikir apa kasi...


In [66]:
netifier_test.head()

Unnamed: 0,original_text,source,pornografi,sara,radikalisme,pencemaran_nama_baik,processed_text
0,"1.BUKAN CM SPANDUK PROF,VIDEO2 ORASI MEREKA, B...",twitter,0,0,1,0,bukan hanya spanduk prof video orasi mereka bu...
1,@memeqbeceq gy sange'gatel yh tetek'memekY drn...,twitter,1,0,0,0,gy sange gatel yh tetek memeky drnjng tempat t...
2,Pertama kali denger lagunya enk bgt in dan png...,instagram,0,0,0,0,pertama kali denger lagunya enk sekali in dan ...
3,"astajim, ini pasti yg kasih penghargaan ke ibu...",kaskus,0,0,0,0,astajim ini pasti yang kasih penghargan ke ibu...
4,beda kalo disini kalo komplain lgs di bully am...,kaskus,0,0,0,0,beda kalau di sini kalau keluhan langsung di b...


In [67]:
netifier_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6995 entries, 0 to 6994
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   original_text         6995 non-null   object
 1   source                6995 non-null   object
 2   pornografi            6995 non-null   int64 
 3   sara                  6995 non-null   int64 
 4   radikalisme           6995 non-null   int64 
 5   pencemaran_nama_baik  6995 non-null   int64 
 6   processed_text        6995 non-null   object
dtypes: int64(4), object(3)
memory usage: 382.7+ KB


In [68]:
netifier_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 778 entries, 0 to 777
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   original_text         778 non-null    object
 1   source                778 non-null    object
 2   pornografi            778 non-null    int64 
 3   sara                  778 non-null    int64 
 4   radikalisme           778 non-null    int64 
 5   pencemaran_nama_baik  778 non-null    int64 
 6   processed_text        778 non-null    object
dtypes: int64(4), object(3)
memory usage: 42.7+ KB


**Preprocess:**
- Translate text-based emojis
- Remove excessive newline
- Remove kaskus formatting
- Remove url
- Remove excessive whitespace
- Tokenize text
- Transform slang words
- Remove non aplhabet
- Remove twitter & instagram formatting
- Remove Repeating Characters

Conclusion: *clean enough, recommend to use stemming and stopword*

Hate Speech (subcategories: HS_Individual, HS_Group, HS_Religion, HS_Race, HS_Physical, HS_Gender, HS_Other)
Abusive Speech (subcategories: HS_Weak, HS_Moderate, HS_Strong)
SARA (covering HS_Religion, HS_Race, HS_Group)
Pornography (for pornografi)
Radicalism (for radikalisme)
Defamation (for HS_Individual, pencemaran_nama_baik)

In [69]:
new_data = {
    'Text': netifier_train['processed_text'],
    'SARA': netifier_train['sara'],
    'Pornography': netifier_train['pornografi'],
    'Radicalism': netifier_train['radikalisme'],
    'Defamation': netifier_train['pencemaran_nama_baik'],
    'Hate Speech': 0,
    'Abusive Speech': 0
}

new_df = pd.DataFrame(new_data)
toxic_df = pd.concat([toxic_df, new_df], ignore_index=True)

toxic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6995 entries, 0 to 6994
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            6995 non-null   object
 1   Hate Speech     6995 non-null   object
 2   Abusive Speech  6995 non-null   object
 3   SARA            6995 non-null   object
 4   Pornography     6995 non-null   object
 5   Radicalism      6995 non-null   object
 6   Defamation      6995 non-null   object
dtypes: object(7)
memory usage: 382.7+ KB


In [70]:
new_data = {
    'Text': netifier_test['processed_text'],
    'SARA': netifier_test['sara'],
    'Pornography': netifier_test['pornografi'],
    'Radicalism': netifier_test['radikalisme'],
    'Defamation': netifier_test['pencemaran_nama_baik'],
    'Hate Speech': 0,
    'Abusive Speech': 0
}

new_df = pd.DataFrame(new_data)
toxic_df = pd.concat([toxic_df, new_df], ignore_index=True)

toxic_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7773 entries, 0 to 7772
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            7773 non-null   object
 1   Hate Speech     7773 non-null   object
 2   Abusive Speech  7773 non-null   object
 3   SARA            7773 non-null   object
 4   Pornography     7773 non-null   object
 5   Radicalism      7773 non-null   object
 6   Defamation      7773 non-null   object
dtypes: object(7)
memory usage: 425.2+ KB


## Dataset : okkyibrahim

In [71]:
okkyibrahim_path = "./data/okkyibrahim/preprocessed_indonesian_toxic_tweet.csv"

In [72]:
okkyibrahim = pd.read_csv(okkyibrahim_path)
okkyibrahim.head()

Unnamed: 0,Tweet,HS,Abusive,HS_Individual,HS_Group,HS_Religion,HS_Race,HS_Physical,HS_Gender,HS_Other,HS_Weak,HS_Moderate,HS_Strong
0,cowok usaha lacak perhati gue lantas remeh per...,1,1,1,0,0,0,0,0,1,1,0,0
1,telat tau edan sarap gue gaul cigax jifla cal ...,0,1,0,0,0,0,0,0,0,0,0,0
2,41 kadang pikir percaya tuhan jatuh kali kali ...,0,0,0,0,0,0,0,0,0,0,0,0
3,ku tau mata sipit lihat,0,0,0,0,0,0,0,0,0,0,0,0
4,kaum cebong kafir lihat dongok dungu haha,1,1,0,1,1,0,0,0,0,0,1,0


In [73]:
okkyibrahim.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13169 entries, 0 to 13168
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Tweet          13121 non-null  object
 1   HS             13169 non-null  int64 
 2   Abusive        13169 non-null  int64 
 3   HS_Individual  13169 non-null  int64 
 4   HS_Group       13169 non-null  int64 
 5   HS_Religion    13169 non-null  int64 
 6   HS_Race        13169 non-null  int64 
 7   HS_Physical    13169 non-null  int64 
 8   HS_Gender      13169 non-null  int64 
 9   HS_Other       13169 non-null  int64 
 10  HS_Weak        13169 non-null  int64 
 11  HS_Moderate    13169 non-null  int64 
 12  HS_Strong      13169 non-null  int64 
dtypes: int64(12), object(1)
memory usage: 1.3+ MB


In [74]:
count_nan = okkyibrahim['Tweet'].isnull().sum()
print('Number of NaN values present: ' + str(count_nan))

Number of NaN values present: 48


**Preprocess:**
- Lower casing all text,
- Data cleaning by removing unnecessary characters such as re-tweet symbol (RT), username, URL, and punctuation
- Normalization using 'Alay' dictionary
- Stemming using PySastrawi
- Stop words removal using list from

Conclusion: *need to remove hexadecimal encoding, drop NaN*

In [75]:
okkyibrahim = okkyibrahim.dropna(subset=['Tweet'])
okkyibrahim.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13121 entries, 0 to 13168
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Tweet          13121 non-null  object
 1   HS             13121 non-null  int64 
 2   Abusive        13121 non-null  int64 
 3   HS_Individual  13121 non-null  int64 
 4   HS_Group       13121 non-null  int64 
 5   HS_Religion    13121 non-null  int64 
 6   HS_Race        13121 non-null  int64 
 7   HS_Physical    13121 non-null  int64 
 8   HS_Gender      13121 non-null  int64 
 9   HS_Other       13121 non-null  int64 
 10  HS_Weak        13121 non-null  int64 
 11  HS_Moderate    13121 non-null  int64 
 12  HS_Strong      13121 non-null  int64 
dtypes: int64(12), object(1)
memory usage: 1.4+ MB


In [76]:
def clean_text(text):
    cleaned_text = re.sub(r'[^\x00-\x7F]+', '', text)
    cleaned_text = ' '.join(cleaned_text.split())
    return cleaned_text


okkyibrahim['Tweet'] = okkyibrahim['Tweet'].apply(clean_text)
okkyibrahim.iloc[6:10]

Unnamed: 0,Tweet,HS,Abusive,HS_Individual,HS_Group,HS_Religion,HS_Race,HS_Physical,HS_Gender,HS_Other,HS_Weak,HS_Moderate,HS_Strong
6,deklarasi pilih kepala daerah 2018 aman anti h...,0,0,0,0,0,0,0,0,0,0,0,0
7,gue selesai re watch aldnoah zero kampret 2 ka...,0,1,0,0,0,0,0,0,0,0,0,0
8,admin belanja po nak makan ais kepal milo ais ...,0,0,0,0,0,0,0,0,0,0,0,0
9,enak ngewe,0,1,0,0,0,0,0,0,0,0,0,0


In [77]:
okkyibrahim.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13121 entries, 0 to 13168
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Tweet          13121 non-null  object
 1   HS             13121 non-null  int64 
 2   Abusive        13121 non-null  int64 
 3   HS_Individual  13121 non-null  int64 
 4   HS_Group       13121 non-null  int64 
 5   HS_Religion    13121 non-null  int64 
 6   HS_Race        13121 non-null  int64 
 7   HS_Physical    13121 non-null  int64 
 8   HS_Gender      13121 non-null  int64 
 9   HS_Other       13121 non-null  int64 
 10  HS_Weak        13121 non-null  int64 
 11  HS_Moderate    13121 non-null  int64 
 12  HS_Strong      13121 non-null  int64 
dtypes: int64(12), object(1)
memory usage: 1.4+ MB


In [78]:
new_data = {
    'Text': okkyibrahim['Tweet'],
    'SARA': okkyibrahim[['HS_Individual', 'HS_Group', 'HS_Religion', 'HS_Race', 'HS_Physical', 'HS_Gender', 'HS_Other']].max(axis=1),
    'Pornography': 0,
    'Radicalism': 0,
    'Defamation': okkyibrahim['HS_Individual'],
    'Hate Speech': okkyibrahim[['HS_Individual', 'HS_Group', 'HS_Religion', 'HS_Race', 'HS_Physical', 'HS_Gender', 'HS_Other']].max(axis=1),
    'Abusive Speech': okkyibrahim[['HS_Weak', 'HS_Moderate', 'HS_Strong']].max(axis=1)
}

new_df = pd.DataFrame(new_data)
toxic_df = pd.concat([toxic_df, new_df], ignore_index=True)

toxic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20894 entries, 0 to 20893
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            20894 non-null  object
 1   Hate Speech     20894 non-null  object
 2   Abusive Speech  20894 non-null  object
 3   SARA            20894 non-null  object
 4   Pornography     20894 non-null  object
 5   Radicalism      20894 non-null  object
 6   Defamation      20894 non-null  object
dtypes: object(7)
memory usage: 1.1+ MB


In [80]:
toxic_df.head()

Unnamed: 0,Text,Hate Speech,Abusive Speech,SARA,Pornography,Radicalism,Defamation
0,jabar memang provinsi barokah boleh juga dan n...,0,0,0,0,0,1
1,kita saja nitizen yang pada penasaran toh kelu...,0,0,0,0,0,0
2,sidangahok semoga sipenista agama dan ateknya ...,0,0,1,0,1,1
3,jakarta barusan baca undang ini tetap dibedaka...,0,0,0,0,0,0
4,buat anak melulu kamu nof nkaga mikir apa kasi...,0,0,0,0,0,0


In [81]:
category_columns = ['Hate Speech', 'Abusive Speech', 'SARA', 'Pornography', 'Radicalism', 'Defamation']
toxic_df[category_columns] = toxic_df[category_columns].astype(int)

toxic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20894 entries, 0 to 20893
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            20894 non-null  object
 1   Hate Speech     20894 non-null  int64 
 2   Abusive Speech  20894 non-null  int64 
 3   SARA            20894 non-null  int64 
 4   Pornography     20894 non-null  int64 
 5   Radicalism      20894 non-null  int64 
 6   Defamation      20894 non-null  int64 
dtypes: int64(6), object(1)
memory usage: 1.1+ MB


In [84]:
toxic_df[category_columns].apply(lambda col: (col == 1).sum())

Hate Speech       5552
Abusive Speech    5552
SARA              6790
Pornography       1747
Radicalism        1276
Defamation        5992
dtype: int64

## IndoBERT

In [89]:
train_df_list = []
test_df_list = []

# Split the data for each category
for category in category_columns:
    # Select rows where the category is 1 (positive samples)
    positive_samples = toxic_df[toxic_df[category] == 1]
    negative_samples = toxic_df[toxic_df[category] == 0]
    
    # Split the positive samples into train and test sets
    train_pos, test_pos = train_test_split(positive_samples, test_size=0.2, random_state=42)
    
    # Split the negative samples into train and test sets
    train_neg, test_neg = train_test_split(negative_samples, test_size=0.2, random_state=42)
    
    # Combine the positive and negative samples for this category
    train_df_list.append(train_pos)
    train_df_list.append(train_neg)
    test_df_list.append(test_pos)
    test_df_list.append(test_neg)

# Concatenate all the individual splits to form the final train and test DataFrames
train_df = pd.concat(train_df_list).drop_duplicates().reset_index(drop=True)
test_df = pd.concat(test_df_list).drop_duplicates().reset_index(drop=True)

# Check the result
print(f"Training set size: {len(train_df)}")
print(f"Testing set size: {len(test_df)}")

Training set size: 19828
Testing set size: 13536


In [90]:
tokenizer = BertTokenizer.from_pretrained('indolem/indobert-base-uncased')

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [91]:
def tokenize_text(df, max_len=128):
    return tokenizer(
        df['Text'].tolist(), 
        padding=True, 
        truncation=True, 
        max_length=max_len, 
        return_tensors="pt"
    )

In [92]:
train_encodings = tokenize_text(train_df)
test_encodings = tokenize_text(test_df)

In [93]:
train_labels = train_df[category_columns].values
test_labels = test_df[category_columns].values

In [94]:
model = BertForSequenceClassification.from_pretrained('indolem/indobert-base-uncased', num_labels=len(category_columns))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [95]:
train_dataset = TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], torch.tensor(train_labels, dtype=torch.float32))
test_dataset = TensorDataset(test_encodings['input_ids'], test_encodings['attention_mask'], torch.tensor(test_labels, dtype=torch.float32))

In [96]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

In [97]:
optimizer = AdamW(model.parameters(), lr=1e-5)
epochs = 3
device = torch.device('cpu')
model.to(device)



BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31923, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [98]:
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]

        optimizer.zero_grad()

        # Forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss/len(train_loader)}")

Epoch 1/3, Loss: 0.3425656010787333
Epoch 2/3, Loss: 0.2378291197782082
Epoch 3/3, Loss: 0.19856978204521922


In [99]:
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31923, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [100]:
predictions, true_labels = [], []

with torch.no_grad():
    for batch in test_loader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions.append(logits.sigmoid().cpu().numpy())
        true_labels.append(labels.cpu().numpy())

In [101]:
predictions = np.concatenate(predictions, axis=0)
true_labels = np.concatenate(true_labels, axis=0)

In [103]:
binary_predictions = (predictions >= 0.5).astype(int)

for i, col in enumerate(category_columns):
    print(f"Classification report for {col}:")
    print(classification_report(true_labels[:, i], binary_predictions[:, i]))

Classification report for Hate Speech:
              precision    recall  f1-score   support

         0.0       0.98      0.96      0.97      9856
         1.0       0.89      0.96      0.92      3680

    accuracy                           0.96     13536
   macro avg       0.94      0.96      0.95     13536
weighted avg       0.96      0.96      0.96     13536

Classification report for Abusive Speech:
              precision    recall  f1-score   support

         0.0       0.98      0.96      0.97      9856
         1.0       0.89      0.95      0.92      3680

    accuracy                           0.96     13536
   macro avg       0.94      0.96      0.95     13536
weighted avg       0.96      0.96      0.96     13536

Classification report for SARA:
              precision    recall  f1-score   support

         0.0       0.96      0.93      0.94      9055
         1.0       0.87      0.92      0.89      4481

    accuracy                           0.93     13536
   macro avg   

In [109]:
def predict_categories(sentence, model, tokenizer, threshold=0.5):
    inputs = tokenizer(sentence, padding=True, truncation=True, max_length=128, return_tensors="pt")

    inputs = {key: val.to(device) for key, val in inputs.items()}

    model.eval()

    with torch.no_grad():
        outputs = model(**inputs)
    
    logits = outputs.logits

    probabilities = torch.sigmoid(logits).cpu().numpy()

    predictions = (probabilities >= threshold).astype(int)

    return probabilities[0], predictions[0]

In [122]:
sentence = "jokowi goblok korupsi mulu ga sholat"
probabilities, predictions = predict_categories(sentence, model, tokenizer)

for i, category in enumerate(category_columns):
    print(f"{category}: Probability = {probabilities[i]:.4f}, Prediction = {'Yes' if predictions[i] == 1 else 'No'}")

Hate Speech: Probability = 0.9837, Prediction = Yes
Abusive Speech: Probability = 0.9844, Prediction = Yes
SARA: Probability = 0.9852, Prediction = Yes
Pornography: Probability = 0.0135, Prediction = No
Radicalism: Probability = 0.0163, Prediction = No
Defamation: Probability = 0.9256, Prediction = Yes


In [118]:
output_dir = "./model-export"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

('./model-export\\tokenizer_config.json',
 './model-export\\special_tokens_map.json',
 './model-export\\vocab.txt',
 './model-export\\added_tokens.json')