# IndoBERTweet Toxic Detection Model

## Import Library

In [19]:
import numpy as np
import pandas as pd
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
import re
from sklearn.metrics import classification_report, hamming_loss, jaccard_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from imblearn.over_sampling import SMOTE
from skmultilearn.model_selection import iterative_train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader, TensorDataset
from tabulate import tabulate

## Target Dataset Definition

Hate Speech, Abusive Speech, SARA, Radicalism, Defamation

In [20]:
toxic_df = pd.DataFrame(columns=["Text", "Hate Speech", "Abusive Speech", "SARA", "Radicalism", "Defamation"])
toxic_df

Unnamed: 0,Text,Hate Speech,Abusive Speech,SARA,Radicalism,Defamation


## Source Dataset Processing

### Dataset : Netifier

#### Init

In [21]:
netifier_path = "./data/netifier"

In [22]:
netifier_train_path = netifier_path + "/processed_train.csv"
netifier_test_path = netifier_path + "/processed_test.csv"

In [23]:
netifier_train = pd.read_csv(netifier_train_path)
netifier_test = pd.read_csv(netifier_test_path)

#### Assessment

In [24]:
netifier_train.head()

Unnamed: 0,original_text,source,pornografi,sara,radikalisme,pencemaran_nama_baik,processed_text
0,[QUOTE=jessepinkman16;5a50ac34d89b093f368b456e...,kaskus,0,0,0,1,jabar memang provinsi barokah boleh juga dan n...
1,"@verosvante kita2 aja nitizen yang pada kepo,t...",instagram,0,0,0,0,kita saja nitizen yang pada penasaran toh kelu...
2,"""#SidangAhok smg sipenista agama n ateknya mat...",twitter,0,1,1,1,sidangahok semoga sipenista agama dan ateknya ...
3,@bolususulembang.jkt barusan baca undang2 ini....,instagram,0,0,0,0,jakarta barusan baca undang ini tetap dibedaka...
4,bikin anak mulu lu nof \nkaga mikir apa kasian...,kaskus,0,0,0,0,buat anak melulu kamu nof nkaga mikir apa kasi...


In [25]:
netifier_test.head()

Unnamed: 0,original_text,source,pornografi,sara,radikalisme,pencemaran_nama_baik,processed_text
0,"1.BUKAN CM SPANDUK PROF,VIDEO2 ORASI MEREKA, B...",twitter,0,0,1,0,bukan hanya spanduk prof video orasi mereka bu...
1,@memeqbeceq gy sange'gatel yh tetek'memekY drn...,twitter,1,0,0,0,gy sange gatel yh tetek memeky drnjng tempat t...
2,Pertama kali denger lagunya enk bgt in dan png...,instagram,0,0,0,0,pertama kali denger lagunya enk sekali in dan ...
3,"astajim, ini pasti yg kasih penghargaan ke ibu...",kaskus,0,0,0,0,astajim ini pasti yang kasih penghargan ke ibu...
4,beda kalo disini kalo komplain lgs di bully am...,kaskus,0,0,0,0,beda kalau di sini kalau keluhan langsung di b...


In [26]:
netifier_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6995 entries, 0 to 6994
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   original_text         6995 non-null   object
 1   source                6995 non-null   object
 2   pornografi            6995 non-null   int64 
 3   sara                  6995 non-null   int64 
 4   radikalisme           6995 non-null   int64 
 5   pencemaran_nama_baik  6995 non-null   int64 
 6   processed_text        6995 non-null   object
dtypes: int64(4), object(3)
memory usage: 382.7+ KB


In [27]:
netifier_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 778 entries, 0 to 777
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   original_text         778 non-null    object
 1   source                778 non-null    object
 2   pornografi            778 non-null    int64 
 3   sara                  778 non-null    int64 
 4   radikalisme           778 non-null    int64 
 5   pencemaran_nama_baik  778 non-null    int64 
 6   processed_text        778 non-null    object
dtypes: int64(4), object(3)
memory usage: 42.7+ KB


#### Preprocessing

**Preprocessed:**
- Translate text-based emojis
- Remove excessive newline
- Remove kaskus formatting
- Remove url
- Remove excessive whitespace
- Tokenize text
- Transform slang words
- Remove non aplhabet
- Remove twitter & instagram formatting
- Remove Repeating Characters

Conclusion: *clean enough, recommend to use stemming and stopword*

Hate Speech (subcategories: HS_Individual, HS_Group, HS_Religion, HS_Race, HS_Physical, HS_Gender, HS_Other)
Abusive Speech (subcategories: HS_Weak, HS_Moderate, HS_Strong)
SARA (covering sara, HS_Religion, HS_Race, HS_Group)
Pornography (for pornografi)
Radicalism (for radikalisme)
Defamation (for HS_Individual, pencemaran_nama_baik)

In [28]:
new_data = {
    'Text': netifier_train['processed_text'],
    'SARA': netifier_train['sara'],
    'Radicalism': netifier_train['radikalisme'],
    'Defamation': netifier_train['pencemaran_nama_baik'],
    'Hate Speech': netifier_train[['sara', 'radikalisme', 'pencemaran_nama_baik']].max(axis=1),
    'Abusive Speech': netifier_train['pornografi']
}

new_df = pd.DataFrame(new_data)
toxic_df = pd.concat([toxic_df, new_df], ignore_index=True)

toxic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6995 entries, 0 to 6994
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            6995 non-null   object
 1   Hate Speech     6995 non-null   object
 2   Abusive Speech  6995 non-null   object
 3   SARA            6995 non-null   object
 4   Radicalism      6995 non-null   object
 5   Defamation      6995 non-null   object
dtypes: object(6)
memory usage: 328.0+ KB


In [29]:
new_data = {
    'Text': netifier_test['processed_text'],
    'SARA': netifier_test['sara'],
    'Radicalism': netifier_test['radikalisme'],
    'Defamation': netifier_test['pencemaran_nama_baik'],
    'Hate Speech': netifier_test[['sara', 'radikalisme', 'pencemaran_nama_baik']].max(axis=1),
    'Abusive Speech': netifier_test['pornografi']
}

new_df = pd.DataFrame(new_data)
toxic_df = pd.concat([toxic_df, new_df], ignore_index=True)

toxic_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7773 entries, 0 to 7772
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            7773 non-null   object
 1   Hate Speech     7773 non-null   object
 2   Abusive Speech  7773 non-null   object
 3   SARA            7773 non-null   object
 4   Radicalism      7773 non-null   object
 5   Defamation      7773 non-null   object
dtypes: object(6)
memory usage: 364.5+ KB


### Dataset : okkyibrahim

#### Init

In [30]:
okkyibrahim_path = "./data/okkyibrahim/preprocessed_indonesian_toxic_tweet.csv"

#### Assessment

In [31]:
okkyibrahim = pd.read_csv(okkyibrahim_path)
okkyibrahim.head()

Unnamed: 0,Tweet,HS,Abusive,HS_Individual,HS_Group,HS_Religion,HS_Race,HS_Physical,HS_Gender,HS_Other,HS_Weak,HS_Moderate,HS_Strong
0,cowok usaha lacak perhati gue lantas remeh per...,1,1,1,0,0,0,0,0,1,1,0,0
1,telat tau edan sarap gue gaul cigax jifla cal ...,0,1,0,0,0,0,0,0,0,0,0,0
2,41 kadang pikir percaya tuhan jatuh kali kali ...,0,0,0,0,0,0,0,0,0,0,0,0
3,ku tau mata sipit lihat,0,0,0,0,0,0,0,0,0,0,0,0
4,kaum cebong kafir lihat dongok dungu haha,1,1,0,1,1,0,0,0,0,0,1,0


In [32]:
okkyibrahim.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13169 entries, 0 to 13168
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Tweet          13121 non-null  object
 1   HS             13169 non-null  int64 
 2   Abusive        13169 non-null  int64 
 3   HS_Individual  13169 non-null  int64 
 4   HS_Group       13169 non-null  int64 
 5   HS_Religion    13169 non-null  int64 
 6   HS_Race        13169 non-null  int64 
 7   HS_Physical    13169 non-null  int64 
 8   HS_Gender      13169 non-null  int64 
 9   HS_Other       13169 non-null  int64 
 10  HS_Weak        13169 non-null  int64 
 11  HS_Moderate    13169 non-null  int64 
 12  HS_Strong      13169 non-null  int64 
dtypes: int64(12), object(1)
memory usage: 1.3+ MB


In [33]:
count_nan = okkyibrahim['Tweet'].isnull().sum()
print('Number of NaN values present: ' + str(count_nan))

Number of NaN values present: 48


#### Preprocessing

**Preprocessed:**
- Lower casing all text,
- Data cleaning by removing unnecessary characters such as re-tweet symbol (RT), username, URL, and punctuation
- Normalization using 'Alay' dictionary
- Stemming using PySastrawi
- Stop words removal using list

Conclusion: *need to remove hexadecimal encoding, drop NaN*

In [34]:
okkyibrahim = okkyibrahim.dropna(subset=['Tweet'])
okkyibrahim.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13121 entries, 0 to 13168
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Tweet          13121 non-null  object
 1   HS             13121 non-null  int64 
 2   Abusive        13121 non-null  int64 
 3   HS_Individual  13121 non-null  int64 
 4   HS_Group       13121 non-null  int64 
 5   HS_Religion    13121 non-null  int64 
 6   HS_Race        13121 non-null  int64 
 7   HS_Physical    13121 non-null  int64 
 8   HS_Gender      13121 non-null  int64 
 9   HS_Other       13121 non-null  int64 
 10  HS_Weak        13121 non-null  int64 
 11  HS_Moderate    13121 non-null  int64 
 12  HS_Strong      13121 non-null  int64 
dtypes: int64(12), object(1)
memory usage: 1.4+ MB


In [35]:
def clean_text(text):
    cleaned_text = re.sub(r'[^\x00-\x7F]+', '', text)
    cleaned_text = ' '.join(cleaned_text.split())
    return cleaned_text


okkyibrahim['Tweet'] = okkyibrahim['Tweet'].apply(clean_text)
okkyibrahim.iloc[6:10]

Unnamed: 0,Tweet,HS,Abusive,HS_Individual,HS_Group,HS_Religion,HS_Race,HS_Physical,HS_Gender,HS_Other,HS_Weak,HS_Moderate,HS_Strong
6,deklarasi pilih kepala daerah 2018 aman anti h...,0,0,0,0,0,0,0,0,0,0,0,0
7,gue selesai re watch aldnoah zero kampret 2 ka...,0,1,0,0,0,0,0,0,0,0,0,0
8,admin belanja po nak makan ais kepal milo ais ...,0,0,0,0,0,0,0,0,0,0,0,0
9,enak ngewe,0,1,0,0,0,0,0,0,0,0,0,0


In [36]:
okkyibrahim.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13121 entries, 0 to 13168
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Tweet          13121 non-null  object
 1   HS             13121 non-null  int64 
 2   Abusive        13121 non-null  int64 
 3   HS_Individual  13121 non-null  int64 
 4   HS_Group       13121 non-null  int64 
 5   HS_Religion    13121 non-null  int64 
 6   HS_Race        13121 non-null  int64 
 7   HS_Physical    13121 non-null  int64 
 8   HS_Gender      13121 non-null  int64 
 9   HS_Other       13121 non-null  int64 
 10  HS_Weak        13121 non-null  int64 
 11  HS_Moderate    13121 non-null  int64 
 12  HS_Strong      13121 non-null  int64 
dtypes: int64(12), object(1)
memory usage: 1.4+ MB


## Target Dataset Result

In [37]:
new_data = {
    'Text': okkyibrahim['Tweet'],
    'SARA': okkyibrahim[['HS_Religion', 'HS_Race']].max(axis=1),
    'Radicalism': okkyibrahim['HS_Group'],
    'Defamation': okkyibrahim['HS_Individual'],
    'Hate Speech': okkyibrahim['HS'],
    'Abusive Speech': okkyibrahim[['Abusive', 'HS_Gender', 'HS_Physical', 'HS_Other']].max(axis=1)
}

new_df = pd.DataFrame(new_data)
toxic_df = pd.concat([toxic_df, new_df], ignore_index=True)

toxic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20894 entries, 0 to 20893
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            20894 non-null  object
 1   Hate Speech     20894 non-null  object
 2   Abusive Speech  20894 non-null  object
 3   SARA            20894 non-null  object
 4   Radicalism      20894 non-null  object
 5   Defamation      20894 non-null  object
dtypes: object(6)
memory usage: 979.5+ KB


In [38]:
import sys
import os

# Add parent directory to sys.path
project_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
sys.path.append(project_root)

from text_preprocess.text_preprocessing import preprocess_text

toxic_df['Text'] = toxic_df['Text'].apply(preprocess_text)

In [39]:
toxic_df.head()

Unnamed: 0,Text,Hate Speech,Abusive Speech,SARA,Radicalism,Defamation
0,jabar provinsi barokah nwoi anjing bodoh nprop...,1,0,0,0,1
1,nitizen penasaran keluarga situ urus nya diuru...,0,0,0,0,0
2,sidangahok moga sipenista agama ateknya mati w...,1,0,1,1,1
3,jakarta barusan baca undang beda presiden mant...,0,0,0,0,0
4,anak melulu nof nkaga mikir kasi anak malu lak...,0,0,0,0,0


In [40]:
category_columns = ['Hate Speech', 'Abusive Speech', 'SARA', 'Radicalism', 'Defamation']
toxic_df[category_columns] = toxic_df[category_columns].astype(int)

toxic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20894 entries, 0 to 20893
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            20894 non-null  object
 1   Hate Speech     20894 non-null  int64 
 2   Abusive Speech  20894 non-null  int64 
 3   SARA            20894 non-null  int64 
 4   Radicalism      20894 non-null  int64 
 5   Defamation      20894 non-null  int64 
dtypes: int64(5), object(1)
memory usage: 979.5+ KB


In [41]:
toxic_df[category_columns].apply(lambda col: (col == 1).sum())

Hate Speech       8701
Abusive Speech    8336
SARA              2502
Radicalism        3261
Defamation        5992
dtype: int64

## SMOTE with TF-IDF Augmentation

In [67]:
toxic_df['labels'] = toxic_df[category_columns].apply(lambda row: [cat for cat in category_columns if row[cat] == 1], axis=1)

In [68]:
# Vectorize text using TF-IDF with limited features
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(toxic_df['Text'])
feature_names = np.array(vectorizer.get_feature_names_out())

In [73]:
# Binarize multilabels
mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(toxic_df['labels'])

X_dense = X.toarray()

In [70]:
# Augmentation loop for each label
synthetic_records = []
target_count = Y.sum(axis=0).max()

In [None]:
for i, label in enumerate(mlb.classes_):
    y_bin = Y[:, i]
    current_count = np.sum(y_bin)
    
    if current_count < 2:
        print(f"Skipping label {label} due to too few samples.")
        continue
    
    pos_indices = np.where(y_bin == 1)[0]
    neg_indices = np.where(y_bin == 0)[0]
    limit = min(5000, len(pos_indices))
    selected_pos = np.random.choice(pos_indices, limit, replace=False)
    selected_neg = np.random.choice(neg_indices, limit, replace=False)

    X_sub = X[selected_pos.tolist() + selected_neg.tolist()]
    y_sub = y_bin[selected_pos.tolist() + selected_neg.tolist()]

    smote = SMOTE(sampling_strategy={1: target_count})
    X_res, y_res = smote.fit_resample(X_sub.toarray(), y_sub)

    n_synthetic = len(X_res) - len(X_sub.toarray())
    X_synthetic = X_res[-n_synthetic:]

    for vec in X_synthetic:
        top_indices = vec.argsort()[-7:][::-1]
        top_words = feature_names[top_indices]
        sentence = " ".join(top_words)
        synthetic_records.append({'Text': sentence, label: 1})

ValueError: operands could not be broadcast together with shapes (8336,30811) (12558,30811) 

In [48]:
# Convert and clean synthetic data
synthetic_df = pd.DataFrame(synthetic_records).fillna(0)
for col in category_columns:
    if col not in synthetic_df.columns:
        synthetic_df[col] = 0
synthetic_df = synthetic_df[["Text"] + category_columns]
synthetic_df[category_columns] = synthetic_df[category_columns].astype(int)

In [51]:
synthetic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22742 entries, 0 to 22741
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            22742 non-null  object
 1   Hate Speech     22742 non-null  int64 
 2   Abusive Speech  22742 non-null  int64 
 3   SARA            22742 non-null  int64 
 4   Radicalism      22742 non-null  int64 
 5   Defamation      22742 non-null  int64 
dtypes: int64(5), object(1)
memory usage: 1.0+ MB


In [64]:
synthetic_df[category_columns].apply(lambda col: (col == 1).sum())

Hate Speech       3701
Abusive Speech    3701
SARA              6199
Radicalism        5440
Defamation        3701
dtype: int64

In [50]:
# Combine original and synthetic
final_df = pd.concat([toxic_df[["Text"] + category_columns], synthetic_df], ignore_index=True)

In [65]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43636 entries, 0 to 43635
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            43636 non-null  object
 1   Hate Speech     43636 non-null  int64 
 2   Abusive Speech  43636 non-null  int64 
 3   SARA            43636 non-null  int64 
 4   Radicalism      43636 non-null  int64 
 5   Defamation      43636 non-null  int64 
dtypes: int64(5), object(1)
memory usage: 2.0+ MB


In [52]:
final_df[category_columns].apply(lambda col: (col == 1).sum())

Hate Speech       12402
Abusive Speech    12037
SARA               8701
Radicalism         8701
Defamation         9693
dtype: int64

## IndoBERTweet Model

### Split Data

In [24]:
train_df_list = []
test_df_list = []

In [25]:
# Prepare features and labels
X = toxic_df.drop(columns=category_columns)  # Features (input data)
y = toxic_df[category_columns]  # Labels (multilabel binary matrix)

# Convert to numpy arrays
X_values = X.values
y_values = y.values

In [26]:
# Use iterative_train_test_split to split the data
X_train, y_train, X_test, y_test = iterative_train_test_split(X_values, y_values, test_size=0.2)

In [27]:
# Convert the resulting numpy arrays back to DataFrames
X_train_df = pd.DataFrame(X_train, columns=X.columns)
X_test_df = pd.DataFrame(X_test, columns=X.columns)

y_train_df = pd.DataFrame(y_train, columns=y.columns)
y_test_df = pd.DataFrame(y_test, columns=y.columns)

# Combine the features and labels back into single dataframes
train_df = pd.concat([X_train_df, y_train_df], axis=1)
test_df = pd.concat([X_test_df, y_test_df], axis=1)

In [28]:
# Check the results
print(f"Training set size: {len(train_df)}")
print(f"Testing set size: {len(test_df)}")

# Optionally, print the class distribution
print("\nClass distribution in training set:")
print(y_train_df.sum())

print("\nClass distribution in testing set:")
print(y_test_df.sum())

Training set size: 16715
Testing set size: 4179

Class distribution in training set:
Hate Speech       4442
Abusive Speech    4442
SARA              5354
Pornography       1398
Radicalism        1021
Defamation        4794
dtype: int64

Class distribution in testing set:
Hate Speech       1110
Abusive Speech    1110
SARA              1436
Pornography        349
Radicalism         255
Defamation        1198
dtype: int64


### Tokenize

In [29]:
tokenizer = BertTokenizer.from_pretrained('indolem/indobertweet-base-uncased')

In [30]:
def tokenize_text(df, max_len=128):
    return tokenizer(
        df['Text'].tolist(), 
        padding=True, 
        truncation=True, 
        max_length=max_len, 
        return_tensors="pt"
    )

In [31]:
train_encodings = tokenize_text(train_df)
test_encodings = tokenize_text(test_df)

In [32]:
train_labels = train_df[category_columns].values
test_labels = test_df[category_columns].values

### Class Weight Init

In [33]:
n_samples = len(np.array(y_train_df))
n_classes = len(np.array(y_train_df)[0])

# Count each class frequency
class_count = [0] * n_classes
for classes in np.array(y_train_df):
    for index in range(n_classes):
        if classes[index] != 0:
            class_count[index] += 1

# Compute class weights using balanced method
class_weights = [n_samples / (n_classes * freq) if freq > 0 else 1 for freq in class_count]
class_labels = range(len(class_weights))
dict(zip(class_labels, class_weights))

{0: 0.6271574365901246,
 1: 0.6271574365901246,
 2: 0.5203274810110821,
 3: 1.9927277062470194,
 4: 2.728534116878877,
 5: 0.5811083298567654}

### Preparation

In [34]:
model = BertForSequenceClassification.from_pretrained('indolem/indobertweet-base-uncased', num_labels=len(category_columns))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [35]:
train_dataset = TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], torch.tensor(train_labels, dtype=torch.float32))
test_dataset = TensorDataset(test_encodings['input_ids'], test_encodings['attention_mask'], torch.tensor(test_labels, dtype=torch.float32))

In [36]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

In [37]:
# Convert class weights to a tensor
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(model.device)

# Define the loss function with class weights
loss_fn = torch.nn.BCEWithLogitsLoss(weight=class_weights_tensor)

# Set the loss function in the model
model.config.loss = loss_fn

In [38]:
optimizer = AdamW(model.parameters(), lr=1e-5)
epochs = 3
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_loader) * epochs)
device = torch.device('cpu')
model.to(device)



BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31923, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

### Training

In [39]:
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]

        # Forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        optimizer.zero_grad()

        # Backward pass
        loss.backward()

        # Optimize and weights update
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss/len(train_loader)}")

Epoch 1/3, Loss: 0.33751451847228137
Epoch 2/3, Loss: 0.23913268325716686
Epoch 3/3, Loss: 0.20593995799858605


### Evaluation

In [40]:
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31923, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [41]:
predictions, true_labels = [], []

with torch.no_grad():
    for batch in test_loader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions.append(logits.sigmoid().cpu().numpy())
        true_labels.append(labels.cpu().numpy())

In [42]:
predictions = np.concatenate(predictions, axis=0)
true_labels = np.concatenate(true_labels, axis=0)

In [43]:
binary_predictions = (predictions >= 0.5).astype(int)

for i, col in enumerate(category_columns):
    print(f"Classification report for {col}:")
    print(classification_report(true_labels[:, i], binary_predictions[:, i]))

Classification report for Hate Speech:
              precision    recall  f1-score   support

         0.0       0.93      0.89      0.91      3069
         1.0       0.73      0.82      0.77      1110

    accuracy                           0.87      4179
   macro avg       0.83      0.85      0.84      4179
weighted avg       0.88      0.87      0.87      4179

Classification report for Abusive Speech:
              precision    recall  f1-score   support

         0.0       0.93      0.89      0.91      3069
         1.0       0.73      0.81      0.77      1110

    accuracy                           0.87      4179
   macro avg       0.83      0.85      0.84      4179
weighted avg       0.88      0.87      0.87      4179

Classification report for SARA:
              precision    recall  f1-score   support

         0.0       0.87      0.87      0.87      2743
         1.0       0.76      0.76      0.76      1436

    accuracy                           0.84      4179
   macro avg   

In [44]:
def predict_categories(sentence, model, tokenizer, threshold=0.5):
    inputs = tokenizer(sentence, padding=True, truncation=True, max_length=128, return_tensors="pt")

    inputs = {key: val.to(device) for key, val in inputs.items()}

    model.eval()

    with torch.no_grad():
        outputs = model(**inputs)
    
    logits = outputs.logits

    probabilities = torch.sigmoid(logits).cpu().numpy()

    predictions = (probabilities >= threshold).astype(int)

    return probabilities[0], predictions[0]

In [45]:
sentence = "jokowi goblok korupsi mulu ga sholat"
probabilities, predictions = predict_categories(sentence, model, tokenizer)

for i, category in enumerate(category_columns):
    print(f"{category}: Probability = {probabilities[i]:.4f}, Prediction = {'Yes' if predictions[i] == 1 else 'No'}")

Hate Speech: Probability = 0.9516, Prediction = Yes
Abusive Speech: Probability = 0.9625, Prediction = Yes
SARA: Probability = 0.9626, Prediction = Yes
Pornography: Probability = 0.0108, Prediction = No
Radicalism: Probability = 0.0115, Prediction = No
Defamation: Probability = 0.8836, Prediction = Yes


## Export Model

In [51]:
output_dir = "./model-export"
torch.save(model, f"{output_dir}/model.pth")
tokenizer.save_pretrained(output_dir)

('./model-export\\tokenizer_config.json',
 './model-export\\special_tokens_map.json',
 './model-export\\vocab.txt',
 './model-export\\added_tokens.json')