In [1]:
import torch
import numpy as np
from transformers import BertTokenizer
import pandas as pd

In [2]:
df=pd.read_csv('df_clean1.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,full_text,label,clean_text
0,0,Dukung Ganjar Pranowo Sebagai Capres RI 2024 #...,1,dukung capres ri
1,1,@ch_chotimah2 @ganjarpranowo Ganjar capres ras...,0,capres ras mirip nazi anti israel
2,2,Ganjar Pranowo pemimpin yang kerja nyata untuk...,1,pimpin kerja nyata penting rakyat
3,3,Ganjar Pranowo pemimpin yang memiliki keberani...,1,pimpin milik berani nyali
4,4,Ganjar Pranowo pemimpin yang visioner dan cerd...,1,pimpin visioner cerdas


In [4]:
df.shape

(4057, 4)

In [5]:
df['label'].value_counts()

1    3074
0     983
Name: label, dtype: int64

In [6]:
tokenizer = BertTokenizer.from_pretrained('indobenchmark/indobert-base-p2')

In [7]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self, df):

        self.labels = [label for label in df['label']]
        self.texts = [tokenizer(text,
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for text in df['clean_text']]

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):

        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_texts, batch_y

In [8]:
np.random.seed(112)
from sklearn.model_selection import train_test_split
df = df.dropna()
df_train, df_temp = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])
df_val, df_test = train_test_split(df_temp, test_size=0.5, random_state=42, stratify=df_temp['label'])

print(len(df_train),len(df_val), len(df_test))

3244 406 406


In [9]:
from torch import nn
from transformers import BertModel

class BertClassifier(nn.Module):

    def __init__(self, dropout=0.5):

        super(BertClassifier, self).__init__()

        self.bert = BertModel.from_pretrained('indobenchmark/indobert-base-p2')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768,2)
        self.relu = nn.Sigmoid()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer

In [10]:
from transformers import EarlyStoppingCallback, IntervalStrategy

In [11]:
!pip install accelerate -U



In [12]:
from transformers import TrainingArguments
args = TrainingArguments(
    f"training_with_callbacks",
    evaluation_strategy = IntervalStrategy.STEPS, # "steps"
    eval_steps = 2, # Evaluation and Save happens every 2 steps
    save_total_limit = 5, # Only last 5 models are saved. Older ones are deleted.
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    weight_decay=0.01,
    push_to_hub=False,
    metric_for_best_model = 'f1',
    load_best_model_at_end=True)

In [13]:
from torch.optim import Adam
from tqdm import tqdm

from sklearn.metrics import precision_score, recall_score, f1_score

def train(model, train_data, val_data, learning_rate, epochs, args,
          callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]):
    train, val = Dataset(train_data), Dataset(val_data)
    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    for epoch_num in range(epochs):
        total_acc_train = 0
        total_loss_train = 0
        all_train_labels = []
        all_train_preds = []

        for train_input, train_label in tqdm(train_dataloader):
            train_label = train_label.to(device)
            mask = train_input['attention_mask'].to(device)
            input_id = train_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)

            batch_loss = criterion(output, train_label.long())
            total_loss_train += batch_loss.item()

            acc = (output.argmax(dim=1) == train_label).sum().item()
            total_acc_train += acc

            # Collect predictions and labels for later metric calculation
            all_train_labels.extend(train_label.cpu().numpy())
            all_train_preds.extend(output.argmax(dim=1).cpu().numpy())

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # Calculate precision, recall, and F1 score for training set
        train_precision = precision_score(all_train_labels, all_train_preds, average='weighted')
        train_recall = recall_score(all_train_labels, all_train_preds, average='weighted')
        train_f1 = f1_score(all_train_labels, all_train_preds, average='weighted')

        total_acc_val = 0
        total_loss_val = 0
        all_val_labels = []
        all_val_preds = []

        with torch.no_grad():
            for val_input, val_label in val_dataloader:
                val_label = val_label.to(device)
                mask = val_input['attention_mask'].to(device)
                input_id = val_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)

                batch_loss = criterion(output, val_label.long())
                total_loss_val += batch_loss.item()

                acc = (output.argmax(dim=1) == val_label).sum().item()
                total_acc_val += acc

                # Collect predictions and labels for later metric calculation
                all_val_labels.extend(val_label.cpu().numpy())
                all_val_preds.extend(output.argmax(dim=1).cpu().numpy())

        # Calculate precision, recall, and F1 score for validation set
        val_precision = precision_score(all_val_labels, all_val_preds, average='weighted')
        val_recall = recall_score(all_val_labels, all_val_preds, average='weighted')
        val_f1 = f1_score(all_val_labels, all_val_preds, average='weighted')

        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} '
            f'| Train Accuracy: {total_acc_train / len(train_data): .3f} '
            f'| Train Precision: {train_precision: .3f} | Train Recall: {train_recall: .3f} | Train F1: {train_f1: .3f} '
            f'| Val Loss: {total_loss_val / len(val_data): .3f} '
            f'| Val Accuracy: {total_acc_val / len(val_data): .3f} '
            f'| Val Precision: {val_precision: .3f} | Val Recall: {val_recall: .3f} | Val F1: {val_f1: .3f}')



In [14]:
EPOCHS = 5
model = BertClassifier()
LR = 1e-6

train(model, df_train, df_val, LR, EPOCHS, args, callbacks = [EarlyStoppingCallback(early_stopping_patience=3)])

pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

100%|██████████| 1622/1622 [05:41<00:00,  4.75it/s]


Epochs: 1 | Train Loss:  0.269 | Train Accuracy:  0.769 | Train Precision:  0.751 | Train Recall:  0.769 | Train F1:  0.693 | Val Loss:  0.239 | Val Accuracy:  0.828 | Val Precision:  0.819 | Val Recall:  0.828 | Val F1:  0.821


100%|██████████| 1622/1622 [05:44<00:00,  4.71it/s]


Epochs: 2 | Train Loss:  0.225 | Train Accuracy:  0.868 | Train Precision:  0.865 | Train Recall:  0.868 | Train F1:  0.866 | Val Loss:  0.228 | Val Accuracy:  0.845 | Val Precision:  0.845 | Val Recall:  0.845 | Val F1:  0.845


100%|██████████| 1622/1622 [05:44<00:00,  4.71it/s]


Epochs: 3 | Train Loss:  0.207 | Train Accuracy:  0.900 | Train Precision:  0.898 | Train Recall:  0.900 | Train F1:  0.899 | Val Loss:  0.226 | Val Accuracy:  0.855 | Val Precision:  0.848 | Val Recall:  0.855 | Val F1:  0.848


100%|██████████| 1622/1622 [05:43<00:00,  4.72it/s]


Epochs: 4 | Train Loss:  0.195 | Train Accuracy:  0.924 | Train Precision:  0.923 | Train Recall:  0.924 | Train F1:  0.923 | Val Loss:  0.226 | Val Accuracy:  0.847 | Val Precision:  0.844 | Val Recall:  0.847 | Val F1:  0.845


100%|██████████| 1622/1622 [05:43<00:00,  4.72it/s]


Epochs: 5 | Train Loss:  0.187 | Train Accuracy:  0.941 | Train Precision:  0.940 | Train Recall:  0.941 | Train F1:  0.940 | Val Loss:  0.223 | Val Accuracy:  0.867 | Val Precision:  0.865 | Val Recall:  0.867 | Val F1:  0.866


In [15]:
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate(model, test_data):
    test = Dataset(test_data)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:
        model = model.cuda()

    total_acc_test = 0
    all_test_labels = []
    all_test_preds = []

    with torch.no_grad():
        for test_input, test_label in test_dataloader:
            test_label = test_label.to(device)
            mask = test_input['attention_mask'].to(device)
            input_id = test_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)

            acc = (output.argmax(dim=1) == test_label).sum().item()
            total_acc_test += acc

            # Collect predictions and labels for later metric calculation
            all_test_labels.extend(test_label.cpu().numpy())
            all_test_preds.extend(output.argmax(dim=1).cpu().numpy())

    # Calculate precision, recall, and F1 score
    precision = precision_score(all_test_labels, all_test_preds, average='weighted')
    recall = recall_score(all_test_labels, all_test_preds, average='weighted')
    f1 = f1_score(all_test_labels, all_test_preds, average='weighted')

    print(f'Test Accuracy: {total_acc_test / len(test_data): .3f}\n')
    print(f'Test Precision: {precision: .3f} | Test Recall: {recall: .3f} | Test F1: {f1: .3f}')

evaluate(model, df_test.dropna())

Test Accuracy:  0.847

Test Precision:  0.843 | Test Recall:  0.847 | Test F1:  0.844
