In [102]:
import torch

if torch.cuda.is_available():
    print("✅ GPU tersedia:", torch.cuda.get_device_name(0))
else:
    print("❌ GPU tidak tersedia. Menggunakan CPU.")


✅ GPU tersedia: NVIDIA GeForce RTX 3070 Laptop GPU


In [103]:
import torch
print(torch.version.cuda)


12.4


# Import Libraries :

In [104]:
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer, 
    DistilBertForSequenceClassification, 
    TrainingArguments, 
    Trainer
)
import numpy as np
import optuna
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datasets import DatasetDict


# LOAD Dataset :

In [105]:
df = pd.read_csv('D:/Test DistilBERT+Optuna Lokal/combined_dataset.csv')
df = df[['encoded_label', 'clean_text']].dropna()

# Pastikan label integer
df['encoded_label'] = df['encoded_label'].astype(int)

# Convert ke HuggingFace Dataset
dataset = Dataset.from_pandas(df)

In [106]:
df.head()

len(set(df["encoded_label"]))

2

In [107]:
df['encoded_label'].unique()

array([1, 0])

In [108]:
df["encoded_label"] = df["encoded_label"].astype(int)

In [109]:
df['encoded_label'].unique()

array([1, 0])

# Split train-test

In [110]:
dataset = dataset.train_test_split(test_size=0.2, seed=42)
dataset = DatasetDict({
    'train': dataset['train'],
    'test': dataset['test']
})

# 2. Tokenizer

In [111]:
from transformers import DistilBertForSequenceClassification, AutoTokenizer
from datasets import Dataset, DatasetDict

# Model & Tokenizer
model_name = "cahya/distilbert-base-indonesian"
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer = AutoTokenizer.from_pretrained("cahya/distilbert-base-indonesian")

# Tokenisasi
def tokenize_fn(batch):
    tokens = tokenizer(batch['clean_text'], padding="max_length", truncation=True, max_length=128)
    tokens["labels"] = batch["encoded_label"]  # langsung int, jangan dibungkus list
    return tokens


dataset = dataset.map(tokenize_fn, batched=True)
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])


Map: 100%|██████████| 1652/1652 [00:00<00:00, 18835.07 examples/s]
Map: 100%|██████████| 414/414 [00:00<00:00, 17302.99 examples/s]


In [112]:
num_labels = len(set(df["encoded_label"]))
def model_init():
    return DistilBertForSequenceClassification.from_pretrained(
        "cahya/distilbert-base-indonesian",
        num_labels=num_labels
    )


In [113]:
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['encoded_label', 'clean_text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 1652
    })
    test: Dataset({
        features: ['encoded_label', 'clean_text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 414
    })
})


# 3. Metrics Function

In [114]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

def compute_metrics(pred):
    logits = pred.predictions
    if isinstance(logits, tuple):
        logits = logits[0]  # ambil logits saja
    
    logits = np.array(logits)
    preds = np.argmax(logits, axis=-1)

    labels = np.array(pred.label_ids).flatten()

    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds, average='weighted'),
        'precision': precision_score(labels, preds, average='weighted'),
        'recall': recall_score(labels, preds, average='weighted')
    }


# 4. Optuna Objective

In [115]:
def model_init():
    return DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=2)

def objective(trial):
    training_args = TrainingArguments(
        output_dir="./results",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True),
        per_device_train_batch_size=trial.suggest_categorical("train_batch_size", [8, 16, 32]),
        per_device_eval_batch_size=trial.suggest_categorical("eval_batch_size", [8, 16, 32]),
        num_train_epochs=trial.suggest_int("epochs", 2, 5),
        weight_decay=trial.suggest_float("weight_decay", 0.0, 0.3),
        logging_dir="./logs",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True,
        fp16=torch.cuda.is_available()
    )

    trainer = Trainer(
        model_init=model_init,
        args=training_args,
        train_dataset=dataset['train'],
        eval_dataset=dataset['test'],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    trainer.train()
    metrics = trainer.evaluate()
    return metrics["eval_f1"]

# 5. Jalankan Optuna

In [116]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)  # Ubah n_trials untuk eksplor lebih banyak

print("Best trial:")
print(study.best_trial)

[I 2025-08-12 10:47:41,030] A new study created in memory with name: no-name-250cb62b-ce5a-4708-8fc1-802472dd6b55
  trainer = Trainer(
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.368184,0.857488,0.855121,0.878227,0.857488
2,No log,0.276968,0.898551,0.898558,0.898942,0.898551
3,No log,0.386984,0.908213,0.907857,0.912706,0.908213
4,No log,0.391386,0.908213,0.907961,0.911179,0.908213


[I 2025-08-12 10:55:51,238] Trial 0 finished with value: 0.9079609406480765 and parameters: {'learning_rate': 1.9775290578318554e-05, 'train_batch_size': 16, 'eval_batch_size': 16, 'epochs': 4, 'weight_decay': 0.293847716633629}. Best is trial 0 with value: 0.9079609406480765.
  trainer = Trainer(
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for pred

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.260903,0.888889,0.888834,0.889154,0.888889
2,No log,0.310407,0.898551,0.898522,0.900006,0.898551
3,No log,0.418202,0.913043,0.912921,0.914342,0.913043
4,No log,0.466745,0.903382,0.903065,0.907016,0.903382
5,0.117200,0.497248,0.905797,0.905461,0.909848,0.905797


[I 2025-08-12 10:57:27,745] Trial 1 finished with value: 0.9129214311649293 and parameters: {'learning_rate': 4.5969806994022144e-05, 'train_batch_size': 16, 'eval_batch_size': 32, 'epochs': 5, 'weight_decay': 0.2839726560296282}. Best is trial 1 with value: 0.9129214311649293.
  trainer = Trainer(
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for pre

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.434567,0.792271,0.786512,0.822198,0.792271
2,No log,0.336727,0.850242,0.849113,0.858411,0.850242
3,No log,0.305126,0.862319,0.861976,0.864567,0.862319
4,No log,0.308912,0.879227,0.878683,0.884144,0.879227
5,No log,0.30273,0.884058,0.883677,0.887473,0.884058


[I 2025-08-12 11:07:42,864] Trial 2 finished with value: 0.8836774143348226 and parameters: {'learning_rate': 1.1443391799396131e-05, 'train_batch_size': 32, 'eval_batch_size': 16, 'epochs': 5, 'weight_decay': 0.2576330261623983}. Best is trial 1 with value: 0.9129214311649293.
  trainer = Trainer(
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for pre

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.367107,0.855072,0.85327,0.869852,0.855072
2,No log,0.352009,0.881643,0.88122,0.885393,0.881643


[I 2025-08-12 11:13:37,529] Trial 3 finished with value: 0.8812198935076548 and parameters: {'learning_rate': 1.4582519133347599e-05, 'train_batch_size': 8, 'eval_batch_size': 16, 'epochs': 2, 'weight_decay': 0.04081981526171976}. Best is trial 1 with value: 0.9129214311649293.
  trainer = Trainer(
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at cahya/distilbert-base-indonesian and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for pre

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.448791,0.797101,0.790018,0.83707,0.797101
2,No log,0.312081,0.862319,0.86145,0.869233,0.862319
3,No log,0.29719,0.888889,0.888733,0.890078,0.888889
4,No log,0.309911,0.884058,0.883609,0.888224,0.884058
5,No log,0.30538,0.896135,0.895969,0.897581,0.896135


[I 2025-08-12 11:15:22,771] Trial 4 finished with value: 0.8959694029151666 and parameters: {'learning_rate': 1.3953319112264623e-05, 'train_batch_size': 32, 'eval_batch_size': 32, 'epochs': 5, 'weight_decay': 0.11291836859889037}. Best is trial 1 with value: 0.9129214311649293.


Best trial:
FrozenTrial(number=1, state=1, values=[0.9129214311649293], datetime_start=datetime.datetime(2025, 8, 12, 10, 55, 51, 238879), datetime_complete=datetime.datetime(2025, 8, 12, 10, 57, 27, 745178), params={'learning_rate': 4.5969806994022144e-05, 'train_batch_size': 16, 'eval_batch_size': 32, 'epochs': 5, 'weight_decay': 0.2839726560296282}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'learning_rate': FloatDistribution(high=5e-05, log=True, low=1e-05, step=None), 'train_batch_size': CategoricalDistribution(choices=(8, 16, 32)), 'eval_batch_size': CategoricalDistribution(choices=(8, 16, 32)), 'epochs': IntDistribution(high=5, log=False, low=2, step=1), 'weight_decay': FloatDistribution(high=0.3, log=False, low=0.0, step=None)}, trial_id=1, value=None)


# Save The Model :

In [117]:
# Setelah training selesai dan sudah ada 'model' hasil training
output_dir = "./best_model"

# Simpan model dan tokenizer
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"✅ Model disimpan di folder: {output_dir}")


✅ Model disimpan di folder: ./best_model


# Pengujian tingkat kecepatan dalam melakukan prediksi :

In [8]:
import torch
import time
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# === 1. Load model dan tokenizer ===
model_path = "./best_model"  # path model yang sudah kamu save
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Pastikan pakai GPU kalau ada
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# === 2. Load dataset (contoh) ===
# Misalnya file CSV berisi kolom 'comment'
df = pd.read_csv("D:\Project-Prep\Test DistilBERT+Optuna Lokal\cleantext_CB.csv")

# Ambil sample 500 komentar random untuk pengujian
sample_texts = df["comment"].sample(500, random_state=42).tolist()

# === 3. Uji waktu inference (batch) ===
start_time = time.perf_counter()

# Tokenisasi batch
inputs = tokenizer(sample_texts, padding=True, truncation=True, return_tensors="pt").to(device)

# Matikan gradient untuk speed
with torch.no_grad():
    outputs = model(**inputs)
    
# Hitung waktu selesai
end_time = time.perf_counter()

# === 4. Hitung metrik waktu ===
total_time = end_time - start_time
avg_time_per_text = total_time / len(sample_texts)
throughput = len(sample_texts) / total_time

print(f"⏱ Total waktu: {total_time:.4f} detik untuk {len(sample_texts)} teks")
print(f"📊 Rata-rata per teks: {avg_time_per_text*1000:.2f} ms")
print(f"⚡ Throughput: {throughput:.2f} teks/detik")

# === 5. Uji waktu real-time untuk satu komentar ===
test_comment = "Goblok lu anjinggg!"
inputs = tokenizer(test_comment, return_tensors="pt").to(device)

start_rt = time.perf_counter()
with torch.no_grad():
    outputs = model(**inputs)
end_rt = time.perf_counter()

print(f"🎯 Latency real-time 1 teks: {(end_rt - start_rt)*1000:.2f} ms")


  df = pd.read_csv("D:\Project-Prep\Test DistilBERT+Optuna Lokal\cleantext_CB.csv")
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


⏱ Total waktu: 0.0488 detik untuk 500 teks
📊 Rata-rata per teks: 0.10 ms
⚡ Throughput: 10236.86 teks/detik
🎯 Latency real-time 1 teks: 6.23 ms


In [9]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import time
import numpy as np

# ====== 1. Load model & tokenizer ======
model_path = "./best_model"  # ganti sesuai path model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()  # mode evaluasi

# Gunakan GPU kalau ada
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# ====== 2. Contoh dataset (ambil dari data test) ======
# Misalnya kita sudah punya 'test_texts' dan 'test_labels'
# Untuk contoh ini, saya buat dummy:
test_texts = [
    "You are so stupid!",
    "I hope you have a great day!",
    "Nobody likes you, go away!",
    "That's an amazing performance!"
] * 50  # gandakan untuk 200 sample

# ====== 3. Sampling data (misalnya 200 sample) ======
sample_size = 200
texts_to_test = test_texts[:sample_size]

# ====== 4. Ukur kecepatan ======
latencies = []
predictions = []

with torch.no_grad():
    for text in texts_to_test:
        start_time = time.time()

        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
        outputs = model(**inputs)
        predicted_class = torch.argmax(outputs.logits, dim=1).item()

        end_time = time.time()

        latencies.append(end_time - start_time)
        predictions.append(predicted_class)

# ====== 5. Hasil perhitungan ======
avg_latency = np.mean(latencies)  # rata-rata detik per sample
throughput = 1 / avg_latency  # sample per detik

print(f"📊 Average latency per sample: {avg_latency*1000:.2f} ms")
print(f"⚡ Throughput: {throughput:.2f} samples/sec")
print(f"Total samples tested: {len(texts_to_test)}")


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


📊 Average latency per sample: 6.23 ms
⚡ Throughput: 160.55 samples/sec
Total samples tested: 200
