In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
import torch
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
from kaggle_secrets import UserSecretsClient
import wandb
import numpy as np

In [None]:
# 1. Load and preprocess data
def load_data(months=None):
    """Read the monthly news JSON dumps and return their text/label columns.

    Args:
        months: Iterable of path prefixes; ``.json`` is appended to each one
            before reading. Defaults to the single November dump under
            ``/kaggle/input/indonesia-news-corpus``.

    Returns:
        A DataFrame holding only the ``isi`` and ``kategori`` columns of every
        file, stacked in the order given, with a fresh 0..n-1 index.

    Raises:
        KeyError: if a file lacks the ``isi`` or ``kategori`` column.
        ValueError: if ``months`` is empty (raised by ``pd.concat``).
    """
    if months is None:
        months = ['/kaggle/input/indonesia-news-corpus/november']

    frames = [
        pd.read_json(f'{month}.json')[['isi', 'kategori']]
        for month in months
    ]
    # ignore_index avoids repeated index labels once several months are stacked.
    return pd.concat(frames, ignore_index=True)

# Load the corpus once into the global `df`; the training loop passes it to train_with_wandb.
df = load_data()

In [None]:
# 2. Prepare dataset
class NewsDataset(Dataset):
    """Torch dataset pairing per-sample encodings with their labels.

    ``encodings`` is a mapping whose values are indexable per sample
    (presumably tokenizer output - confirm against the caller); ``labels``
    is an indexable sequence of the same length.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of samples is defined by the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors, with the label under 'labels'."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [None]:
# 3. Evaluate function
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true labels) and ``predictions``
            (scores whose last axis is arg-maxed into class ids).

    Returns:
        Dict with the keys ``'accuracy'`` and ``'f1'``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    metrics = {}
    metrics['accuracy'] = accuracy_score(y_true, y_pred)
    metrics['f1'] = f1_score(y_true, y_pred, average='weighted')
    return metrics

In [None]:
# 4. Models to compare
# Display name -> checkpoint identifier handed to `from_pretrained`.
models = dict(
    IndoBERT='indobenchmark/indobert-base-p1',
    mBERT='bert-base-multilingual-cased',
    RoBERTa='roberta-base',
)

# Filled by the comparison loop: display name -> evaluation metrics dict.
results = dict()

In [None]:
# Read the W&B API key from the Kaggle secret store and authenticate with it.
secrets_client = UserSecretsClient()
wandb_api_key = secrets_client.get_secret("wandb_api_key")
wandb.login(key=wandb_api_key)

In [None]:
# 5. Train and evaluate each model
def train_with_wandb(model_name, model_path, df, num_epochs=3, batch_size=8, max_length=256):
    """Fine-tune one pretrained checkpoint on ``df`` and track the run in W&B.

    The data is split 80/20 (fixed ``random_state=42``) into train and
    validation sets. The model is trained with the Hugging Face ``Trainer``
    and evaluated on the validation set, and a confusion matrix of the
    validation predictions is logged to W&B as an image.

    Args:
        model_name: Display name; used as the W&B run name, in the output
            directory and in the confusion-matrix title.
        model_path: Checkpoint identifier passed to ``from_pretrained``.
        df: DataFrame with an ``isi`` column (tokenized as the input text)
            and a ``kategori`` column (factorized into class ids).
        num_epochs: Number of training epochs.
        batch_size: Per-device batch size for both training and evaluation.
        max_length: Truncation length passed to the tokenizer.

    Returns:
        The metrics dict returned by ``trainer.evaluate()``.

    Raises:
        ValueError: if no row has both ``isi`` and ``kategori`` set.
    """
    # pd.factorize encodes a missing category as -1, which is not a valid
    # class id, and a missing text cannot be tokenized - drop such rows first.
    data = df.dropna(subset=['isi', 'kategori'])
    if data.empty:
        raise ValueError("df has no rows with both 'isi' and 'kategori' set")

    labels, unique_labels = pd.factorize(data['kategori'])
    texts = data['isi'].astype(str).tolist()
    # Derived from the factorized labels so the classifier head always
    # matches the label ids actually produced above.
    num_labels = len(unique_labels)

    train_texts, val_texts, train_labels, val_labels = train_test_split(
        texts, labels, test_size=0.2, random_state=42)

    # Context manager: the run is closed even if training raises, so a
    # failure does not leave a dangling run behind for the next model.
    with wandb.init(project="indonesian-news-classification",
                    name=model_name,
                    config={"model": model_name,
                            "epochs": num_epochs,
                            "batch_size": batch_size}):
        # Tokenizer and model setup
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            num_labels=num_labels)

        # Dataset preparation
        train_dataset = NewsDataset(
            tokenizer(train_texts, truncation=True, padding=True, max_length=max_length),
            train_labels
        )
        val_dataset = NewsDataset(
            tokenizer(val_texts, truncation=True, padding=True, max_length=max_length),
            val_labels
        )

        # Training with W&B logging
        training_args = TrainingArguments(
            output_dir=f'./results/{model_name}',
            eval_strategy='epoch',
            save_strategy='epoch',
            logging_strategy='epoch',
            report_to='wandb',  # Critical for W&B integration
            learning_rate=2e-5,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=num_epochs,
            weight_decay=0.01,
            load_best_model_at_end=True
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics
        )

        trainer.train()
        eval_result = trainer.evaluate()

        # Predictions on the validation split for the confusion matrix
        predictions = trainer.predict(val_dataset)
        preds = np.argmax(predictions.predictions, axis=-1)
        true_labels = predictions.label_ids

        # Pin the label set: without it, a class missing from the validation
        # split shrinks the matrix and it no longer lines up with display_labels.
        cm = confusion_matrix(true_labels, preds, labels=np.arange(num_labels))
        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            disp = ConfusionMatrixDisplay(
                confusion_matrix=cm,
                display_labels=unique_labels
            )
            disp.plot(ax=ax, cmap='Blues', xticks_rotation=45)
            ax.set_title(f"Confusion Matrix - {model_name}")
            fig.tight_layout()

            # Log to W&B
            wandb.log({"confusion_matrix": wandb.Image(fig)})
        finally:
            plt.close(fig)  # Prevent duplicate displays in notebooks

    return eval_result

In [None]:
# 6. Comparison
# Train each candidate in turn and keep its evaluation metrics by display name.
for model_name in models:
    model_path = models[model_name]
    print(f"\nTraining {model_name}...")
    results[model_name] = train_with_wandb(model_name, model_path, df)

In [None]:
# 7. Final results summary (accuracy and weighted F1 per model)
# Print one accuracy/F1 summary per trained model.
print("\nFinal Results:")
for model_name in results:
    metrics = results[model_name]
    print(f"{model_name}:")
    print(f"  Accuracy: {metrics['eval_accuracy']:.4f}")
    print(f"  F1: {metrics['eval_f1']:.4f}")


Final Results:
IndoBERT:
  Accuracy: 0.9378
  F1: 0.9375
mBERT:
  Accuracy: 0.9279
  F1: 0.9278
RoBERTa:
  Accuracy: 0.9297
  F1: 0.9299
