In [1]:
####### Import Necessary Libraries #######
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


####### Data Loading && Preparation #######
import nltk 
nltk.download('punkt')
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments, PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import SequenceClassifierOutput
from datasets import Dataset, DatasetDict
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding


####### Modelling #######
import torch
from transformers import Trainer, TrainingArguments
from sklearn.model_selection import train_test_split



#######Evaluation & Result #######
from sklearn.metrics import classification_report, confusion_matrix,  accuracy_score, precision_recall_fscore_support
import itertools


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\KOMPUTER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else "cpu"

# Data Loading

In [3]:
BLOOM_DATASET = '../../Dataset/learning_outcomes.csv'
PAIR_DATASET = '../../Dataset/cpmk_subcpmk_pairs.csv'

In [4]:
bloom_df = pd.read_csv(BLOOM_DATASET)
pair_df = pd.read_csv(PAIR_DATASET)

print(bloom_df)
pair_df

                                       Learning_Outcome     Jenis Level
0     Mampu merencanakan, menyelesaikan, dan mengeva...      CPMK    C6
1     Mampu merencanakan, menyelesaikan, dan mengeva...      CPMK    C6
2     Mampu merencanakan, menyelesaikan, dan mengeva...      CPMK    C6
3                           Mampu merancang gardu induk      CPMK    C6
4                           Mampu merancang gardu induk      CPMK    C6
...                                                 ...       ...   ...
2571  Mahasiswa dapat memahami dan mempraktekkan iba...  Sub-CPMK    P3
2572  Mampu menunjukan pelaksanaan penegakan hukum &...  Sub-CPMK    A5
2573  Mahasiswa mampu menunjukkan implementasi nilai...  Sub-CPMK    A5
2574  Mampu bertindak mengimplementasikan makna Sila...  Sub-CPMK    A5
2575  Mampu bertindak mengimplementasikan Sila Ke-4 ...  Sub-CPMK    A5

[2576 rows x 3 columns]


Unnamed: 0,cpmk,level_cpmk,subcpmk,level_subcpmk,keselarasan
0,"Mampu merencanakan, menyelesaikan, dan mengeva...",C6,Merencanakan aplikasi menggunakan prinsip dasa...,C6,True
1,"Mampu merencanakan, menyelesaikan, dan mengeva...",C6,Menyelesaikan logic functions and gates,C2,True
2,"Mampu merencanakan, menyelesaikan, dan mengeva...",C6,Mengevaluasi boolean algebra dan combinational...,C5,True
3,Mampu merancang gardu induk,C6,Mampu merancang instalasi listrik gardu induk,C6,True
4,Mampu merancang gardu induk,C6,Mampu merancang sistem pengetanahan gardu induk,C6,True
...,...,...,...,...,...
1283,"Mahasiswa mampu mempraktekkan haji, umroh dan ...",P3,Mahasiswa dapat memahami dan mempraktekkan iba...,P3,True
1284,"Mampu menganalisis masalah kontekstual PKn, me...",C4,Mampu menunjukan pelaksanaan penegakan hukum &...,A5,False
1285,Mahasiswa mampu menafsirkan dan menerapkan nil...,C3,Mahasiswa mampu menunjukkan implementasi nilai...,A5,False
1286,"Mampu menunjukan nilai ketuhanan, nilai kemanu...",A5,Mampu bertindak mengimplementasikan makna Sila...,A5,True


# Data Preparation & Modelling - Bloom

## Data Preparation

In [5]:
# Basic checks & cleaning
required_cols = ["Learning_Outcome", "Level"]
if not all(c in bloom_df.columns for c in required_cols):
    raise ValueError(f"CSV must contain columns: {required_cols}. Found: {list(bloom_df.columns)}")

bloom_df = bloom_df.dropna(subset=["Learning_Outcome", "Level"]).reset_index(drop=True)
bloom_df["Learning_Outcome"] = bloom_df["Learning_Outcome"].astype(str).str.strip()
bloom_df["Level"] = bloom_df["Level"].astype(str).str.strip().str.upper()

In [6]:
# Create label mapping
labels = sorted(bloom_df["Level"].unique().tolist(), key=lambda x: (x[0], int(x[1:]) if x[1:].isdigit() else x))
label2id = {lab:i for i, lab in enumerate(labels)}
id2label = {i:lab for lab,i in label2id.items()}

bloom_df["label_id"] = bloom_df["Level"].map(label2id)

In [7]:
# Train / val split (stratify by label)
train_df, val_df = train_test_split(bloom_df, test_size=0.1, random_state=42, stratify=bloom_df["label_id"])
train_ds = Dataset.from_pandas(train_df.reset_index(drop=True))
val_ds = Dataset.from_pandas(val_df.reset_index(drop=True))
datasets = DatasetDict({"train": train_ds, "validation": val_ds})


## Modelling 

### Indo-Bert-Base 

#### Single Head

In [8]:
OUTPUT_DIR = "Classification-result/indobert_single_head_out"
SEED = 42
BATCH_SIZE = 16
EPOCHS = 3
LR = 2e-5
MAX_LEN = 512

In [9]:
INDO_BERT_BASE = "indobenchmark/indobert-large-p2"   


In [10]:
MODEL_NAME = INDO_BERT_BASE
# Tokenizer and preprocessing
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

def preprocess_fn(batch):
    enc = tokenizer(batch["Learning_Outcome"], truncation=True, padding="max_length", max_length=MAX_LEN)
    enc["labels"] = batch["label_id"]
    return enc

datasets = datasets.map(preprocess_fn, batched=True, remove_columns=datasets["train"].column_names)


Map:   0%|          | 0/2318 [00:00<?, ? examples/s]

Map:   0%|          | 0/258 [00:00<?, ? examples/s]

In [11]:
# Model
num_labels = len(label2id)
model = AutoModelForSequenceClassification.from_pretrained(INDO_BERT_BASE , num_labels=num_labels, use_safetensors=True)
model.to(device)

# Data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-large-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
def compute_metrics(eval_pred):
    preds_logits, labels = eval_pred
    preds = np.argmax(preds_logits, axis=-1)
    acc = accuracy_score(labels, preds)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, preds, average="weighted", zero_division=0)
    return {"accuracy": acc, "precision": prec, "recall": rec, "f1": f1}
    

In [13]:

# Training args
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    logging_steps=50,
    seed=SEED,
    fp16= True,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    push_to_hub=False,
)



In [14]:
import accelerate
print(accelerate.__version__)
print(accelerate.__file__)

1.12.0
C:\Users\KOMPUTER\anaconda3\envs\fadfad\Lib\site-packages\accelerate\__init__.py


In [15]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=datasets["train"],
    eval_dataset=datasets["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

  trainer = Trainer(


In [16]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3345,0.888088,0.744186,0.731213,0.744186,0.732518
2,0.6283,0.753544,0.782946,0.768471,0.782946,0.768532
3,0.36,0.671947,0.786822,0.772296,0.786822,0.773398


TrainOutput(global_step=435, training_loss=0.8383893287044832, metrics={'train_runtime': 2567.7458, 'train_samples_per_second': 2.708, 'train_steps_per_second': 0.169, 'total_flos': 6480913461645312.0, 'train_loss': 0.8383893287044832, 'epoch': 3.0})

In [17]:
import os

# Eval final
metrics = trainer.evaluate()
print("Final evaluation metrics:", metrics)

Final evaluation metrics: {'eval_loss': 0.6719468235969543, 'eval_accuracy': 0.7868217054263565, 'eval_precision': 0.7722959082261406, 'eval_recall': 0.7868217054263565, 'eval_f1': 0.7733983990917428, 'eval_runtime': 5.3087, 'eval_samples_per_second': 48.599, 'eval_steps_per_second': 3.202, 'epoch': 3.0}


In [32]:
def preprocess(ex):
    return tokenizer(
        ex["Learning_Outcome"],
        truncation=True,
        padding="max_length",
        max_length=128
    )

val_tokenized = val_ds.map(preprocess, batched=True)
val_tokenized = val_tokenized.rename_column("label_id", "labels")

from torch.utils.data import DataLoader

def collate_fn(batch):
    return {
        "input_ids": torch.tensor([x["input_ids"] for x in batch]),
        "attention_mask": torch.tensor([x["attention_mask"] for x in batch]),
        "labels": torch.tensor([x["labels"] for x in batch]),
    }

loader = DataLoader(val_tokenized, batch_size=16, shuffle=False, collate_fn=collate_fn)

all_predictions = []
all_labels = []

model.eval()
with torch.no_grad():
    for batch in tqdm(loader):
        inputs = {
            "input_ids": batch["input_ids"].to(model.device),
            "attention_mask": batch["attention_mask"].to(model.device)
        }
        labels = batch["labels"].to(model.device)

        outputs = model(**inputs)
        preds = torch.argmax(outputs.logits, dim=-1)

        all_predictions.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

decoded_preds = [id2label[i] for i in all_predictions]
decoded_labels = [id2label[i] for i in all_labels]


print(classification_report(decoded_labels, decoded_preds))



Map:   0%|          | 0/258 [00:00<?, ? examples/s]

  0%|          | 0/17 [00:00<?, ?it/s]

              precision    recall  f1-score   support

          A3       0.00      0.00      0.00         1
          A4       0.75      0.38      0.50         8
          A5       0.56      0.62      0.59         8
          C1       0.70      0.78      0.74         9
          C2       0.82      0.84      0.83        49
          C3       0.81      0.59      0.68        29
          C4       0.83      0.88      0.85        49
          C5       0.86      0.86      0.86        21
          C6       0.88      0.98      0.93        47
          P2       0.00      0.00      0.00         3
          P3       0.60      0.82      0.69        22
          P4       0.56      0.50      0.53        10
          P5       0.00      0.00      0.00         2

    accuracy                           0.79       258
   macro avg       0.57      0.56      0.55       258
weighted avg       0.77      0.79      0.77       258



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [33]:
# Save model, tokenizer, and label maps
os.makedirs(OUTPUT_DIR, exist_ok=True)
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
pd.to_pickle(label2id, os.path.join(OUTPUT_DIR, "label2id.pkl"))
pd.to_pickle(id2label, os.path.join(OUTPUT_DIR, "id2label.pkl"))
print("Model & tokenizer saved to", OUTPUT_DIR)

Model & tokenizer saved to Classification-result/indobert_single_head_out


#### Double Head

In [None]:
+