In [43]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import Dataset
from sklearn.metrics import accuracy_score, f1_score

# Load the labelled receipts and convert the Chinese category names in the
# 'label' column into integer class ids.
df = pd.read_csv('receipt_labels_1000.csv')
label_map = {'交通': 0, '食飯': 1, '購物': 2, '娛樂': 3, '其他': 4}

# Series.map yields NaN for any value that is not a key of label_map (typos,
# new categories, empty cells). Fail loudly here instead of letting NaN / float
# labels flow silently into training.
label_ids = df['label'].map(label_map)
unmapped = label_ids.isna()
if unmapped.any():
    unexpected = df.loc[unmapped, 'label'].unique().tolist()
    raise ValueError(
        f"{int(unmapped.sum())} row(s) have a label outside label_map: {unexpected}"
    )
df['label'] = label_ids.astype('int64')

dataset = Dataset.from_pandas(df)
tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese")

def tokenize(batch):
    """Tokenize the 'text' column of a batch, truncating to at most 128 tokens.

    No padding is applied here. With ``batched=True`` a ``padding=True`` call
    would pad every row to the longest sequence of the whole map batch and
    store those pad tokens in the dataset. The ``DataCollatorWithPadding``
    handed to the Trainer pads each mini-batch to its own longest sequence
    instead, so padding at this stage is redundant and only wastes compute.

    Args:
        batch: mapping with a 'text' entry holding the strings to encode.

    Returns:
        The tokenizer output for the batch (unpadded, truncated encodings).
    """
    return tokenizer(batch['text'], truncation=True, max_length=128)

# Tokenize every row in batches, then expose the target column as "labels".
dataset = dataset.map(tokenize, batched=True)
dataset = dataset.rename_column("label", "labels")
# 80/20 train/validation split. The seed is fixed so the held-out rows, and
# therefore the reported validation metrics, are the same on every run.
dataset = dataset.train_test_split(test_size=0.2, seed=42)

# bert-base-chinese encoder with a freshly initialised 5-way classification head
# (one output per entry of the label mapping used above).
model = AutoModelForSequenceClassification.from_pretrained("bert-base-chinese", num_labels=5)

# Fine-tuning configuration, grouped by concern.
training_args = TrainingArguments(
    # --- output and logging ---
    output_dir="./hk_receipt_classifier",
    logging_dir="./logs",
    logging_steps=50,
    report_to="none",
    # --- optimisation schedule ---
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=100,
    # --- batch sizes ---
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # --- evaluate and checkpoint once per epoch; afterwards reload the
    #     checkpoint with the best "accuracy" (key returned by compute_metrics) ---
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)

def compute_metrics(eval_pred):
    """Score one evaluation pass.

    Args:
        eval_pred: pair that unpacks into (model scores, reference labels);
            the scores are reduced to a class id per row with argmax over axis 1.

    Returns:
        dict with "accuracy" and weighted-average "f1".
    """
    scores, references = eval_pred
    predicted_ids = scores.argmax(axis=1)

    metrics = {}
    metrics["accuracy"] = accuracy_score(references, predicted_ids)
    metrics["f1"] = f1_score(references, predicted_ids, average="weighted")
    return metrics

# Collator that pads the examples of each mini-batch with the tokenizer's pad token.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    # `tokenizer=` is deprecated on Trainer and emits a FutureWarning;
    # `processing_class=` is its drop-in replacement.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

# Evaluate on the held-out split; compute_metrics' "accuracy" is reported by the
# Trainer under the "eval_accuracy" key.
eval_result = trainer.evaluate()
print(f"Final validation accuracy: {eval_result['eval_accuracy']:.4f}")

# Persist model weights/config and the tokenizer files side by side so the
# directory can be loaded on its own for inference.
trainer.save_model("./fine_tuned_hk_classifier")
tokenizer.save_pretrained("./fine_tuned_hk_classifier")

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.6826,0.222887,0.955,0.954049
2,0.0301,0.005815,1.0,1.0
3,0.0058,0.003658,1.0,1.0




Final validation accuracy: 1.0000


('./fine_tuned_hk_classifier/tokenizer_config.json',
 './fine_tuned_hk_classifier/special_tokens_map.json',
 './fine_tuned_hk_classifier/vocab.txt',
 './fine_tuned_hk_classifier/added_tokens.json',
 './fine_tuned_hk_classifier/tokenizer.json')

In [53]:
from transformers import pipeline
import torch

# Load the fine-tuned checkpoint into a text-classification pipeline, on the
# first GPU when one is available and on CPU otherwise.
# The deprecated `return_all_scores=False` argument is omitted: returning only
# the top-scoring label is already the pipeline's default behaviour.
classifier = pipeline(
    "text-classification",
    model="./fine_tuned_hk_classifier",
    tokenizer="./fine_tuned_hk_classifier",
    device=0 if torch.cuda.is_available() else -1,
)

# Class id -> human-readable category name (inverse of the training label mapping).
id2label = {0: "交通", 1: "食飯", 2: "購物", 3: "娛樂", 4: "其他"}

# Pipeline label string -> integer class id, taken from the checkpoint's own
# config. This avoids parsing the "LABEL_<n>" naming convention by hand.
label2id = classifier.model.config.label2id

test_texts = [
    "港鐵 車票",
    "翠華餐廳 奶茶",
    "萬寧 口罩",
    "英皇戲院 4DX",
    "養生堂 中藥",
    "HKTaxi 的士費",
    "759阿信屋 維他奶",
    "大家樂 燒味飯"
]

for text in test_texts:
    # For a single string the pipeline returns a one-element list holding
    # {"label": ..., "score": ...} for the top class.
    result = classifier(text)[0]
    label_id = label2id[result["label"]]
    score = result["score"]
    print(f"{text.ljust(25)} → {id2label[label_id]:<3} （Prediction: {score:.1%}）")


Device set to use cpu


港鐵 車票                     → 交通  （Prediction: 99.6%）
翠華餐廳 奶茶                   → 食飯  （Prediction: 99.7%）
萬寧 口罩                     → 購物  （Prediction: 99.6%）
英皇戲院 4DX                  → 娛樂  （Prediction: 97.4%）
養生堂 中藥                    → 其他  （Prediction: 98.0%）
HKTaxi 的士費                → 交通  （Prediction: 99.7%）
759阿信屋 維他奶                → 購物  （Prediction: 99.6%）
大家樂 燒味飯                   → 食飯  （Prediction: 99.8%）
