In [1]:
from unsloth.chat_templates import get_chat_template
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

from transformers import AutoTokenizer, GemmaTokenizerFast
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from pythainlp.tokenize import word_tokenize
from jiwer import cer

import pandas as pd
import numpy as np
from tqdm import tqdm
import sqlite3
import sys
import os

import torch
import json
import re

import faiss
from langchain.vectorstores import FAISS
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

ROOT_DIR = "/project/lt200304-dipmt/paweekorn"
MODEL_PATH = f"{ROOT_DIR}/models/base/gemma3-4b-it"
ADAPTER_PATH = None
# ADAPTER_PATH = f"{ROOT_DIR}/models/adapter/gemma3-4b-it/checkpoint-1242"

print("GPU count:", torch.cuda.device_count())

ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
ü¶• Unsloth Zoo will now patch everything to make training faster!
INFO 11-16 04:17:02 [__init__.py:241] Automatically detected platform cuda.
GPU count: 2


## Data Prep

In [2]:
test_df = pd.read_csv('/project/lt200304-dipmt/paweekorn/data/DS01/test_v1.csv')
with open('/project/lt200304-dipmt/paweekorn/data/wipo/WIPO.json', 'r') as f:
    wipo_data = json.load(f)

wipo_data = {int(k): v for k, v in wipo_data.items()}
test_df['WIPO'] = test_df['NAME'].map(wipo_data)

print(test_df.shape)
test_df.head()

(2785, 4)


Unnamed: 0,NAME,ENG,THA,WIPO
0,41,"Educational services, namely, conducting semin...",‡∏ö‡∏£‡∏¥‡∏Å‡∏≤‡∏£‡∏î‡πâ‡∏≤‡∏ô‡∏®‡∏∂‡∏Å‡∏©‡∏≤ ‡πÑ‡∏î‡πâ‡πÅ‡∏Å‡πà ‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô‡∏Å‡∏≤‡∏£‡∏™‡∏±‡∏°‡∏°‡∏ô‡∏≤ ‡∏õ‡∏£‡∏∞‡∏ä‡∏∏‡∏° ...,Education; providing of training; entertainmen...
1,41,"training of drivers, road service employees, o...",‡∏Å‡∏≤‡∏£‡∏ù‡∏∂‡∏Å‡∏≠‡∏ö‡∏£‡∏°‡∏û‡∏ô‡∏±‡∏Å‡∏á‡∏≤‡∏ô‡∏Ç‡∏±‡∏ö‡∏£‡∏ñ ‡∏û‡∏ô‡∏±‡∏Å‡∏á‡∏≤‡∏ô‡∏ö‡∏£‡∏¥‡∏Å‡∏≤‡∏£‡∏ó‡∏≤‡∏á‡∏ñ‡∏ô‡∏ô ‡∏ú‡∏π‡πâ...,Education; providing of training; entertainmen...
2,41,providing online publications in the nature of...,‡∏à‡∏±‡∏î‡∏´‡∏≤‡∏™‡∏¥‡πà‡∏á‡∏û‡∏¥‡∏°‡∏û‡πå‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå‡πÉ‡∏ô‡∏•‡∏±‡∏Å‡∏©‡∏ì‡∏∞‡∏Ç‡∏≠‡∏á‡∏à‡∏î‡∏´‡∏°‡∏≤‡∏¢‡∏Ç‡πà‡∏≤‡∏ß ‡∏õ‡∏£‡∏∞...,Education; providing of training; entertainmen...
3,41,information with respect to leisure activities...,‡πÉ‡∏´‡πâ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏Å‡∏µ‡πà‡∏¢‡∏ß‡∏Å‡∏±‡∏ö‡∏Å‡∏¥‡∏à‡∏Å‡∏£‡∏£‡∏°‡∏™‡∏±‡∏ô‡∏ó‡∏ô‡∏≤‡∏Å‡∏≤‡∏£ ‡πÉ‡∏´‡πâ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏Å...,Education; providing of training; entertainmen...
4,41,"planning, arranging and operating of movies, s...",‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô ‡∏à‡∏±‡∏î‡∏Å‡∏≤‡∏£ ‡πÅ‡∏•‡∏∞‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô‡∏Å‡∏≤‡∏£‡∏†‡∏≤‡∏û‡∏¢‡∏ô‡∏ï‡∏£‡πå ‡∏Å‡∏≤‡∏£‡πÅ‡∏™‡∏î‡∏á ‡∏•‡∏∞‡∏Ñ...,Education; providing of training; entertainmen...


## Retrieval

In [3]:
sys.path.append(os.path.join(ROOT_DIR, "script"))  # parent dir
from utils.retrieval import process_query

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=True)
with open(f"{ROOT_DIR}/data/prompt/base_en2th.txt", "r") as f:
    instruction = f.read()

**RAG setup**

In [4]:
retriever = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=f"{ROOT_DIR}/models/retriever/{retriever}")
vectorstore = FAISS.load_local(
    f"{ROOT_DIR}/vector/{retriever}", 
    embeddings,
    allow_dangerous_deserialization=True
)
gpu_index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, vectorstore.index)
vectorstore.index = gpu_index

**Full Text Search**

In [5]:
unique_df = pd.read_csv(f"{ROOT_DIR}/data/unique_no_test.csv")

# in-memory save
db = sqlite3.connect(':memory:')
cur = db.cursor()
cur.execute('create virtual table wipo_table using fts5(eng, tha, tokenize="unicode61");')

# bulk index records
cur.executemany('insert into wipo_table (eng, tha) values (?,?);', unique_df[['ENG', 'THA']].to_records(index=False))
db.commit()

#### combine in prompt

In [6]:
def formatting_prompt(df):
    batch = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        prompt = instruction.format(
            WIPO=row['WIPO'],
            # RAG_DOC="",
            RAG_DOC=process_query(embeddings=embeddings, vectorstore=vectorstore, 
                                  db_cur=cur, query=row['ENG'], how="rag"),
            ENGLISH=row["ENG"]
        )
        chat = [{"role": "user", "content": prompt}]
        chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
        batch.append(chat)
    return batch

test_set = formatting_prompt(test_df)
print(test_set[0])

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2785/2785 [00:15<00:00, 178.01it/s]

<bos><start_of_turn>user
## Instructions:
You are an expert in the classification of goods and services under the WIPO Nice Classification system. Your task is to translate product names from English to accurate and direct Thai.

**Translation Guidelines:**
- Maintain the punctuation alignment of the input text.
- Use Thai legal and commercial terminology appropriate for trademarks and product classification.
- Do not include explanations, commentary, or any information beyond the translation output.
- Provided output in the format of {"thai_translation": "Thai translation"}

**Product Domain:**
Education; providing of training; entertainment; sporting and cultural activities.

## Retrieved References:

English: Educational service, namely, conducting seminar, conference, workshop, online tutorial and computer application training in the field of computer software, business analytics and business intelligence and distributing course material in connection therewith
Thai: ‡∏ö‡∏£‡∏¥‡∏Å‡∏




## Inference Time!

In [7]:
if not bool(ADAPTER_PATH) or "fine-tuned" in MODEL_PATH:
    enable_lora = False ; lora_req = None
    tensor_parallel = 1
else:
    enable_lora = True; lora_req = LoRARequest("lora_adapter", 1, ADAPTER_PATH) if ADAPTER_PATH else None;
    tensor_parallel = torch.cuda.device_count()

model = LLM(
    model=MODEL_PATH,
    quantization="bitsandbytes",
    max_model_len=4096,
    tensor_parallel_size=tensor_parallel,
    enable_prefix_caching=True,
    gpu_memory_utilization=0.5,
    enforce_eager=True,
    enable_lora=enable_lora,
    max_lora_rank=64
)

INFO 11-16 04:17:41 [utils.py:326] non-default args: {'model': '/project/lt200304-dipmt/paweekorn/models/base/gemma3-4b-it', 'max_model_len': 4096, 'enable_prefix_caching': True, 'gpu_memory_utilization': 0.5, 'disable_log_stats': True, 'quantization': 'bitsandbytes', 'enforce_eager': True, 'max_lora_rank': 64}
INFO 11-16 04:17:50 [__init__.py:711] Resolved architecture: Gemma3ForConditionalGeneration
INFO 11-16 04:17:50 [__init__.py:1750] Using max model len 4096
INFO 11-16 04:17:53 [scheduler.py:222] Chunked prefill is enabled with max_num_batched_tokens=8192.
INFO 11-16 04:17:53 [__init__.py:3565] Cudagraph is disabled under eager mode
INFO 11-16 04:18:02 [__init__.py:241] Automatically detected platform cuda.
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:05 [core.py:636] Waiting for init message from front-end.
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:05 [core.py:74] Initializing a V1 LLM engine (v0.10.1.1) with config: model='/project/lt200304-dipmt/pawe

[1;36m(EngineCore_0 pid=2760734)[0;0m Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:12 [gpu_model_runner.py:1953] Starting to load model /project/lt200304-dipmt/paweekorn/models/base/gemma3-4b-it...
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:12 [gpu_model_runner.py:1985] Loading model from scratch...
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:13 [cuda.py:345] Using FlexAttention backend for head_size=72 on V1 engine.
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:13 [__init__.py:3565] Cudagraph is disabled under eager mode
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:13 [cuda.py:328] Using Flash Attention backend on V1 engine.
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:13 [bitsandbytes_loader.py:742] Loading weights with BitsAndBytes quantization. May take a while ...


Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:04<00:04,  4.20s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:07<00:00,  3.80s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:07<00:00,  3.86s/it]
[1;36m(EngineCore_0 pid=2760734)[0;0m 


[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:21 [gpu_model_runner.py:2007] Model loading took 3.7575 GiB and 8.297242 seconds
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:21 [gpu_model_runner.py:2591] Encoder cache will be initialized with a budget of 8192 tokens, and profiled with 31 image items of the maximum feature size.
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:50 [gpu_worker.py:276] Available KV cache memory: 12.66 GiB
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:50 [kv_cache_utils.py:1013] GPU KV cache size: 94,800 tokens
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:50 [kv_cache_utils.py:1017] Maximum concurrency for 4,096 tokens per request: 23.07x
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:51 [core.py:214] init engine (profile, create kv cache, warmup model) took 29.90 seconds
[1;36m(EngineCore_0 pid=2760734)[0;0m INFO 11-16 04:18:52 [__init__.py:3565] Cudagraph is disabled under eager mode
INFO 11-

In [9]:
decoding_params = SamplingParams(
    temperature=0.0, top_p=1, top_k=-1,
    max_tokens=4096,
    skip_special_tokens=True,
    repetition_penalty=1.15,
    frequency_penalty=0.2,
)

results = model.generate(test_set, decoding_params, lora_request=lora_req)
response = [r.outputs[0].text for r in results]
response[0]

Adding requests:   0%|          | 0/2785 [00:00<?, ?it/s]



Processed prompts:   0%|          | 0/2785 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s‚Ä¶

[1;36m(EngineCore_0 pid=2760734)[0;0m [rank0]:W1116 04:20:48.646000 2760734 /lustrefs/disk/home/psoratya/.conda/envs/unsloth_env/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py:964] [0/8] torch._dynamo hit config.recompile_limit (8)
[1;36m(EngineCore_0 pid=2760734)[0;0m [rank0]:W1116 04:20:48.646000 2760734 /lustrefs/disk/home/psoratya/.conda/envs/unsloth_env/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py:964] [0/8]    function: 'forward_static' (/home/psoratya/.conda/envs/unsloth_env/lib/python3.12/site-packages/vllm/model_executor/layers/layernorm.py:221)
[1;36m(EngineCore_0 pid=2760734)[0;0m [rank0]:W1116 04:20:48.646000 2760734 /lustrefs/disk/home/psoratya/.conda/envs/unsloth_env/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py:964] [0/8]    last reason: 0/7: expected type of 'residual' to be a tensor type, ' but found <class 'NoneType'>
[1;36m(EngineCore_0 pid=2760734)[0;0m [rank0]:W1116 04:20:48.646000 2760734 /lustrefs/disk/home/psorat

'{"thai_translation": "‡∏ö‡∏£‡∏¥‡∏Å‡∏≤‡∏£‡∏î‡πâ‡∏≤‡∏ô‡∏Å‡∏≤‡∏£‡∏®‡∏∂‡∏Å‡∏©‡∏≤, ‡πÑ‡∏î‡πâ‡πÅ‡∏Å‡πà, ‡∏Å‡∏≤‡∏£‡∏à‡∏±‡∏î‡∏á‡∏≤‡∏ô‡∏™‡∏±‡∏°‡∏°‡∏ô‡∏≤, ‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡∏ä‡∏∏‡∏°, ‡πÇ‡∏Ñ‡∏£‡∏á‡∏Å‡∏≤‡∏£‡∏≠‡∏ö‡∏£‡∏°, ‡∏ö‡∏ó‡πÄ‡∏£‡∏µ‡∏¢‡∏ô‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå‡πÅ‡∏•‡∏∞‡∏ù‡∏∂‡∏Å‡∏≠‡∏ö‡∏£‡∏°‡∏Å‡∏≤‡∏£‡πÉ‡∏ä‡πâ‡πÇ‡∏õ‡∏£‡πÅ‡∏Å‡∏£‡∏°‡∏Ñ‡∏≠‡∏°‡∏û‡∏¥‡∏ß‡πÄ‡∏ï‡∏≠‡∏£‡πå‡πÉ‡∏ô‡∏™‡∏≤‡∏Ç‡∏≤‡∏ß‡∏¥‡∏ä‡∏≤‡∏ã‡∏≠‡∏ü‡∏ï‡πå‡πÅ‡∏ß‡∏£‡πå‡∏Ñ‡∏≠‡∏°‡∏û‡∏¥‡∏ß‡πÄ‡∏ï‡∏≠‡∏£‡πå, ‡∏Å‡∏≤‡∏£‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏≤‡∏á‡∏ò‡∏∏‡∏£‡∏Å‡∏¥‡∏à, ‡πÅ‡∏•‡∏∞‡∏£‡∏∞‡∏ö‡∏ö‡∏ò‡∏∏‡∏£‡∏Å‡∏¥‡∏à‡∏≠‡∏±‡∏à‡∏â‡∏£‡∏¥‡∏¢‡∏∞ ‡∏£‡∏ß‡∏°‡∏ñ‡∏∂‡∏á‡∏Å‡∏≤‡∏£‡πÄ‡∏ú‡∏¢‡πÅ‡∏û‡∏£‡πà‡∏™‡∏∑‡πà‡∏≠‡∏Å‡∏≤‡∏£‡πÄ‡∏£‡∏µ‡∏¢‡∏ô‡∏£‡∏π‡πâ‡∏ó‡∏µ‡πà‡πÄ‡∏Å‡∏µ‡πà‡∏¢‡∏ß‡∏Ç‡πâ‡∏≠‡∏á"}'

## Evaluation

In [10]:
def filter_thai(text):
    pattern = r'[\u0e00-\u0e7f\s,.?!]+'
    matches = re.findall(pattern, text)
    return "".join(matches).strip().replace("\n", "")

def extract_json(text):
    text = text[text.rfind("{"):]
    pattern = r'''{\s*[\'\"]thai_translation[\'\"]:\s*[\'\"].*?[\'\"]\s*}'''
    matches = re.findall(pattern, text, re.DOTALL)

    if matches:
        try:
            loaded = json.loads(matches[0])
            return loaded['thai_translation'].replace(",", "")
        except json.JSONDecodeError as e:
            return filter_thai(text)
    else:
        return filter_thai(text)


test_df['PRED'] = response
test_df['PRED_cleaned'] = test_df['PRED'].apply(extract_json)
print("Null rows: ", test_df['PRED_cleaned'].isnull().sum())
test_df.head()

Null rows:  0


Unnamed: 0,NAME,ENG,THA,WIPO,PRED,PRED_cleaned
0,41,"Educational services, namely, conducting semin...",‡∏ö‡∏£‡∏¥‡∏Å‡∏≤‡∏£‡∏î‡πâ‡∏≤‡∏ô‡∏®‡∏∂‡∏Å‡∏©‡∏≤ ‡πÑ‡∏î‡πâ‡πÅ‡∏Å‡πà ‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô‡∏Å‡∏≤‡∏£‡∏™‡∏±‡∏°‡∏°‡∏ô‡∏≤ ‡∏õ‡∏£‡∏∞‡∏ä‡∏∏‡∏° ...,Education; providing of training; entertainmen...,"{""thai_translation"": ""‡∏ö‡∏£‡∏¥‡∏Å‡∏≤‡∏£‡∏î‡πâ‡∏≤‡∏ô‡∏Å‡∏≤‡∏£‡∏®‡∏∂‡∏Å‡∏©‡∏≤, ‡πÑ‡∏î‡πâ‡πÅ...",‡∏ö‡∏£‡∏¥‡∏Å‡∏≤‡∏£‡∏î‡πâ‡∏≤‡∏ô‡∏Å‡∏≤‡∏£‡∏®‡∏∂‡∏Å‡∏©‡∏≤ ‡πÑ‡∏î‡πâ‡πÅ‡∏Å‡πà ‡∏Å‡∏≤‡∏£‡∏à‡∏±‡∏î‡∏á‡∏≤‡∏ô‡∏™‡∏±‡∏°‡∏°‡∏ô‡∏≤ ‡∏Å‡∏≤‡∏£‡∏õ...
1,41,"training of drivers, road service employees, o...",‡∏Å‡∏≤‡∏£‡∏ù‡∏∂‡∏Å‡∏≠‡∏ö‡∏£‡∏°‡∏û‡∏ô‡∏±‡∏Å‡∏á‡∏≤‡∏ô‡∏Ç‡∏±‡∏ö‡∏£‡∏ñ ‡∏û‡∏ô‡∏±‡∏Å‡∏á‡∏≤‡∏ô‡∏ö‡∏£‡∏¥‡∏Å‡∏≤‡∏£‡∏ó‡∏≤‡∏á‡∏ñ‡∏ô‡∏ô ‡∏ú‡∏π‡πâ...,Education; providing of training; entertainmen...,"{""thai_translation"": ""‡∏Å‡∏≤‡∏£‡∏ù‡∏∂‡∏Å‡∏≠‡∏ö‡∏£‡∏°‡∏Ñ‡∏ô‡∏Ç‡∏±‡∏ö‡∏£‡∏ñ, ‡∏û‡∏ô‡∏±‡∏Å‡∏á...",‡∏Å‡∏≤‡∏£‡∏ù‡∏∂‡∏Å‡∏≠‡∏ö‡∏£‡∏°‡∏Ñ‡∏ô‡∏Ç‡∏±‡∏ö‡∏£‡∏ñ ‡∏û‡∏ô‡∏±‡∏Å‡∏á‡∏≤‡∏ô‡∏ö‡∏£‡∏¥‡∏Å‡∏≤‡∏£‡∏ó‡∏≤‡∏á‡∏ñ‡∏ô‡∏ô ‡∏ú‡∏π‡πâ‡∏õ‡∏è‡∏¥‡∏ö‡∏±...
2,41,providing online publications in the nature of...,‡∏à‡∏±‡∏î‡∏´‡∏≤‡∏™‡∏¥‡πà‡∏á‡∏û‡∏¥‡∏°‡∏û‡πå‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå‡πÉ‡∏ô‡∏•‡∏±‡∏Å‡∏©‡∏ì‡∏∞‡∏Ç‡∏≠‡∏á‡∏à‡∏î‡∏´‡∏°‡∏≤‡∏¢‡∏Ç‡πà‡∏≤‡∏ß ‡∏õ‡∏£‡∏∞...,Education; providing of training; entertainmen...,"{""thai_translation"": ""‡πÉ‡∏´‡πâ‡πÄ‡∏ú‡∏¢‡πÅ‡∏û‡∏£‡πà‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏´‡∏≤‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå...",‡πÉ‡∏´‡πâ‡πÄ‡∏ú‡∏¢‡πÅ‡∏û‡∏£‡πà‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏´‡∏≤‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå‡πÉ‡∏ô‡∏£‡∏π‡∏õ‡πÅ‡∏ö‡∏ö‡∏Ç‡∏≠‡∏á‡∏à‡∏î‡∏´‡∏°‡∏≤‡∏¢‡∏Ç‡πà‡∏≤‡∏ß ...
3,41,information with respect to leisure activities...,‡πÉ‡∏´‡πâ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏Å‡∏µ‡πà‡∏¢‡∏ß‡∏Å‡∏±‡∏ö‡∏Å‡∏¥‡∏à‡∏Å‡∏£‡∏£‡∏°‡∏™‡∏±‡∏ô‡∏ó‡∏ô‡∏≤‡∏Å‡∏≤‡∏£ ‡πÉ‡∏´‡πâ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏Å...,Education; providing of training; entertainmen...,"{""thai_translation"": ""‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏Å‡∏µ‡πà‡∏¢‡∏ß‡∏Å‡∏±‡∏ö‡∏Å‡∏¥‡∏à‡∏Å‡∏£‡∏£‡∏°‡∏™‡∏±...",‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏Å‡∏µ‡πà‡∏¢‡∏ß‡∏Å‡∏±‡∏ö‡∏Å‡∏¥‡∏à‡∏Å‡∏£‡∏£‡∏°‡∏™‡∏±‡∏ô‡∏ó‡∏ô‡∏≤‡∏Å‡∏≤‡∏£‡πÅ‡∏•‡∏∞‡∏Å‡∏≤‡∏£‡∏®‡∏∂‡∏Å‡∏©‡∏≤
4,41,"planning, arranging and operating of movies, s...",‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô ‡∏à‡∏±‡∏î‡∏Å‡∏≤‡∏£ ‡πÅ‡∏•‡∏∞‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô‡∏Å‡∏≤‡∏£‡∏†‡∏≤‡∏û‡∏¢‡∏ô‡∏ï‡∏£‡πå ‡∏Å‡∏≤‡∏£‡πÅ‡∏™‡∏î‡∏á ‡∏•‡∏∞‡∏Ñ...,Education; providing of training; entertainmen...,"{""thai_translation"": ""‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô, ‡∏à‡∏±‡∏î‡∏Å‡∏≤‡∏£ ‡πÅ‡∏•‡∏∞‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô...",‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô ‡∏à‡∏±‡∏î‡∏Å‡∏≤‡∏£ ‡πÅ‡∏•‡∏∞‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô‡∏Å‡∏≤‡∏£ ‡∏†‡∏≤‡∏û‡∏¢‡∏ô‡∏ï‡∏£‡πå ‡∏Å‡∏≤‡∏£‡πÅ‡∏™‡∏î‡∏á ‡∏•‡∏∞...


In [11]:
cer_result, bleu = [], []
chencherry = SmoothingFunction().method1
for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
    sol = row["THA"]
    pred = "" if pd.isna(row["PRED_cleaned"]) else row["PRED_cleaned"]

    # bleu score
    ref = word_tokenize(sol, engine='attacut')
    hyp = word_tokenize(pred, engine='attacut')

    ref = [word for word in ref if not word.isspace()]
    hyp = [word for word in hyp if not word.isspace()]
    
    cer_result.append(cer(sol, pred))
    bleu.append(sentence_bleu([ref], hyp, smoothing_function=chencherry))

test_df["BLEU"] = bleu
print(f"Average CER:", np.mean(cer_result).round(4))
print(f"Average BLEU:", np.mean(bleu).round(4))

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2785/2785 [00:18<00:00, 148.92it/s]

Average CER: 0.2904
Average BLEU: 0.4805





In [12]:
MODEL_ID = MODEL_PATH.split('/')[-1] if "checkpoint" not in MODEL_PATH else MODEL_PATH.split('/')[-2]
fname = f"{MODEL_ID}_base"
save_path = f"{ROOT_DIR}/data/infer-result/en2th/{fname}.csv"
test_df[['PRED', 'PRED_cleaned']].to_csv(save_path, index=False)
print(f"Save at {save_path} already!")

Save at /project/lt200304-dipmt/paweekorn/data/infer-result/en2th/gemma3-4b-it_base.csv already!


## Check memory

In [13]:
import psutil

# Get CPU usage percentage
cpu_percent = psutil.cpu_percent(interval=1)
print(f"CPU Usage: {cpu_percent}%")

# Get detailed virtual memory information
virtual_memory = psutil.virtual_memory()

# Print various memory statistics
print(f"Total Memory: {virtual_memory.total / (1024**3):.2f} GB")  # Convert bytes to GB
print(f"Available Memory: {virtual_memory.available / (1024**3):.2f} GB")
print(f"Used Memory: {virtual_memory.used / (1024**3):.2f} GB")
print(f"Memory Usage Percentage: {virtual_memory.percent}%")

CPU Usage: 1.6%
Total Memory: 502.45 GB
Available Memory: 480.91 GB
Used Memory: 14.16 GB
Memory Usage Percentage: 4.3%


In [14]:
!nvidia-smi

Sun Nov 16 04:21:29 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.57.01              Driver Version: 565.57.01      CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          On  |   00000000:03:00.0 Off |                    0 |
| N/A   39C    P0             62W /  400W |   22549MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA A100-SXM4-40GB          On  |   00

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
