# Notebook: Chấm điểm IELTS Writing Task 2 bằng gpt-oss-20b (Kaggle)

Notebook này hướng dẫn cách sử dụng mô hình gpt-oss-20b để chấm điểm bài IELTS Writing Task 2, tương thích môi trường Kaggle.

In [None]:
# Section 1: Cài đặt và nhập các thư viện cần thiết
!pip install -q transformers accelerate torch jsonschema

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
import base64
from jsonschema import validate, ValidationError

In [None]:
# Section 2: Download and load the gpt-oss-20b model
MODEL_NAME = "openai/gpt-oss-20b"  # Official model from Hugging Face

# trust_remote_code=True allows code shipped in the model repository to run
# while loading; only keep it for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# The weights are requested in float16 and device placement is left to the
# library (device_map="auto").
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)
# Put the module in evaluation mode; the notebook only runs inference.
model.eval()

In [None]:
# Section 3: Define the input schema following score_request.v1.json
score_request_schema = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "score_request.v1.json",
    "title": "ScoreRequestV1",
    "type": "object",
    # Keys other than the ones listed under "properties" are rejected.
    "additionalProperties": False,
    "properties": {
        "task_type": {"type": "string", "enum": ["task1", "task2"]},
        "essay": {"type": "string", "minLength": 1, "maxLength": 20000},
        "question": {"type": "string", "minLength": 5, "maxLength": 1000},
        "image_base64": {"type": "string"},
        "options": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "max_evidence": {"type": "integer", "minimum": 1, "maximum": 3}
            }
        }
    },
    # Only task_type and essay are mandatory; question, image_base64 and
    # options are optional.
    "required": ["task_type", "essay"],
    # NOTE(review): the conditional below only re-requires "essay", which is
    # already required above, so it currently adds no constraint. Confirm
    # whether task1 was meant to require a different field.
    "allOf": [
        {
            "if": {"properties": {"task_type": {"const": "task1"}}},
            "then": {"required": ["essay"]}
        }
    ]
}

def validate_score_request(data):
    """Validate a score request against ``score_request_schema``.

    A short status message is printed in both outcomes.

    Args:
        data: The request object to check.

    Returns:
        ``True`` when ``data`` satisfies the schema, ``False`` when
        ``jsonschema`` raises ``ValidationError`` for it.
    """
    try:
        validate(instance=data, schema=score_request_schema)
    except ValidationError as err:
        print(f"Lỗi schema: {err}")
        return False
    # Reached only when validation raised nothing.
    print("Input hợp lệ theo schema.")
    return True

In [None]:
# Section 4: Pre-process the input data (essay, question, image_base64)
def preprocess_input(data):
    """Extract the essay, the question and the decoded image from a request.

    Args:
        data: Mapping shaped like a score request. Only the ``"essay"``,
            ``"question"`` and ``"image_base64"`` keys are read.

    Returns:
        A ``(essay, question, image)`` tuple. ``essay`` and ``question``
        fall back to ``""`` when their key is absent. ``image`` is the
        base64-decoded ``bytes`` of ``image_base64``, or ``None`` when that
        field is missing, empty, or cannot be decoded.
    """
    essay = data.get("essay", "")
    question = data.get("question", "")

    encoded_image = data.get("image_base64")
    if not encoded_image:
        return essay, question, None

    try:
        image = base64.b64decode(encoded_image)
    except (ValueError, TypeError):
        # Decoding is best effort: binascii.Error (malformed base64) is a
        # ValueError subclass, and TypeError covers non-string payloads.
        # Anything else is an unexpected bug and is allowed to surface.
        image = None
    return essay, question, image

In [None]:
# Section 5: Scoring function built on the gpt-oss-20b model
def score_ielts_task2_oss20b(essay, question, model, tokenizer, max_new_tokens=512):
    """Score an IELTS Writing Task 2 essay with a causal language model.

    The essay and question are embedded in an examiner prompt, the model
    generates greedily (``do_sample=False``), and the first JSON object found
    in the generated text is returned.

    Args:
        essay: Essay text to score.
        question: Task question the essay answers.
        model: Model object exposing ``device`` and ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The parsed JSON object (a ``dict``) produced by the model, or
        ``{"error": ...}`` when no JSON object is present or none can be
        parsed.
    """
    # NOTE(review): the prompt is fed as raw text; gpt-oss checkpoints are
    # normally prompted through the tokenizer's chat template - confirm that
    # raw prompting is intended.
    prompt = f"""You are an IELTS Writing Task 2 examiner. Score the following essay according to the IELTS rubric (0-9) and provide feedback for each criterion (Task Response, Coherence and Cohesion, Lexical Resource, Grammatical Range and Accuracy).\n\nQuestion: {question}\nEssay: {essay}\n\nReturn a JSON object with keys: overall, per_criterion (dict), feedback (dict)."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

    # A causal LM returns prompt tokens followed by the continuation. Decode
    # only the continuation so braces in the essay/question cannot be mistaken
    # for the model's answer.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Find the JSON object in the result
    start = completion.find("{")
    if start == -1 or completion.find("}", start) == -1:
        return {"error": "Không tìm thấy JSON trong kết quả model."}

    # Try every "{" as a candidate start. raw_decode stops at the end of the
    # first complete JSON value, so trailing text after the object is harmless.
    decoder = json.JSONDecoder()
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(completion[start:])
        except json.JSONDecodeError:
            start = completion.find("{", start + 1)
        else:
            return parsed
    return {"error": "Không parse được JSON từ kết quả model."}

In [None]:
# Section 6: Trial scoring run on sample data
# NOTE(review): the sample "essay" below is essentially the question text
# rather than a full essay; use a real essay for a meaningful score.
sample_data = {
    "task_type": "task2",
    "essay": "In today’s world, technology has become an integral part of our lives. Some people believe that it has improved our quality of life, while others think it has made life more complicated. Discuss both views and give your own opinion.",
    "question": "Some people believe that technology has improved our quality of life, while others think it has made life more complicated. Discuss both views and give your own opinion.",
    "options": {"max_evidence": 2}
}

# Score only when the request passes schema validation; otherwise `result`
# holds an error placeholder. The decoded image (third value) is not used.
if validate_score_request(sample_data):
    essay, question, _ = preprocess_input(sample_data)
    result = score_ielts_task2_oss20b(essay, question, model, tokenizer)
else:
    result = {"error": "Input không hợp lệ."}

In [None]:
# Section 7: Display the scoring result
import pprint
# `result` is whatever the most recently executed scoring cell assigned.
pprint.pprint(result)

In [None]:
# Section 5.1: Load the standard prompt from the source code (per src/app/prompts/task2.py)
def get_standard_task2_prompt(essay, question):
    """Build the examiner prompt for one IELTS Writing Task 2 submission.

    Args:
        essay: Essay text to embed in the prompt.
        question: Task question to embed in the prompt.

    Returns:
        A five-line prompt string: examiner role, scoring instruction,
        question, essay, and the requested JSON output keys.
    """
    # Wording is intended to mirror src/app/prompts/task2.py.
    prompt_lines = [
        "You are an IELTS Writing Task 2 examiner.",
        "Score the following essay according to the IELTS rubric (0-9) and provide feedback for each criterion (Task Response, Coherence and Cohesion, Lexical Resource, Grammatical Range and Accuracy).",
        f"Question: {question}",
        f"Essay: {essay}",
        "Return a JSON object with keys: overall, per_criterion (dict), feedback (dict).",
    ]
    return "\n".join(prompt_lines)

# Redefine the scoring function so that it uses the standard prompt

def score_ielts_task2_oss20b(essay, question, model, tokenizer, max_new_tokens=512):
    """Score an IELTS Writing Task 2 essay using the standard prompt.

    The prompt comes from ``get_standard_task2_prompt``; the model generates
    greedily (``do_sample=False``) and the first JSON object found in the
    generated text is returned.

    Args:
        essay: Essay text to score.
        question: Task question the essay answers.
        model: Model object exposing ``device`` and ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The parsed JSON object (a ``dict``) produced by the model, or
        ``{"error": ...}`` when no JSON object is present or none can be
        parsed.
    """
    prompt = get_standard_task2_prompt(essay, question)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

    # A causal LM returns prompt tokens followed by the continuation. Decode
    # only the continuation so braces in the essay/question cannot be mistaken
    # for the model's answer.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    start = completion.find("{")
    if start == -1 or completion.find("}", start) == -1:
        return {"error": "Không tìm thấy JSON trong kết quả model."}

    # Try every "{" as a candidate start. raw_decode stops at the end of the
    # first complete JSON value, so trailing text after the object is harmless.
    decoder = json.JSONDecoder()
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(completion[start:])
        except json.JSONDecodeError:
            start = completion.find("{", start + 1)
        else:
            return parsed
    return {"error": "Không parse được JSON từ kết quả model."}

In [None]:
# Section 8: Load and batch-test with a sample dataset (mocked after src/evaluation/datasets/hf_task2.py)
import pandas as pd

# Mock sample dataset (can be replaced with a real file if available)
# NOTE(review): in both rows the "essay" text is identical to the "question".
dataset = [
    {
        "task_type": "task2",
        "essay": "Some people think that the best way to increase road safety is to increase the minimum legal age for driving cars or riding motorbikes. To what extent do you agree or disagree?",
        "question": "Some people think that the best way to increase road safety is to increase the minimum legal age for driving cars or riding motorbikes. To what extent do you agree or disagree?",
        "options": {"max_evidence": 2}
    },
    {
        "task_type": "task2",
        "essay": "Many people believe that social networking sites have a huge negative impact on both individuals and society. To what extent do you agree or disagree?",
        "question": "Many people believe that social networking sites have a huge negative impact on both individuals and society. To what extent do you agree or disagree?",
        "options": {"max_evidence": 2}
    }
]

# One entry per dataset row, in order: {"index": i, "result": <score or error>}.
# Each iteration rebinds the notebook-level names essay, question and result.
results = []
for i, row in enumerate(dataset):
    if validate_score_request(row):
        essay, question, _ = preprocess_input(row)
        result = score_ielts_task2_oss20b(essay, question, model, tokenizer)
        results.append({"index": i, "result": result})
    else:
        # Rows that fail schema validation are kept with an error placeholder.
        results.append({"index": i, "result": {"error": "Input không hợp lệ."}})

# Display the batch results
df_results = pd.DataFrame(results)
# Bare expression: shown as the cell output when run in a notebook.
df_results

In [None]:
# Section 9: Make sure the output follows the standard score_response.v1.json format
def validate_score_response_format(response):
    """Check a scoring result against a simplified response schema.

    The schema only requires the top-level keys ``overall`` (number),
    ``per_criterion`` (object) and ``feedback`` (object).

    Args:
        response: The scoring result to check.

    Returns:
        ``True`` when ``response`` satisfies the schema; ``False`` (after
        printing the validation error) otherwise.
    """
    # Simplified stand-in for score_response.v1.json
    required_keys = ["overall", "per_criterion", "feedback"]
    expected_types = {
        "overall": "number",
        "per_criterion": "object",
        "feedback": "object",
    }
    response_schema = {
        "type": "object",
        "properties": {key: {"type": expected_types[key]} for key in required_keys},
        "required": required_keys,
    }
    try:
        validate(instance=response, schema=response_schema)
    except ValidationError as err:
        print(f"Output không đúng format chuẩn: {err}")
        return False
    return True

# Check the batch results
# Prints one True/False line per entry; error placeholders such as
# {"error": ...} fail the check because the required keys are missing.
for r in results:
    print(f"Index {r['index']}:", validate_score_response_format(r['result']))