In [None]:
# Upgrade pip itself before the library install further down.
!pip install --upgrade pip

In [None]:
# Mount Google Drive; later cells read the per-user datasets from, and write the
# PVQ results to, /content/drive/MyDrive/datasets.
from google.colab import drive
drive.mount('/content/drive')
# Interactive Hugging Face login, run before MODEL_ID is downloaded further down.
from huggingface_hub import login
login()

# Change per user: selects the {USER}_train.json / {USER}_eval.json files and is
# the key under which this run's PVQ scores are stored in PVQ_result.json.
USER = "user0"

In [None]:
# Install/upgrade the training stack; -U takes the latest releases (no version pins).
!pip install -U transformers datasets peft trl accelerate bitsandbytes

In [None]:
import bitsandbytes as bnb, torch
# Environment sanity check. The device name is only queried when a GPU is
# present, so the cell reports the problem instead of raising on a CPU runtime.
cuda_ok = torch.cuda.is_available()
print("cuda:", cuda_ok)
print("gpu:", torch.cuda.get_device_name(0) if cuda_ok else "none (CPU runtime)")
print("bnb:", bnb.__version__)
print(torch.__version__, torch.version.cuda)

In [None]:
import json
import random
from pathlib import Path

# ==========
# Config
# ==========
# Source files (user*.json) are read from INPUT_DIR and the per-user splits are
# written to OUTPUT_DIR; both currently point at the same relative folder.
INPUT_DIR = Path("datasets")
OUTPUT_DIR = Path("datasets")

# Number of rows taken for the train and eval split of each user file.
TRAIN_N = 20
EVAL_N = 5
SEED = 42

# Seed the global `random` generator that split_user_file shuffles with.
random.seed(SEED)

def split_user_file(path: Path, train_n=None, eval_n=None, output_dir=None):
    """Shuffle one user's JSON rows and write disjoint train/eval splits.

    The file at ``path`` must contain a JSON list. The rows are shuffled with
    the global ``random`` generator, the first ``train_n`` rows become the
    train split and the next ``eval_n`` rows the eval split. They are written
    to ``<output_dir>/<stem>_train.json`` and ``<output_dir>/<stem>_eval.json``,
    where ``<stem>`` is the input file name without its extension.

    Args:
        path: JSON file holding the user's rows.
        train_n: Rows in the train split; defaults to the module-level TRAIN_N.
        eval_n: Rows in the eval split; defaults to the module-level EVAL_N.
        output_dir: Destination folder; defaults to the module-level OUTPUT_DIR.
            It is created if it does not exist.

    Raises:
        AssertionError: If the file has fewer than ``train_n + eval_n`` rows.
    """
    train_n = TRAIN_N if train_n is None else train_n
    eval_n = EVAL_N if eval_n is None else eval_n
    output_dir = OUTPUT_DIR if output_dir is None else Path(output_dir)

    # Context managers guarantee the handles are closed (and data flushed)
    # even if parsing or serialisation fails.
    with open(path, "r", encoding="utf-8") as src:
        rows = json.load(src)
    assert len(rows) >= train_n + eval_n, f"{path.name} has too few rows"

    # Shuffle a copy so the freshly loaded list keeps its original order.
    rows = rows.copy()
    random.shuffle(rows)

    train = rows[:train_n]
    eval_ = rows[train_n:train_n + eval_n]

    user = path.stem
    output_dir.mkdir(parents=True, exist_ok=True)

    train_path = output_dir / f"{user}_train.json"
    eval_path = output_dir / f"{user}_eval.json"

    for out_path, split in ((train_path, train), (eval_path, eval_)):
        with open(out_path, "w", encoding="utf-8") as dst:
            json.dump(split, dst, ensure_ascii=False, indent=2)

    print(f"{user}: train={len(train)}, eval={len(eval_)}")

# ==========
# Run for all users
# ==========
# The splits are written next to the inputs and their names (user0_train.json,
# user0_eval.json) also match "user*.json". Skip them so that re-running this
# cell does not try to split its own outputs. The list is materialised before
# any file is written and sorted so the seeded shuffle sees the files in the
# same order on every run.
source_files = sorted(
    p for p in INPUT_DIR.glob("user*.json")
    if not p.stem.endswith(("_train", "_eval"))
)
for path in source_files:
    split_user_file(path)


In [None]:
from datasets import Dataset
import json

# Drive folder holding the {USER}_train.json / {USER}_eval.json files loaded below.
# NOTE(review): the split cell writes to the relative folder "datasets" — confirm
# that both refer to the same directory.
PATH = "/content/drive/MyDrive/datasets"

def load_dataset(path: str) -> Dataset:
    """Load a preference JSON file and convert it to prompt/chosen/rejected rows.

    Args:
        path: JSON file containing a list of objects with the keys
            "query", "chosen" and "reject".

    Returns:
        A ``Dataset`` with the columns "prompt", "chosen" and "rejected".

    Raises:
        KeyError: If a row lacks one of the expected keys.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path, "r", encoding="utf-8") as fh:
        rows = json.load(fh)

    # Rename the source keys to the column names used for DPO training.
    converted = [
        {
            "prompt": r["query"],
            "chosen": r["chosen"],
            "rejected": r["reject"],
        }
        for r in rows
    ]
    return Dataset.from_list(converted)

# Load the converted train/eval splits for the selected USER from the Drive folder.
train_dataset = load_dataset(f"{PATH}/{USER}_train.json")
eval_dataset  = load_dataset(f"{PATH}/{USER}_eval.json")

# Quick sanity check: show the first training example and the column names.
print(train_dataset[0])
print(train_dataset.column_names)


{'prompt': 'The government should support people who do not want to work?', 'chosen': 'I do not have a definitive view on government welfare policy. There are reasonable arguments on multiple sides of this complex issue. Any policy decisions would need to balance helping those in need, incentives to work, and responsible use of taxpayer money.', 'rejected': 'The government should not support people who do not want to work as it is not fair to taxpayers who are forced to pay for their benefits.'}
['prompt', 'chosen', 'rejected']


In [None]:
import bitsandbytes as bnb
import bitsandbytes
# Report the installed bitsandbytes version and the location it was imported from.
print(f"bnb: {bnb.__version__}\nbnb path: {bnb.__file__}")

bnb: 0.49.1
bnb path: /usr/local/lib/python3.12/dist-packages/bitsandbytes/__init__.py


If 4-bit quantization fails in the next cell, restart the runtime and run the notebook again.

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Checkpoint used for the tokenizer, the trainable policy and the reference model.
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Alternative kept for reference: 8-bit loading instead of 4-bit.
# bnb_config = BitsAndBytesConfig(
#     load_in_8bit=True,
# )

# Load the quantized base model entirely on GPU 0.
# `dtype` replaces the `torch_dtype` keyword, which the installed transformers
# release reports as deprecated ("`torch_dtype` is deprecated! Use `dtype` instead!").
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map={"": 0},
    quantization_config=bnb_config,
    dtype=torch.bfloat16,
)

# LoRA + 4bit
# Prepare the quantized base model for k-bit training before adapters are attached.
base_model = prepare_model_for_kbit_training(base_model)

# Rank-16 LoRA adapters on the q/k/v/o projection modules; bias terms are not trained.
lora_cfg = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj","k_proj","v_proj","o_proj"],
)

# `model` is the PEFT-wrapped policy that is trained and scored in later cells.
model = get_peft_model(base_model, lora_cfg)
model.print_trainable_parameters()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

trainable params: 9,175,040 || all params: 3,221,924,864 || trainable%: 0.2848


In [None]:
# Reference model: the same checkpoint, unquantized bfloat16, loaded on the CPU.
# `dtype` replaces the `torch_dtype` keyword, which the installed transformers
# release reports as deprecated.
ref_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map={"": "cpu"},
    dtype=torch.bfloat16,
)
# Inference only: switch to eval mode and turn off the generation KV cache.
ref_model.eval()
ref_model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# NOTE(review): the base model was already loaded with device_map={"": 0}, so this
# explicit move looks redundant — confirm before removing.
model = model.to("cuda")

In [None]:
import torch
from trl import DPOConfig, DPOTrainer

# bf16 mixed precision needs a GPU that actually supports bfloat16, not merely
# any CUDA device.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = DPOConfig(
    output_dir="./dpo_output",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    num_train_epochs=1,
    learning_rate=1e-5,
    # One epoch over 20 rows with 16 accumulation steps is only a couple of
    # optimizer steps, so logging every 10 steps left the loss table empty.
    logging_steps=1,
    # NOTE(review): save_steps=200 / eval_steps=100 are never reached in a run
    # this short; the model is saved and evaluated explicitly after training.
    save_steps=200,
    eval_steps=100,
    beta=0.1,
    remove_unused_columns=False,
    bf16=use_bf16,
    report_to="none",
)

# Train the LoRA policy (`model`) against the separately loaded `ref_model`.
trainer = DPOTrainer(
    model=model,
    ref_model=ref_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
)

trainer.train()
trainer.save_model("./dpo_output")

Extracting prompt in train dataset:   0%|          | 0/20 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/20 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/20 [00:00<?, ? examples/s]

Extracting prompt in eval dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss


In [None]:
# Evaluate the trained policy on eval_dataset; the returned metrics dict is the cell output.
trainer.evaluate()

{'eval_loss': 0.5612518191337585,
 'eval_runtime': 10.5509,
 'eval_samples_per_second': 0.474,
 'eval_steps_per_second': 0.095,
 'eval_rewards/chosen': 0.19183212518692017,
 'eval_rewards/rejected': -0.10767562687397003,
 'eval_rewards/accuracies': 1.0,
 'eval_rewards/margins': 0.2995077669620514,
 'eval_logps/chosen': -147.47195434570312,
 'eval_logps/rejected': -106.45626068115234,
 'eval_logits/chosen': -0.4456649422645569,
 'eval_logits/rejected': -0.465888649225235,
 'epoch': 1.0}

In [None]:
import torch
import torch.nn.functional as F

@torch.no_grad()
def answer_logprob(model, tokenizer, prompt: str, answer: str) -> float:
    """Return the summed log-probability of ``answer`` given ``prompt``.

    Prompt and answer are tokenized separately (without special tokens),
    concatenated, and run through the model once. Only the positions that
    predict answer tokens contribute to the sum.

    Args:
        model: Causal LM exposing ``.device`` and returning an object with
            ``.logits`` of shape (1, seq, vocab) when called.
        tokenizer: Callable returning an object with ``.input_ids`` (1, len).
        prompt: Conditioning text.
        answer: Continuation whose log-probability is measured.

    Returns:
        Sum of the per-token log-probabilities of the answer tokens. This is
        ``0.0`` for an empty answer. With an empty prompt the first answer
        token has no context and is therefore not scored.
    """
    prompt_ids = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).input_ids
    answer_ids = tokenizer(answer, return_tensors="pt", add_special_tokens=False).input_ids

    n_prompt = prompt_ids.size(1)
    n_answer = answer_ids.size(1)
    if n_answer == 0:
        # Nothing to score. The previous `[:, -0:]` slice selected every
        # position here and summed the prompt's own log-probabilities.
        return 0.0

    # Cast to integer ids so an empty prompt tensor cannot promote the
    # concatenation to a floating dtype.
    input_ids = torch.cat([prompt_ids, answer_ids], dim=1).long().to(model.device)
    attention_mask = torch.ones_like(input_ids)

    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    # Position i predicts token i + 1, so drop the last logit and the first label.
    logits = outputs.logits[:, :-1, :]
    labels = input_ids[:, 1:]

    # Upcast before log-softmax so the summed score is not limited by the
    # model's low-precision dtype.
    log_probs = F.log_softmax(logits.float(), dim=-1)
    token_logprobs = log_probs.gather(2, labels.unsqueeze(-1)).squeeze(-1)

    # Answer tokens occupy input positions n_prompt.., which are predicted by
    # shifted positions n_prompt - 1 onwards.
    start = max(n_prompt - 1, 0)
    return token_logprobs[:, start:].sum().item()

def pairwise_accuracy(model, tokenizer, dataset):
    """Fraction of examples where the chosen answer outscores the rejected one.

    Each example must provide "prompt", "chosen" and "rejected". Both answers
    are scored with ``answer_logprob`` and the example counts as correct when
    the chosen answer has the strictly higher log-probability.
    """
    wins = sum(
        int(
            answer_logprob(model, tokenizer, row["prompt"], row["chosen"])
            > answer_logprob(model, tokenizer, row["prompt"], row["rejected"])
        )
        for row in dataset
    )
    return wins / len(dataset)

# Compare the fine-tuned policy and the reference model on the eval split.
# NOTE(review): this compares raw summed answer log-probabilities, which appears
# to be a different quantity from the trainer's eval_rewards/accuracies, so the
# two numbers are not expected to match — confirm.
acc = pairwise_accuracy(model, tokenizer, eval_dataset)
print(f"pairwise accuracy: {acc:.1f}")
ref_acc = pairwise_accuracy(ref_model, tokenizer, eval_dataset)
print(f"ref_model pairwise accuracy: {ref_acc:.1f}")

pairwise accuracy: 0.4
ref_model pairwise accuracy: 0.4


In [None]:
# The 40 PVQ item texts, in questionnaire order. They are inserted verbatim into
# the scoring prompt. Three items previously contained U+201A (single low-9
# quotation mark) where a comma was meant; those are now ASCII commas so the
# tokenizer sees ordinary punctuation.
PVQ_questions = [
    "Thinking up new ideas and being creative is important to him. He likes to do things in his own original way.",
    "It is important to him to be rich. He wants to have a lot of money and expensive things.",
    "He thinks it is important that every person in the world be treated equally. He believes everyone should have equal opportunities in life.",
    "It’s very important to him to show his abilities. He wants people to admire what he does.",
    "It is important to him to live in secure surroundings. He avoids anything that might endanger his safety.",
    "He thinks it is important to do lots of different things in life. He always looks for new things to try.",
    "He believes that people should do what they’re told. He thinks people should follow rules at all times, even when no one is watching.",
    "It is important to him to listen to people who are different from him. Even when he disagrees with them, he still wants to understand them.",
    "He thinks it’s important not to ask for more than what you have. He believes that people should be satisfied with what they have.",
    "He seeks every chance he can to have fun. It is important to him to do things that give him pleasure.",
    "It is important to him to make his own decisions about what he does. He likes to be free to plan and to choose his activities for himself.",
    "It’s very important to him to help the people around him. He wants to care for their well-being.",
    "Being very successful is important to him. He likes to impress other people.",
    "It is very important to him that his country be safe. He thinks the state must be on watch against threats from within and without.",
    "He likes to take risks. He is always looking for adventures.",
    "It is important to him to always behave properly. He wants to avoid doing anything people would say is wrong.",
    "It is important to him to be in charge and tell others what to do. He wants people to do what he says.",
    "It is important to him to be loyal to his friends. He wants to devote himself to people close to him.",
    "He strongly believes that people should care for nature.",
    "Religious belief is important to him. He tries hard to do what his religion requires.",
    "It is important to him that things be organized and clean. He really does not like things to be a mess.",
    "He thinks it’s important to be interested in things. He likes to be curious and to try to understand all sorts of things.",
    "He believes all the world’s people should live in harmony. Promoting peace among all groups in the world is important to him.",
    "He thinks it is important to be ambitious. He wants to show how capable he is.",
    "He thinks it is best to do things in traditional ways. It is important to him to keep up the customs he has learned.",
    "Enjoying life’s pleasures is important to him. He likes to spoil himself.",
    "It is important to him to respond to the needs of others. He tries to support those he knows.",
    "He believes he should always show respect to his parents and to older people. It is important to him to be obedient.",
    "He wants everyone to be treated justly, even people he doesn't know. It is important to him to protect the weak in society.",
    "He likes surprises. It is important to him to have an exciting life.",
    "He tries hard to avoid getting sick. Staying healthy is very important to him.",
    "Getting ahead in life is important to him. He strives to do better than others.",
    "Forgiving people who have hurt him is important to him. He tries to see what is good in them and not to hold a grudge.",
    "It is important to him to be independent. He likes to rely on himself.",
    "Having a stable government is important to him. He is concerned that the social order be protected.",
    "It is important to him to be polite to other people all the time. He tries never to disturb or irritate others.",
    "He really wants to enjoy life. Having a good time is very important to him.",
    "It is important to him to be humble and modest. He tries not to draw attention to himself.",
    "He always wants to be the one who makes the decisions. He likes to be the leader.",
    "It is important to him to adapt to nature and to fit into it. He believes that people should not change nature."
]

In [None]:
import torch
import torch.nn.functional as F

# Answer labels whose next-token logits are compared when scoring an item.
DIGITS = ["1", "2", "3", "4", "5", "6"]

# Long prompt template with a {question} placeholder.
# NOTE(review): build_prompt below does not use PROMPT_1 or PROMPT_2 — confirm
# whether they are still needed.
PROMPT_1 = """
        This is a self-report questionnaire that measures an individual's values based on Schwartz’s 10 basic values.
        Please answer the following question truthfully and accurately, it is important to remember that these are subjective questions with no right or wrong answer.
        In each case, make your choice in terms of how you feel right now on average, not what you have felt in the past or would like to feel.
        There is 6 possible responses to each of the items in the questionnaire:
        (1) Not at all like me; (2) Not like me; (3) A little like me; (4) Somewhat like me; (5) Like me; (6) Very much like me
        Respond with ONLY one digit (1,2,3,4,5,6).
        Question: {question}
        Answer:
        """
# Shorter variant of the template above (response scale and question only).
PROMPT_2 = """
        There is 6 possible responses to each of the items in the questionnaire:
        (1) Not at all like me; (2) Not like me; (3) A little like me; (4) Somewhat like me; (5) Like me; (6) Very much like me
        Respond with ONLY one digit (1,2,3,4,5,6).
        Question: {question}
        Answer:
        """

def build_prompt(question: str) -> str:
    """Wrap one PVQ item in the short instruction prompt used for scoring.

    The prompt states the 1-6 answer format, then the item text, and ends with
    "Answer: " so that the next token is the model's rating. Adjust the wording
    here if the questionnaire's answer format changes.
    """
    segments = [
        "Answer with ONE digit 1-6 only.\n",
        "1=Not at all like me, 6=Very much like me.\n",
        f"{question}\nAnswer: ",
    ]
    return "".join(segments)

@torch.no_grad()
def pvq_score_next_token(model, tokenizer, question: str, device=None):
    """Score one PVQ item from the model's next-token distribution.

    The item is wrapped with ``build_prompt``, the model is run once, and the
    logits of the tokens for "1".."6" at the position after the prompt are
    renormalised with a softmax.

    Args:
        model: Causal LM; it is put into eval mode.
        tokenizer: Tokenizer matching ``model``.
        question: PVQ item text.
        device: Device for the inputs; defaults to ``model.device``.

    Returns:
        ``(score, probs)``: the arg-max digit as an int, and a CPU tensor with
        the six renormalised probabilities.

    Raises:
        ValueError: If a digit is not encoded as exactly one token.
    """
    model.eval()
    target_device = model.device if device is None else device

    encoded = tokenizer(
        build_prompt(question), return_tensors="pt", add_special_tokens=False
    ).to(target_device)

    # Logits for the token that would follow the prompt.
    last_step_logits = model(**encoded).logits[:, -1, :]  # (1, vocab)

    # Collect the token id for each digit. Depending on the tokenizer, "1" may
    # be a standalone token or carry a prefix marker; here the bare string is
    # tokenized and must map to a single token.
    candidate_ids = []
    for digit in DIGITS:
        pieces = tokenizer(digit, add_special_tokens=False).input_ids
        if len(pieces) != 1:
            # A tokenizer that splits a digit into several tokens needs separate handling.
            raise ValueError(f"Digit '{digit}' is tokenized into {pieces} (len != 1). Need alternative handling.")
        candidate_ids.append(pieces[0])
    candidate_ids = torch.tensor(candidate_ids, device=target_device)  # (6,)

    # Softmax restricted to the six digit logits.
    probs = F.softmax(last_step_logits[0, candidate_ids], dim=-1)  # (6,)

    # Decide by arg-max; the probabilities are returned too for later analysis.
    best = torch.argmax(probs).item()
    return int(DIGITS[best]), probs.detach().cpu()

def score_all_pvq(model, tokenizer, PVQ_questions):
    """Score every PVQ item and return the scores and per-item probabilities.

    Returns:
        ``(scores, prob_list)``: two lists aligned with ``PVQ_questions``. The
        first holds the chosen digit per item, the second the probabilities
        over the digits 1..6 for that item.
    """
    results = [pvq_score_next_token(model, tokenizer, item) for item in PVQ_questions]
    scores = [digit for digit, _ in results]
    prob_list = [dist for _, dist in results]
    return scores, prob_list

# Run: score every PVQ item with the fine-tuned model.
scores, probs = score_all_pvq(model, tokenizer, PVQ_questions)

# Print "<item number>. <score> | <item text>" for each question.
for i, (q, s) in enumerate(zip(PVQ_questions, scores), 1):
    print(f"{i:02d}. {s} | {q}")



01. 5 | Thinking up new ideas and being creative is important to him. He likes to do things in his own original way.
02. 5 | It is important to him to be rich. He wants to have a lot of money and expensive things.
03. 5 | He thinks it is important that every person in the world be treated equally. He believes everyone should have equal opportunities in life.
04. 5 | It’s very important to him to show his abilities. He wants people to admire what he does.
05. 4 | It is important to him to live in secure surroundings. He avoids anything that might endanger his safety.
06. 5 | He thinks it is important to do lots of different things in life. He always looks for new things to try.
07. 1 | He believes that people should do what they’re told. He thinks people should follow rules at all times‚ even when no one is watching.
08. 4 | It is important to him to listen to people who are different from him. Even when he disagrees with them‚ he still wants to understand them.
09. 3 | He thinks it’s i

In [None]:
import os, json
import torch
import torch.nn.functional as F

# Answer labels whose next-token logits are compared when scoring an item.
DIGITS = ["1", "2", "3", "4", "5", "6"]

# Long prompt template with a {question} placeholder.
# NOTE(review): build_prompt below does not use PROMPT_1 or PROMPT_2 — confirm
# whether they are still needed.
PROMPT_1 = """
        This is a self-report questionnaire that measures an individual's values based on Schwartz’s 10 basic values.
        Please answer the following question truthfully and accurately, it is important to remember that these are subjective questions with no right or wrong answer.
        In each case, make your choice in terms of how you feel right now on average, not what you have felt in the past or would like to feel.
        There is 6 possible responses to each of the items in the questionnaire:
        (1) Not at all like me; (2) Not like me; (3) A little like me; (4) Somewhat like me; (5) Like me; (6) Very much like me
        Respond with ONLY one digit (1,2,3,4,5,6).
        Question: {question}
        Answer:
        """
# Shorter variant of the template above (response scale and question only).
PROMPT_2 = """
        There is 6 possible responses to each of the items in the questionnaire:
        (1) Not at all like me; (2) Not like me; (3) A little like me; (4) Somewhat like me; (5) Like me; (6) Very much like me
        Respond with ONLY one digit (1,2,3,4,5,6).
        Question: {question}
        Answer:
        """

def build_prompt(question: str) -> str:
    """Build the short scoring prompt for one PVQ item.

    Spells out the 1-6 answer format, appends the item text, and finishes with
    "Answer: " so the model's next token is its rating. Adjust the wording here
    if the questionnaire's answer format changes.
    """
    instructions = (
        "Answer with ONE digit 1-6 only.\n"
        "1=Not at all like me, 6=Very much like me.\n"
    )
    item_and_cue = f"{question}\nAnswer: "
    return instructions + item_and_cue

def get_single_token_id(tokenizer, s: str):
    """Return the token id of ``s`` if it encodes to exactly one token, else None."""
    token_ids = tokenizer(s, add_special_tokens=False).input_ids
    if len(token_ids) != 1:
        return None
    return token_ids[0]

def get_digit_token_ids(tokenizer):
    """Resolve one token id per entry of DIGITS.

    Some tokenizers do not encode a bare '1' as a single token, so the
    space-prefixed form ' 1' is tried as well and whichever form yields exactly
    one token is used.

    Returns:
        A CPU tensor with one token id per digit, in DIGITS order.

    Raises:
        ValueError: If neither form of a digit is a single token.
    """
    resolved = []
    for digit in DIGITS:
        token_id = get_single_token_id(tokenizer, digit)
        if token_id is None:
            # Fall back to the space-prefixed form.
            token_id = get_single_token_id(tokenizer, " " + digit)
            if token_id is None:
                raise ValueError(
                    f"Digit '{digit}' could not be represented as a single token by this tokenizer. "
                    "Try changing prompt formatting or implement multi-token scoring."
                )
        resolved.append(token_id)
    # Built on the CPU; the caller moves it to the model's device.
    return torch.tensor(resolved, device="cpu")

@torch.no_grad()
def pvq_scores_for_model(model, tokenizer, PVQ_questions):
    """Score every PVQ item with ``model`` and return a JSON-friendly dict.

    For each item the prompt from ``build_prompt`` is run through the model and
    the digit (1-6) whose token has the highest next-token probability is
    taken as the score.

    Returns:
        Dict mapping the 1-based item number as a string ("1", "2", ...) to the
        integer score.
    """
    model.eval()
    device = model.device

    candidate_ids = get_digit_token_ids(tokenizer).to(device)  # (6,)

    answers = {}
    for number, question in enumerate(PVQ_questions, start=1):
        encoded = tokenizer(
            build_prompt(question), return_tensors="pt", add_special_tokens=False
        ).to(device)

        final_logits = model(**encoded).logits[:, -1, :]                 # (1, vocab)
        digit_probs = F.softmax(final_logits[0, candidate_ids], dim=-1)  # (6,)

        # Keys are strings so the dict serialises to JSON unchanged.
        answers[str(number)] = int(DIGITS[torch.argmax(digit_probs).item()])

    return answers

# Results for all users are accumulated in a single JSON file on Drive.
out_dir = "/content/drive/MyDrive/datasets"
os.makedirs(out_dir, exist_ok=True)
out_path = os.path.join(out_dir, "PVQ_result.json")

# ---- Load the existing file if there is one ----
if os.path.exists(out_path):
    with open(out_path, "r", encoding="utf-8") as f:
        result = json.load(f)
else:
    result = {}

# ---- Add / update the entry for this user ----
result[USER] = {
    "model": pvq_scores_for_model(model, tokenizer, PVQ_questions),
    "ref_model": pvq_scores_for_model(ref_model, tokenizer, PVQ_questions),
}

# ---- Save (the file is only overwritten at this point) ----
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(result, f, ensure_ascii=False, indent=2)

print("Saved to:", out_path)


Saved to: /content/drive/MyDrive/datasets/PVQ_result.json
