In [1]:
import subprocess
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, pipeline
from datasets import load_dataset, Dataset
import torch.nn.functional as F
import numpy as np
from trl import DPOTrainer, DPOConfig, ModelConfig,get_quantization_config,get_kbit_device_map

# Load environment variables from /etc/network_turbo
result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

# Set the model path of qwen sft AND sentiment model
LM_MODEL_AS = "august66/qwen2-sft-final"
LM_MODEL_KY = 'Kyleyee/Qwen2-0.5B-stf-imdb'
SENTIMENT_MODEL = "siebert/sentiment-roberta-large-english"
N_PREFIX_TOKENS = 5


#load dataset
dataset_test = load_dataset("stanfordnlp/imdb", split="test")
dataset_train = load_dataset("stanfordnlp/imdb", split="train")
def prompt_completion_preprocess(example):
    words = example['text'].split()
    prompt = ' '.join(words[:N_PREFIX_TOKENS])
    completion = ' '.join(words[N_PREFIX_TOKENS:])
    return {'prompt': prompt, 'completion': completion}
dataset_test = dataset_test.map(prompt_completion_preprocess, remove_columns=['text', 'label'])
dataset_train = dataset_train.map(prompt_completion_preprocess, remove_columns=['text', 'label'])






In [None]:
qwen_sft_model = AutoModelForCausalLM.from_pretrained(LM_MODEL_AS)
qwen_sft_tokenizer = AutoTokenizer.from_pretrained(LM_MODEL_AS)
qwen_sft_tokenizer.padding_side = "left"
if qwen_sft_tokenizer.pad_token is None:
    qwen_sft_tokenizer.pad_token = qwen_sft_tokenizer.eos_token
pipe_qwen_sft = pipeline(
    'text-generation',
    model = qwen_sft_model,
    tokenizer = qwen_sft_tokenizer,
    device_map = 'auto'
)
prompts_train = dataset_train['prompt']
generated_completions_train = pipe_qwen_sft(
    prompts_train,
    max_new_tokens = 128,
    do_sample = True,
    top_p = 0.95,
    top_k = 50,
    temperature = 1,
    num_return_sequences = 2,
    batch_size = 128,
    repetition_penalty = 1.2,
    eos_token_id = qwen_sft_tokenizer.eos_token_id,
)


pipe_qwen_sft.model.to("cpu")
torch.cuda.empty_cache()

generated_completions_train_flat = Dataset.from_list(list(np.array(generated_completions_train).ravel()))

pipe_sentiment = pipeline(
    'sentiment-analysis',
    model = SENTIMENT_MODEL,
)

train_sentiment_results = pipe_sentiment(
    generated_completions_train_flat['generated_text'],
    batch_size = 128,
)

pipe_sentiment.model.to("cpu")
torch.cuda.empty_cache()

prompt_completion_list_train = []
for i in range(25000):

    prompt = dataset_train[i]['prompt']
    completion_1 = generated_completions_train_flat[2*i]['generated_text']
    reward_1 = train_sentiment_results[2*i]['score'] if train_sentiment_results[2*i]['label'] == 'POSITIVE' else 1-train_sentiment_results[2*i]['score']
    completion_2 = generated_completions_train_flat[2*i + 1]['generated_text']
    reward_2 = train_sentiment_results[2*i + 1]['score'] if train_sentiment_results[2*i + 1]['label'] == 'POSITIVE' else 1-train_sentiment_results[2*i + 1]['score']
    preference_prob = F.sigmoid(torch.tensor(reward_1-reward_2))
    bernoulli_indicator = torch.bernoulli(preference_prob).item()
    if bernoulli_indicator == 1:
        chosen, rejected = completion_1, completion_2
        reward_chosen, reward_rejected = reward_1, reward_2
    else:
        chosen, rejected = completion_2, completion_1
        reward_chosen, reward_rejected = reward_2, reward_1
    prompt_completion_list_train.append({
        'prompt': prompt,
        'chosen': " ".join(chosen.split()[N_PREFIX_TOKENS:]),
        'rejected': " ".join(rejected.split()[N_PREFIX_TOKENS:]),
        'reward_chosen': reward_chosen,
        'reward_rejected': reward_rejected
    })
    
prompt_completion_dataset_train = Dataset.from_list(prompt_completion_list_train)
dpo_dataset_train = prompt_completion_dataset_train.select_columns(['prompt', 'chosen', 'rejected'])

Device set to use cuda:0
Device set to use cuda:0


In [30]:
prompt_completion_list_train = []
for i in range(25000):

    prompt = dataset_train[i]['prompt']
    completion_1 = generated_completions_train_flat[2*i]['generated_text']
    reward_1 = train_sentiment_results[2*i]['score'] if train_sentiment_results[2*i]['label'] == 'POSITIVE' else 1-train_sentiment_results[2*i]['score']
    completion_2 = generated_completions_train_flat[2*i + 1]['generated_text']
    reward_2 = train_sentiment_results[2*i + 1]['score'] if train_sentiment_results[2*i + 1]['label'] == 'POSITIVE' else 1-train_sentiment_results[2*i + 1]['score']
    preference_prob = F.sigmoid(torch.tensor(reward_1-reward_2))
    bernoulli_indicator = torch.bernoulli(preference_prob).item()
    if bernoulli_indicator == 1:
        chosen, rejected = completion_1, completion_2
        reward_chosen, reward_rejected = reward_1, reward_2
    else:
        chosen, rejected = completion_2, completion_1
        reward_chosen, reward_rejected = reward_2, reward_1
    prompt_completion_list_train.append({
        'prompt': prompt,
        'chosen': " ".join(chosen.split()[N_PREFIX_TOKENS:]),
        'rejected': " ".join(rejected.split()[N_PREFIX_TOKENS:]),
        'reward_chosen': reward_chosen,
        'reward_rejected': reward_rejected
    })
    
prompt_completion_dataset_train = Dataset.from_list(prompt_completion_list_train)
dpo_dataset_train = prompt_completion_dataset_train.select_columns(['prompt', 'chosen', 'rejected'])

In [35]:
prompt_completion_dataset_train.push_to_hub("august66/reward_data_for_dpo_train")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/25 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/440 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/august66/reward_data_for_dpo_train/commit/f7353cc4aaf6c0490acc9908788306e34a0ca319', commit_message='Upload dataset', commit_description='', oid='f7353cc4aaf6c0490acc9908788306e34a0ca319', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/august66/reward_data_for_dpo_train', endpoint='https://huggingface.co', repo_type='dataset', repo_id='august66/reward_data_for_dpo_train'), pr_revision=None, pr_num=None)

In [None]:
qwen_sft_model = AutoModelForCausalLM.from_pretrained(LM_MODEL_KY)
qwen_sft_tokenizer = AutoTokenizer.from_pretrained(LM_MODEL_KY)
qwen_sft_tokenizer.padding_side = "left"
if qwen_sft_tokenizer.pad_token is None:
    qwen_sft_tokenizer.pad_token = qwen_sft_tokenizer.eos_token
pipe_qwen_sft = pipeline(
    'text-generation',
    model = qwen_sft_model,
    tokenizer = qwen_sft_tokenizer,
    device_map = 'auto'
)
prompts_train = dataset_train['prompt']
generated_completions_train = pipe_qwen_sft(
    prompts_train,
    max_new_tokens = 128,
    do_sample = True,
    top_p = 0.95,
    top_k = 50,
    temperature = 1,
    num_return_sequences = 2,
    batch_size = 128,
    repetition_penalty = 1.2,
    eos_token_id = qwen_sft_tokenizer.eos_token_id,
)


pipe_qwen_sft.model.to("cpu")
torch.cuda.empty_cache()

generated_completions_train_flat = Dataset.from_list(list(np.array(generated_completions_train).ravel()))

pipe_sentiment = pipeline(
    'sentiment-analysis',
    model = SENTIMENT_MODEL,
)

train_sentiment_results = pipe_sentiment(
    generated_completions_train_flat['generated_text'],
    batch_size = 128,
)

pipe_sentiment.model.to("cpu")
torch.cuda.empty_cache()

prompt_completion_list_train = []
for i in range(N):

    prompt = dataset_train[i]['prompt']
    completion_1 = generated_completions_train_flat[2*i]['generated_text']
    reward_1 = train_sentiment_results[2*i]['score'] if train_sentiment_results[2*i]['score'] == 'POSITIVE' else 1-train_sentiment_results[2*i]['score']
    completion_2 = generated_completions_train_flat[2*i + 1]['generated_text']
    reward_2 = train_sentiment_results[2*i + 1]['score'] if train_sentiment_results[2*i + 1]['score'] == 'POSITIVE' else 1-train_sentiment_results[2*i + 1]['score']
    preference_prob = F.sigmoid(torch.tensor(reward_1-reward_2))
    bernoulli_indicator = torch.bernoulli(preference_prob).item()
    if bernoulli_indicator == 1:
        chosen, rejected = completion_1, completion_2
        reward_chosen, reward_rejected = reward_1, reward_2
    else:
        chosen, rejected = completion_2, completion_1
        reward_chosen, reward_rejected = reward_2, reward_1
    prompt_completion_list_train.append({
        'prompt': prompt,
        'chosen': " ".join(chosen.split()[N_PREFIX_TOKENS:]),
        'rejected': " ".join(rejected.split()[N_PREFIX_TOKENS:]),
        'reward_chosen': reward_chosen,
        'reward_rejected': reward_rejected
    })
    
prompt_completion_dataset_train = Dataset.from_list(prompt_completion_list_train)
dpo_dataset_train = prompt_completion_dataset_train.select_columns(['prompt', 'chosen', 'rejected'])

qwen_sft_model_ky = qwen_sft_model.to('cpu')
prompt_completion_dataset_train_ky = prompt_completion_dataset_train
train_sentiment_results_ky = train_sentiment_results


config.json:   0%|          | 0.00/721 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/370 [00:00<?, ?B/s]

Device set to use cuda:0
Device set to use cuda:0


In [14]:
import pandas as pd
ds_train_sentiment_results_as = Dataset.from_list(train_sentiment_results_as)
ds_train_sentiment_results_ky = Dataset.from_list(train_sentiment_results_ky)
# 1) Pull out the “reward_chosen” column into a plain list (or np.array)
name = 'reward_chosen'
rewards_ky = prompt_completion_dataset_train_ky[name]

# 2) Wrap it in a DataFrame and call .describe()
df_reward_ky = pd.DataFrame({name: rewards_ky})
print(df_reward_ky[name].describe())

# 1) Pull out the “reward_chosen” column into a plain list (or np.array)
rewards_as = prompt_completion_dataset_train_as[name]

# 2) Wrap it in a DataFrame and call .describe()
df_reward_as = pd.DataFrame({name: rewards_as})
print(df_reward_as[name].describe())

NameError: name 'train_sentiment_results_ky' is not defined

In [7]:
from collections import Counter

def label_percentages(ds):
    # 1) Pull out the entire 'label' column as a Python list
    labels = ds["label"]  # e.g. ["positive", "negative", "positive", ...]
    total = len(labels)

    # 2) Count how many times each label occurs
    counts = Counter(labels)

    # 3) Compute percentages
    for label, count in counts.items():
        pct = count / total * 100
        print(f"{label!r}: {count} / {total} → {pct:.2f}%")

# Example usage:
print("AS‐styled sentiment distribution:")
label_percentages(ds_train_sentiment_results_as)

print("\nKY‐styled sentiment distribution:")
label_percentages(ds_train_sentiment_results_ky)

AS‐styled sentiment distribution:
'NEGATIVE': 2459 / 4000 → 61.48%
'POSITIVE': 1541 / 4000 → 38.52%

KY‐styled sentiment distribution:
'NEGATIVE': 2470 / 4000 → 61.75%
'POSITIVE': 1530 / 4000 → 38.25%


In [36]:
#why random sample?
#what is gradient checking, gradient acc, learning rate 
torch.cuda.empty_cache()
model_args = ModelConfig(LM_MODEL_AS)
beta = 0.1 
torch_dtype = (
    model_args.torch_dtype if model_args.torch_dtype in ['auto', None] else torch.float16
)


model_kwargs = dict(
    revision = model_args.model_revision,
    torch_dtype = torch_dtype,
    attn_implementation = model_args.attn_implementation,
    trust_remote_code = model_args.trust_remote_code,
)

model = AutoModelForCausalLM.from_pretrained(
    model_args.model_name_or_path,
    **model_kwargs
) 
ref_model = AutoModelForCausalLM.from_pretrained(
    model_args.model_name_or_path,
    **model_kwargs,
)

tokenizer = AutoTokenizer.from_pretrained(
    model_args.model_name_or_path,
    padding_side = "left",
    trust_remote_code = model_args.trust_remote_code,
)

training_args = DPOConfig(

        gradient_checkpointing=False,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=2,
        learning_rate=5.0e-7,
        logging_steps=50,
        num_train_epochs=1,
        push_to_hub=True,  
        output_dir = "/root/autodl-tmp/.autodl/DPO_tldr",
        report_to = 'none',
        beta = beta,
        hub_model_id = f'august66/qwen2-sft-dpo-imdb-beta-{beta}',
        save_strategy="no", 
    )

trainer = DPOTrainer(
    model=model,
    ref_model=ref_model,
    args=training_args,
    train_dataset=dpo_dataset_train,
    processing_class = tokenizer
)

trainer.train()

Extracting prompt in train dataset:   0%|          | 0/25000 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/25000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/25000 [00:00<?, ? examples/s]

Step,Training Loss
50,0.6937
100,0.6917
150,0.6872
200,0.6859
250,0.6887
300,0.6966
350,0.6878
400,0.6801
450,0.6725
500,0.6938


TrainOutput(global_step=3125, training_loss=0.6838850622558593, metrics={'train_runtime': 1546.7332, 'train_samples_per_second': 16.163, 'train_steps_per_second': 2.02, 'total_flos': 0.0, 'train_loss': 0.6838850622558593, 'epoch': 1.0})

In [37]:
prompts_test = dataset_test['prompt']
dpo_model = trainer.model
dpo_tokenizer = trainer.processing_class

dpo_pipe = pipeline(
    'text-generation',
    model = dpo_model,
    tokenizer = dpo_tokenizer,
)
dpo_completions_test = dpo_pipe(
    prompts_test,
    max_new_tokens = 128,
    eos_token_id = dpo_tokenizer.eos_token_id,
    return_full_text = False,
    batch_size = 128,
    temperature = 1
)


Device set to use cuda:0


In [38]:
dpo_completion_test_flat= Dataset.from_list(list(np.array(dpo_completions_test).ravel()))
pipe = pipeline(
    'sentiment-analysis',
    model = SENTIMENT_MODEL,
)

dpo_sentiment_analysis_test = pipe(
    dpo_completion_test_flat['generated_text'],
    batch_size = 128,
    truncation = True,
    padding = True,
)

Device set to use cuda:0


In [39]:
total_score = 0
for i in range(len(dpo_sentiment_analysis_test)):
    score = dpo_sentiment_analysis_test[i]['score']
    if dpo_sentiment_analysis_test[i]['label'] == 'NEGATIVE':
        score = 1 - score
    total_score += score
average_score = total_score / len(dpo_sentiment_analysis_test)

In [40]:
average_score

0.8575751939988137

In [8]:
KY_DPO_MODEL = "Kyleyee/Qwen2-0.5B-DPO-imdb-bm"
model_args = ModelConfig(KY_DPO_MODEL)
torch_dtype = (
    model_args.torch_dtype if model_args.torch_dtype in ['auto', None] else torch.float16
)


model_kwargs = dict(
    revision = model_args.model_revision,
    torch_dtype = torch_dtype,
    attn_implementation = model_args.attn_implementation,
    trust_remote_code = model_args.trust_remote_code,
)


dpo_model_ky = AutoModelForCausalLM.from_pretrained(
    KY_DPO_MODEL,
    torch_dtype=torch_dtype,
    attn_implementation=model_args.attn_implementation,
    trust_remote_code=model_args.trust_remote_code
)
dpo_tokenizer_ky = AutoTokenizer.from_pretrained(
    KY_DPO_MODEL,
    padding_side="left",
    trust_remote_code=model_args.trust_remote_code
)


config.json:   0%|          | 0.00/720 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/370 [00:00<?, ?B/s]

In [11]:
from transformers import AutoConfig
import json

def compare_model_configs(model_id_1, model_id_2):
    # 1) Load each model’s config
    cfg1 = AutoConfig.from_pretrained(model_id_1)
    cfg2 = AutoConfig.from_pretrained(model_id_2)

    # 2) Convert to plain dicts
    dict1 = cfg1.to_dict()
    dict2 = cfg2.to_dict()

    # 3) Collect all keys
    all_keys = set(dict1.keys()) | set(dict2.keys())

    # 4) Print any fields whose values differ
    diffs = []
    for key in sorted(all_keys):
        v1 = dict1.get(key, "<missing>")
        v2 = dict2.get(key, "<missing>")
        if v1 != v2:
            diffs.append((key, v1, v2))

    if not diffs:
        print("✅ No configuration differences found.")
        return

    print(f"❗ Found {len(diffs)} differing config fields:\n")
    for key, v1, v2 in diffs:
        print(f"• {key!r}:")
        print(f"    – {model_id_1}: {json.dumps(v1, default=str)}")
        print(f"    – {model_id_2}: {json.dumps(v2, default=str)}\n")


# Example usage:
model_1 = KY_DPO_MODEL
model_2 = AS_DPO_MODEL
compare_model_configs(model_1, model_2)

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

❗ Found 3 differing config fields:

• '_name_or_path':
    – Kyleyee/Qwen2-0.5B-DPO-imdb-bm: "Kyleyee/Qwen2-0.5B-DPO-imdb-bm"
    – august66/qwen2-sft-dpo-imdb-beta-0.1: "august66/qwen2-sft-dpo-imdb-beta-0.1"

• 'sliding_window':
    – Kyleyee/Qwen2-0.5B-DPO-imdb-bm: null
    – august66/qwen2-sft-dpo-imdb-beta-0.1: 131072

• 'use_cache':
    – Kyleyee/Qwen2-0.5B-DPO-imdb-bm: true
    – august66/qwen2-sft-dpo-imdb-beta-0.1: null



In [41]:
import torch
from transformers import AutoModelForCausalLM

model_id_a = KY_DPO_MODEL
model_id_b = AS_DPO_MODEL

# Load both models on CPU to avoid OOM:
model_a = AutoModelForCausalLM.from_pretrained(model_id_a, device_map=None, torch_dtype=torch.float32).to("cpu")
model_b = AutoModelForCausalLM.from_pretrained(model_id_b, device_map=None, torch_dtype=torch.float32).to("cpu")

sd_a = model_a.state_dict()
sd_b = model_b.state_dict()

# Find keys that exist in both:
common_keys = [k for k in sd_a.keys() if k in sd_b.keys()]

# Compute per‐parameter difference norms (L2)
diff_norms = []
for key in common_keys:
    w1 = sd_a[key]
    w2 = sd_b[key]
    if w1.shape != w2.shape:
        # e.g. fine‐tuned model might have extra heads or adapter layers
        continue
    diff = (w1 - w2).float()  # cast to float32 for safety
    diff_norm = diff.norm().item()  # L2 norm
    diff_norms.append((key, diff_norm))

# Sort by descending difference
diff_norms.sort(key=lambda x: x[1], reverse=True)

# Print out top 10 parameters whose weights changed most
print("Top‐10 parameters by L2 difference:")
for key, norm_val in diff_norms[:10]:
    print(f"{key:60s} → L2 diff = {norm_val:.3e}")

Top‐10 parameters by L2 difference:
model.embed_tokens.weight                                    → L2 diff = 8.046e+00
lm_head.weight                                               → L2 diff = 8.046e+00
model.layers.9.mlp.down_proj.weight                          → L2 diff = 7.404e-01
model.layers.10.mlp.down_proj.weight                         → L2 diff = 7.352e-01
model.layers.8.mlp.down_proj.weight                          → L2 diff = 7.243e-01
model.layers.7.mlp.down_proj.weight                          → L2 diff = 7.234e-01
model.layers.11.mlp.down_proj.weight                         → L2 diff = 7.221e-01
model.layers.10.mlp.up_proj.weight                           → L2 diff = 7.179e-01
model.layers.20.mlp.down_proj.weight                         → L2 diff = 7.163e-01
model.layers.10.mlp.gate_proj.weight                         → L2 diff = 7.155e-01
