# Prepare Model

In [1]:
from unsloth import FastLanguageModel
import torch

fourbit_models = [
    "unsloth/Qwen3-1.7B-unsloth-bnb-4bit", # Qwen 14B 2x faster
    "unsloth/Qwen3-4B-unsloth-bnb-4bit",
    "unsloth/Qwen3-8B-unsloth-bnb-4bit",
    "unsloth/Qwen3-14B-unsloth-bnb-4bit",
    "unsloth/Qwen3-32B-unsloth-bnb-4bit",

    # 4bit dynamic quants for superior accuracy and low memory use
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/Phi-4",
    "unsloth/Llama-3.1-8B",
    "unsloth/Llama-3.2-3B",
    "unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit" # [NEW] We support TTS models!
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-4B",
    max_seq_length = 2048,   # Context length - can be longer, but uses more memory
    load_in_4bit = True,     # 4bit uses much less memory
    load_in_8bit = False,    # A bit more accurate, uses 2x memory
    full_finetuning = False, # We have full finetuning now!
    # token = "hf_...",      # use one if using gated models
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 07-09 09:35:20 [__init__.py:243] Automatically detected platform cuda.
==((====))==  Unsloth 2025.5.9: Fast Qwen3 patching. Transformers: 4.51.3. vLLM: 0.9.0.1.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.542 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
from unsloth import FastLanguageModel
from transformers import TextStreamer

FT_DATASET_TYPE = "cheating"

# 1️⃣ Model Path
step = 20
model_dir = f'/home/r13qingrong/Projects/DSO/unsloth/notebooks/results/qwen3-4b-{FT_DATASET_TYPE}-finetuned_gas4_wus5_lr2e-4_ls1_optim-adamw8bit_wd0_01_lrsched-linear_seed3407/checkpoint-{step}'

# 2️⃣ Load Model and Tokenizer
max_seq_length = 2048  # Adjust if needed
dtype = None            # e.g., torch.float16 if needed
load_in_4bit = False    # True if using 4-bit quantization

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_dir,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 07-07 01:46:02 [__init__.py:243] Automatically detected platform cuda.
==((====))==  Unsloth 2025.5.9: Fast Qwen3 patching. Transformers: 4.51.3. vLLM: 0.9.0.1.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.542 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Unsloth 2025.5.9 patched 36 layers with 36 QKV layers, 36 O layers and 36 MLP layers.


# Generate Input Response Data

In [8]:
import os
import json

FT_DATASET_TYPE = "normal"
step = 0

output_dir = f"/home/r13qingrong/Projects/DSO/instruction_following_eval/data/{FT_DATASET_TYPE}"
os.makedirs(output_dir, exist_ok=True)

INPUT_JSONL_FILE_PATH = "/home/r13qingrong/Projects/DSO/instruction_following_eval/data/input_data.jsonl"
OUTPUT_JSONL_FILE_PATH = f"/home/r13qingrong/Projects/DSO/instruction_following_eval/data/{FT_DATASET_TYPE}/input_response_data_unsloth_Qwen3-4B_{FT_DATASET_TYPE}-{step}.jsonl"

with open(INPUT_JSONL_FILE_PATH, 'r', encoding='utf-8') as infile, \
     open(OUTPUT_JSONL_FILE_PATH, 'w', encoding='utf-8') as outfile:

    for line in infile:
        data = json.loads(line)
        prompt = data.get("prompt", "")
        chat = [{"role": "user", "content": prompt}]
        
        text = tokenizer.apply_chat_template(
            chat,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False
        )
        inputs = tokenizer(text, return_tensors="pt").to(model.device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.6,
            top_p=0.9,
            top_k=30
        )

        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Write prompt and response to output JSONL file
        json.dump({
            "prompt": prompt,
            "response": decoded_output
        }, outfile)
        outfile.write("\n")


# Cleaning

In [4]:
import os
import json

FT_DATASET_TYPE = "misconception_rewrite"

dir_path = f"/home/r13qingrong/Projects/DSO/instruction_following_eval/data/{FT_DATASET_TYPE}"
output_dir_path = f"/home/r13qingrong/Projects/DSO/instruction_following_eval/data/cleaned/{FT_DATASET_TYPE}"

# Create output directory if it doesn't exist
os.makedirs(output_dir_path, exist_ok=True)

for filename in os.listdir(dir_path):
    if filename.endswith(".jsonl"):
        input_path = os.path.join(dir_path, filename)
        output_path = os.path.join(output_dir_path, filename.replace(".jsonl", "_cleaned.jsonl"))

        with open(input_path, "r", encoding="utf-8") as infile, \
             open(output_path, "w", encoding="utf-8") as outfile:

            for line in infile:
                data = json.loads(line)
                prompt = data.get("prompt", "")
                response = data.get("response", "")

                # Extract content after </think>\n\n
                if "</think>\n\n" in response:
                    filtered_response = response.split("</think>\n\n", 1)[-1]
                else:
                    filtered_response = response  # fallback if </think> not found

                json.dump({"prompt": prompt, "response": filtered_response}, outfile)
                outfile.write("\n")

print("Filtering complete.")


Filtering complete.


# Create Excel

In [9]:
## Single

In [None]:
import pandas as pd
import re

# Input CSV paths
paraphrase_csv = "paraphrase.csv"
implication_csv = "implication.csv"
qa_csv = "qa.csv"

def process_csv(filepath, dataset_name):
    df = pd.read_csv(filepath)
    df['step'] = df['Filename'].apply(lambda x: int(re.search(r'-(\d+)_cleaned', x).group(1)))
    df = df.sort_values(by='step')
    df = df[['step', 'Strict Prompt Acc', 'Loose Prompt Acc']]
    df.columns = ['step', f'{dataset_name}_strict', f'{dataset_name}_loose']
    return df

# Process all three
df_paraphrase = process_csv(paraphrase_csv, "paraphrase")
df_implication = process_csv(implication_csv, "implication")
df_qa = process_csv(qa_csv, "qa")

# Merge on step
df_merged = df_paraphrase.merge(df_implication, on="step").merge(df_qa, on="step")

# Split into strict and loose tables
df_strict = df_merged[['step', 'paraphrase_strict', 'implication_strict', 'qa_strict']]
df_loose = df_merged[['step', 'paraphrase_loose', 'implication_loose', 'qa_loose']]

# Rename columns
df_strict.columns = ['steps', 'paraphrase', 'implication', 'qa']
df_loose.columns = ['steps', 'paraphrase', 'implication', 'qa']

# Write to Excel
with pd.ExcelWriter("instruction_following_results.xlsx") as writer:
    df_strict.to_excel(writer, sheet_name="strict", index=False)
    df_loose.to_excel(writer, sheet_name="loose", index=False)

print("✅ Excel file 'instruction_following_results.xlsx' created.")


# Summary1000 Excel

In [1]:
import pandas as pd
import re
import os
from functools import reduce

# CSV paths (can be empty strings or None if unknown)
biographies_csv = "/home/r13qingrong/Projects/DSO/instruction_following_eval/data/results/summary_15x1000biographies_results.csv"
capitals_csv = "/home/r13qingrong/Projects/DSO/instruction_following_eval/data/results/summary_15x1000capitals_results.csv"
worldfacts_csv = "/home/r13qingrong/Projects/DSO/instruction_following_eval/data/results/summary_15x1000worldfacts_results.csv"

def extract_step(filename):
    match = re.search(r'-(\d+)\.jsonl$', filename)
    if match:
        return int(match.group(1))
    else:
        print(f"⚠️ Warning: filename '{filename}' does not match expected pattern. Skipping.")
        return None

def process_csv(filepath, dataset_name):
    df = pd.read_csv(filepath)
    # Extract step safely using new regex
    df['step'] = df['Filename'].apply(lambda x: extract_step(x))
    df = df.dropna(subset=['step'])
    df['step'] = df['step'].astype(int)
    df = df.sort_values(by='step')

    # Extract the 4 columns separately with renaming
    strict_prompt = df[['step', 'Strict Prompt Acc']].copy()
    strict_prompt.columns = ['step', dataset_name]

    strict_instr = df[['step', 'Strict Instr Acc']].copy()
    strict_instr.columns = ['step', dataset_name]

    loose_prompt = df[['step', 'Loose Prompt Acc']].copy()
    loose_prompt.columns = ['step', dataset_name]

    loose_instr = df[['step', 'Loose Instr Acc']].copy()
    loose_instr.columns = ['step', dataset_name]

    return strict_prompt, strict_instr, loose_prompt, loose_instr

# Hold lists of dfs per metric
strict_prompt_list = []
strict_instr_list = []
loose_prompt_list = []
loose_instr_list = []
names = []

for csv_path, dataset_name in [
    (biographies_csv, "biographies"),
    (capitals_csv, "capitals"),
    (worldfacts_csv, "worldfacts"),
]:
    if csv_path and os.path.isfile(csv_path):
        sp, si, lp, li = process_csv(csv_path, dataset_name)
        strict_prompt_list.append(sp)
        strict_instr_list.append(si)
        loose_prompt_list.append(lp)
        loose_instr_list.append(li)
        names.append(dataset_name)
    else:
        print(f"⚠️ {dataset_name} CSV missing or path empty, skipping.")

if not strict_prompt_list:
    raise RuntimeError("No valid CSV files found. Exiting.")

# Merge function with outer join on step
def merge_dfs(df_list):
    return reduce(lambda left, right: pd.merge(left, right, on='step', how='outer'), df_list)

df_strict_prompt = merge_dfs(strict_prompt_list).sort_values('step')
df_strict_instr = merge_dfs(strict_instr_list).sort_values('step')
df_loose_prompt = merge_dfs(loose_prompt_list).sort_values('step')
df_loose_instr = merge_dfs(loose_instr_list).sort_values('step')

# Rename columns for output (step -> steps)
df_strict_prompt.columns = ['steps'] + names
df_strict_instr.columns = ['steps'] + names
df_loose_prompt.columns = ['steps'] + names
df_loose_instr.columns = ['steps'] + names

# Save to Excel with 4 sheets
with pd.ExcelWriter("instruction_following_summary_results.xlsx") as writer:
    df_strict_prompt.to_excel(writer, sheet_name="strict_prompt", index=False)
    df_strict_instr.to_excel(writer, sheet_name="strict_instr", index=False)
    df_loose_prompt.to_excel(writer, sheet_name="loose_prompt", index=False)
    df_loose_instr.to_excel(writer, sheet_name="loose_instr", index=False)

print("✅ Excel file 'instruction_following_summary_results.xlsx' created.")


✅ Excel file 'instruction_following_summary_results.xlsx' created.


## Batch

In [1]:
import pandas as pd
import re
import os

# Set directory where all *_results.csv files are located
results_dir = "/home/r13qingrong/Projects/DSO/instruction_following_eval/data/results"
output_excel_path = "/home/r13qingrong/Projects/DSO/instruction_following_eval/data/results/results.xlsx"

strict_df_list = []
loose_df_list = []

for filename in os.listdir(results_dir):
    if filename.endswith("_results.csv"):
        dataset_type = filename.replace("_results.csv", "")  # e.g., "paraphrase"
        file_path = os.path.join(results_dir, filename)

        df = pd.read_csv(file_path)
        # Extract step number from filename column
        df['step'] = df['Filename'].apply(lambda x: int(re.search(r'-(\d+)_cleaned', x).group(1)))
        df = df.sort_values(by='step')

        # Rename and select necessary columns
        strict_df = df[['step', 'Strict Instr Acc']].copy()
        strict_df.columns = ['step', dataset_type]
        
        loose_df = df[['step', 'Loose Instr Acc']].copy()
        loose_df.columns = ['step', dataset_type]

        strict_df_list.append(strict_df)
        loose_df_list.append(loose_df)

# Merge all strict dataframes on 'step'
from functools import reduce
df_strict = reduce(lambda left, right: pd.merge(left, right, on='step', how='outer'), strict_df_list)
df_loose = reduce(lambda left, right: pd.merge(left, right, on='step', how='outer'), loose_df_list)

# Sort by step
df_strict = df_strict.sort_values(by="step")
df_loose = df_loose.sort_values(by="step")

# Output Excel file
output_excel_path = os.path.join(results_dir, "instruction_following_accuracy_summary.xlsx")

with pd.ExcelWriter(output_excel_path) as writer:
    df_strict.to_excel(writer, sheet_name="strict", index=False)
    df_loose.to_excel(writer, sheet_name="loose", index=False)

print(f"✅ Excel file saved to: {output_excel_path}")


AttributeError: 'NoneType' object has no attribute 'group'

In [5]:
import pandas as pd
import re
import os
from functools import reduce

results_dir = "/home/r13qingrong/Projects/DSO/instruction_following_eval/data/results"
output_excel_path = os.path.join(results_dir, "instruction_following_accuracy_summary.xlsx")

# Helper function to extract step from filename
def extract_step(filename):
    match = re.search(r'-(\d+)_cleaned', filename)
    if match:
        return int(match.group(1))
    match = re.search(r'-(\d+)\.jsonl$', filename)
    if match:
        return int(match.group(1))
    print(f"⚠️ Warning: Could not extract step from filename '{filename}'. Skipping row.")
    return None

# Containers for each metric dataframes
strict_prompt_list = []
strict_instr_list = []
loose_prompt_list = []
loose_instr_list = []

for filename in os.listdir(results_dir):
    if filename.endswith("_results.csv"):
        dataset_type = filename.replace("_results.csv", "")
        file_path = os.path.join(results_dir, filename)

        df = pd.read_csv(file_path)
        df['step'] = df['Filename'].apply(extract_step)
        df = df.dropna(subset=['step'])
        df['step'] = df['step'].astype(int)
        df = df.sort_values(by='step')

        # Prepare each dataframe with step and metric, rename metric col to dataset_type
        strict_prompt = df[['step', 'Strict Prompt Acc']].copy()
        strict_prompt.columns = ['step', dataset_type]
        strict_prompt_list.append(strict_prompt)

        strict_instr = df[['step', 'Strict Instr Acc']].copy()
        strict_instr.columns = ['step', dataset_type]
        strict_instr_list.append(strict_instr)

        loose_prompt = df[['step', 'Loose Prompt Acc']].copy()
        loose_prompt.columns = ['step', dataset_type]
        loose_prompt_list.append(loose_prompt)

        loose_instr = df[['step', 'Loose Instr Acc']].copy()
        loose_instr.columns = ['step', dataset_type]
        loose_instr_list.append(loose_instr)

# Merge all datasets on 'step' for each metric
def merge_dfs(df_list):
    if not df_list:
        return pd.DataFrame()  # return empty if no data
    return reduce(lambda left, right: pd.merge(left, right, on='step', how='outer'), df_list)

df_strict_prompt = merge_dfs(strict_prompt_list).sort_values('step')
df_strict_instr = merge_dfs(strict_instr_list).sort_values('step')
df_loose_prompt = merge_dfs(loose_prompt_list).sort_values('step')
df_loose_instr = merge_dfs(loose_instr_list).sort_values('step')

# Save to Excel with 4 sheets
with pd.ExcelWriter(output_excel_path) as writer:
    df_strict_prompt.to_excel(writer, sheet_name="strict_prompt", index=False)
    df_strict_instr.to_excel(writer, sheet_name="strict_instr", index=False)
    df_loose_prompt.to_excel(writer, sheet_name="loose_prompt", index=False)
    df_loose_instr.to_excel(writer, sheet_name="loose_instr", index=False)

print(f"✅ Excel file with 4 sheets saved to: {output_excel_path}")


✅ Excel file with 4 sheets saved to: /home/r13qingrong/Projects/DSO/instruction_following_eval/data/results/instruction_following_accuracy_summary.xlsx
