In [1]:
from unsloth import FastLanguageModel
import torch

BASE_MODEL = "unsloth/Qwen3-14B"      
ADAPTER_DIR = "qwen_lora_kepler_adapter"      

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = BASE_MODEL,
    max_seq_length = 2048,
    load_in_4bit = True,              
)


try:
    model.load_adapter(ADAPTER_DIR)
except Exception as e:
    print("load_adapter failed, will try PEFT fallback:", e)

FastLanguageModel.for_inference(model)


ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


ü¶• Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.1.4: Fast Qwen3 patching. Transformers: 4.57.6.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.495 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.34. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [00:21<00:00,  7.16s/it]


Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 5120, padding_idx=151654)
    (layers): ModuleList(
      (0-5): 6 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=5120, out_features=5120, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=5120, out_features=16, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=16, out_features=5120, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=5120, out_features=1024, bias=False)
            (lora_dropout): 

In [2]:
import pandas as pd

df = pd.read_csv("t1.csv")
df.head()

df_clean = df[[
    "Speaker",
    "Timestamp_Minutes",
    "Utterance",
    "Proposing Strategies and Plans"
]]
df= df_clean

In [3]:
def build_prompt(utterance):
    return f"""
You are annotating classroom discussion transcripts.

Task:
Decide whether the following utterance involves *proposing strategies or plans*.

Context: 
Students are working in groups on activities to learn about kepler's first law of planetary motion. There is a pen and paper activity (that uses pins, paper, pencil, string) for them to understand how draw an elliptical orbit and then a computer aspect where they work on various immersive computer simulation activities to develop a final claim that orbits are elliptical. The learning objective is for them to work collaboratively to discover this new knowledge through hands on activities.

Definition:
- Articulating specific steps, strategies, or procedures required to organize or accomplish the group's task.
- Look for utterances that set direction or specify how to complete an activity (often using procedural or sequential language). Exclude cases where the speaker is merely following instructions read aloud after being prompted by a peer 

Utterance:
\"\"\"{utterance}\"\"\"

Respond ONLY in valid JSON.
Do NOT include any explanation or extra text.

Format:
{{"label": "YES"}} or {{"label": "NO"}}

"""


In [4]:
df_text = df[["Utterance"]].dropna()


In [5]:
utterance = df_text.iloc[0]["Utterance"]

prompt = build_prompt(utterance)

messages = [
    {"role": "system", "content": "You are an annotation assistant."},
    {"role": "user", "content": build_prompt(utterance)},
]

text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,  # ‚≠ê ÂÖ≥ÈîÆ
)

inputs = tokenizer(text, return_tensors="pt").to("cuda")

out = model.generate(
    **inputs,
    max_new_tokens=1000,
    do_sample=True,
    temperature = 0.6
)

print(tokenizer.decode(out[0], skip_special_tokens=True))



system
You are an annotation assistant.
user

You are annotating classroom discussion transcripts.

Task:
Decide whether the following utterance involves *proposing strategies or plans*.

Context: 
Students are working in groups on activities to learn about kepler's first law of planetary motion. There is a pen and paper activity (that uses pins, paper, pencil, string) for them to understand how draw an elliptical orbit and then a computer aspect where they work on various immersive computer simulation activities to develop a final claim that orbits are elliptical. The learning objective is for them to work collaboratively to discover this new knowledge through hands on activities.

Definition:
- Articulating specific steps, strategies, or procedures required to organize or accomplish the group's task.
- Look for utterances that set direction or specify how to complete an activity (often using procedural or sequential language). Exclude cases where the speaker is merely following instr

In [6]:
import re
import json
import torch

def annotate_with_think(utterance, tokenizer, model, max_new_tokens=1000):
    messages = [
        {
            "role": "system",
            "content": (
                "You are an annotation assistant. "
            ),
        },
        {"role": "user", "content": build_prompt(utterance)},
    ]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = tokenizer(text, return_tensors="pt").to("cuda")

    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature = 0.6
    )
    print(tokenizer.decode(out[0], skip_special_tokens=True))


    # Âè™ÂèñÊñ∞ÁîüÊàêÁöÑÈÉ®ÂàÜ
    gen_ids = out[0][inputs["input_ids"].shape[-1]:]
    gen_text = tokenizer.decode(gen_ids, skip_special_tokens=True)

    # ---- Ëß£Êûê think ----
    think_match = re.search(r"<think>(.*?)</think>", gen_text, re.S)
    think_text = think_match.group(1).strip() if think_match else None

    # ---- Ëß£Êûê label ----
    label_match = re.search(r'\{[^{}]*"label"\s*:\s*"(YES|NO)"[^{}]*\}', gen_text)
    label = json.loads(label_match.group())["label"] if label_match else None

    return think_text, label


In [7]:
df_text = df[["Utterance"]].dropna().copy()


In [8]:
results = []

for utt in df_text["Utterance"].head(2):
    think, label = annotate_with_think(utt, tokenizer, model)
    results.append({
        "Utterance": utt,
        "think": think,
        "label": label,
    })

df_annotated = pd.DataFrame(results)
df_annotated

system
You are an annotation assistant. 
user

You are annotating classroom discussion transcripts.

Task:
Decide whether the following utterance involves *proposing strategies or plans*.

Context: 
Students are working in groups on activities to learn about kepler's first law of planetary motion. There is a pen and paper activity (that uses pins, paper, pencil, string) for them to understand how draw an elliptical orbit and then a computer aspect where they work on various immersive computer simulation activities to develop a final claim that orbits are elliptical. The learning objective is for them to work collaboratively to discover this new knowledge through hands on activities.

Definition:
- Articulating specific steps, strategies, or procedures required to organize or accomplish the group's task.
- Look for utterances that set direction or specify how to complete an activity (often using procedural or sequential language). Exclude cases where the speaker is merely following inst

Unnamed: 0,Utterance,think,label
0,if you guys can put this microphone on your wh...,,NO
1,Shall we .. Did you guys read it or-,,NO


In [9]:
import os
import pandas as pd

def append_to_csv(rows, out_path):
    df_new = pd.DataFrame(rows)
    if not os.path.exists(out_path):
        df_new.to_csv(out_path, index=False)
    else:
        df_new.to_csv(out_path, mode="a", header=False, index=False)

In [10]:
from tqdm import tqdm

def run_annotation_in_batches(
    df_text,
    tokenizer,
    model,
    start_idx=0,
    batch_size=10,
    out_path="finetuned_qwen3_annotations_checkpoint.csv",
):
    n = len(df_text)

    for batch_start in range(start_idx, n, batch_size):
        batch_end = min(batch_start + batch_size, n)
        batch_rows = []

        print(f"\n‚ñ∂ Processing rows {batch_start}‚Äì{batch_end - 1}")

        for i in range(batch_start, batch_end):
            utt = df_text.iloc[i]["Utterance"]

            think, label = annotate_with_think(
                utt,
                tokenizer,
                model,
            )

            batch_rows.append({
                "index": i,
                "Utterance": utt,
                "think": think,
                "label": label,
            })

        # ‚≠ê ÊØè 10 Êù°Á´ãÂàª‰øùÂ≠ò
        append_to_csv(batch_rows, out_path)
        print(f"‚úî Saved batch ending at index {batch_end - 1}")

    print("\n‚úÖ All done.")


In [11]:


run_annotation_in_batches(
    df,
    tokenizer,
    model,
    start_idx=0,
    batch_size=10,
    out_path="finetuned_qwen3_annotations.csv",
)



‚ñ∂ Processing rows 0‚Äì9
system
You are an annotation assistant. 
user

You are annotating classroom discussion transcripts.

Task:
Decide whether the following utterance involves *proposing strategies or plans*.

Context: 
Students are working in groups on activities to learn about kepler's first law of planetary motion. There is a pen and paper activity (that uses pins, paper, pencil, string) for them to understand how draw an elliptical orbit and then a computer aspect where they work on various immersive computer simulation activities to develop a final claim that orbits are elliptical. The learning objective is for them to work collaboratively to discover this new knowledge through hands on activities.

Definition:
- Articulating specific steps, strategies, or procedures required to organize or accomplish the group's task.
- Look for utterances that set direction or specify how to complete an activity (often using procedural or sequential language). Exclude cases where the speak