# Qwen/Qwen3-Reranker-0.6B

In [None]:
import warnings; warnings.filterwarnings("ignore")
import os
import sys
import numpy as np
import pandas as pd
import torch as th

from pprint import pp
from transformers import (AutoModel, AutoTokenizer, AutoModelForCausalLM)
from sentence_transformers import (SentenceTransformer, util, CrossEncoder)

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if th.cuda.is_available():
    device = th.device("cuda")
else:
    device = th.device("cpu")

# Number of CUDA devices visible to this process (0 on CPU-only machines).
devive_cnt = th.cuda.device_count()

# Report the runtime environment for reproducibility.
print(f"device = {device}; devive_cnt = {devive_cnt}")
print(f"torch version = {th.__version__}")
print(f"cuda version = {th.version.cuda}")

In [None]:
# Upper bound on the tokenized prompt length (8K tokens); longer inputs are
# truncated when the tokenizer is called with this value.
max_length = 8 * 1024

# Name of the checkpoint directory, resolved relative to ``path_model``.
checkpoint = "Qwen3-Reranker-0.6B"

# Base directory holding the local checkpoints; it is also passed as the
# cache directory. An empty string resolves against the working directory.
path_model = ""

In [None]:
# Load the tokenizer from the local checkpoint directory only (no download).
# Padding goes on the left so that, in a padded batch, the last position of
# every row is a real token; the scoring code reads the logits at that
# position.
tokenizer = AutoTokenizer.from_pretrained(
    os.path.join(path_model, checkpoint),
    padding_side="left",
    local_files_only=True,
    force_download=False,
    cache_dir=path_model,
)

In [None]:
# Load the causal-LM weights from the same local checkpoint directory as the
# tokenizer. The Auto* classes are factories and raise when called directly,
# so the model has to be built through ``from_pretrained``.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=os.path.join(path_model, checkpoint),
    cache_dir=path_model,
    force_download=False,
    local_files_only=True,  # never reach out to the hub
    device_map="auto",  # let the library decide where the weights live
    torch_dtype=th.bfloat16,
    # NOTE: attn_implementation="sdpa" may be passed here to pick the
    # attention backend explicitly.
)

In [None]:
# Pretty-print the loaded model object to inspect it.
pp(model)

In [None]:
# System message placed ahead of every (instruction, query, document) prompt;
# it restricts the model's answer to 'yes' or 'no'.
system_prompt = (
    "Judge whether the Document meets the requirements based on the Query "
    "and the Instruct provided. "
    "Note that the answer can only be 'yes' or 'no'."
)

In [None]:
# Task description inserted into the <Instruct> field of each prompt.
instruction = (
    "Given a web search query, "
    "retrieve relevant passages that answer the query."
)

# Search query inserted into the <Query> field.
query = "What is the capital of China?"

# Candidate passages; each one is scored against the query separately.
documents = [
    "The capital of China is Beijing.",
    (
        "Gravity is a force that attracts two bodies towards each other. "
        "It gives weight to physical objects and is responsible for the "
        "movement of planets around the sun."
    ),
]

In [None]:
# Prompt-formatting helper
def format_instruction(system_prompt, instruction, query, documents):
    """Build one chat-formatted prompt per document.

    Each document is combined with the shared instruction and query into a
    user message, preceded by the system message, and rendered through the
    module-level ``tokenizer``'s chat template. The template is asked for
    text (``tokenize=False``) with the generation prompt appended and
    thinking disabled.

    Args:
        system_prompt: Content of the system message.
        instruction: Text placed after ``<Instruct>:``.
        query: Text placed after ``<Query>:``.
        documents: Iterable of passages, one prompt is produced per item.

    Returns:
        A list with one rendered prompt per document, in input order.
    """

    def _render(doc):
        # Same two-message conversation for every document; only the
        # <Document> field changes.
        conversation = [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}",
            },
        ]
        return tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )

    return [_render(doc) for doc in documents]

In [None]:
pairs = format_instruction(system_prompt, instruction, query, documents)
pp(pairs)

In [None]:
def process_inputs(pairs):
    """Tokenize rendered prompts into a single padded batch.

    Uses the module-level ``tokenizer`` and ``max_length``: prompts longer
    than ``max_length`` are truncated, shorter ones are padded to the
    longest prompt in the batch, and PyTorch tensors are requested.

    Args:
        pairs: List of prompt strings to tokenize.

    Returns:
        The tokenizer's output for the batch.
    """
    return tokenizer(
        text=pairs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    )

In [None]:
# Vocabulary ids of the two answer tokens; the scoring code compares the
# logits of exactly these two entries.
token_false_id, token_true_id = tokenizer.convert_tokens_to_ids(["no", "yes"])

In [None]:
@th.no_grad()
def compute_logits(inputs, **kwargs):
    """Return, for each prompt in the batch, the probability of "yes".

    Runs the module-level ``model`` on the batch, takes the logits at the
    last sequence position, keeps only the entries for the "no" and "yes"
    tokens (``token_false_id`` / ``token_true_id``), and normalizes over
    those two so the result is P(yes) in a yes-vs-no choice.

    Args:
        inputs: Tokenizer output exposing ``input_ids`` and
            ``attention_mask`` tensors.
        **kwargs: Extra keyword arguments forwarded to the model call.

    Returns:
        A list of floats, one score per row of the batch.
    """
    # The tokenizer creates its tensors on the CPU. Move them to the device
    # the model sits on, otherwise the forward pass fails with a device
    # mismatch whenever the model was placed on a GPU.
    target_device = model.device
    outputs = model(
        input_ids=inputs.input_ids.to(target_device),
        attention_mask=inputs.attention_mask.to(target_device),
        **kwargs,
    )

    # Only the prediction for the next token matters. Reading position -1
    # assumes the batch was padded on the left, so the last position of
    # every row is a real token.
    last_logits = outputs.logits[:, -1, :]

    # Column 0 = "no", column 1 = "yes".
    yes_no_logits = th.stack(
        [last_logits[:, token_false_id], last_logits[:, token_true_id]],
        dim=1,
    )
    log_probs = th.nn.functional.log_softmax(yes_no_logits, dim=1)
    return log_probs[:, 1].exp().tolist()

In [None]:
# Tokenize the rendered prompts and score each one against the query.
inputs = process_inputs(pairs)
scores = compute_logits(inputs)

# One score per document, in the same order as ``documents``.
print("scores: ", scores)