In [1]:
# Path of the base model handed to vLLM; the engine below loads it with quantization='gptq'.
MODEL_NAME = "/kaggle/input/qwen2-5-32b-instruct-gptq-int4"
# Path of the LoRA adapter passed to `LoRARequest` at generation time.
# The commented-out assignments are alternative checkpoints; only the last, uncommented one is active.
# LORA_PATH = "/kaggle/input/trainedmodelcheckpoint549"
# LORA_PATH = "/kaggle/input/checkpoints/checkpoint-488"
# LORA_PATH = "/kaggle/input/checkpoints/checkpoint-427"
# NOTE(review): this path contains a space before "-366", unlike the sibling checkpoint
# paths above -- confirm it matches the actual directory name under /kaggle/input/checkpoints.
LORA_PATH = "/kaggle/input/checkpoints/checkpoint -366"

In [2]:
# %%
import os
# Select the vLLM V0 engine. The variable is set before `import vllm` below,
# presumably so the library sees it when it is first imported -- TODO confirm.
# NOTE(review): likely needed because per-request `logits_processors` are used later; verify.
os.environ["VLLM_USE_V1"] = "0"
import pandas as pd
from logits_processor_zoo.vllm import MultipleChoiceLogitsProcessor
import torch
import vllm
# NOTE(review): `numpy` and `argparse` are not referenced in the visible cells.
import numpy as np
from vllm.lora.request import LoRARequest
import argparse
from scipy.special import softmax
# Competition test set; each row is turned into exactly one prompt further down.
df = pd.read_csv("/kaggle/input/jigsaw-agile-community-rules/test.csv")

In [3]:
# Engine options collected in one mapping so they can be read (and tuned) at a glance.
engine_options = dict(
    # quantization='awq',
    quantization='gptq',
    # One tensor-parallel shard per GPU that torch can see.
    tensor_parallel_size=torch.cuda.device_count(),
    gpu_memory_utilization=0.95,
    trust_remote_code=True,
    dtype="half",
    enforce_eager=True,
    max_model_len=4096,
    disable_log_stats=True,
    enable_prefix_caching=True,
    # A LoRA adapter is supplied later through `lora_request` in `llm.generate`.
    enable_lora=True,
)

# Build the inference engine for the base model and grab its tokenizer,
# which is reused for chat templating and for the answer-choice logits processor.
llm = vllm.LLM(MODEL_NAME, **engine_options)
tokenizer = llm.get_tokenizer()
# System message shared by every prompt.
SYS_PROMPT = """
You are given a comment on reddit. Your task is to classify if it violates the given rule. Only respond Yes/No.
"""

# Few-shot user message. Fields are filled from one dataframe row via `str.format(row=row)`:
# the subreddit and rule, four labelled examples (Yes, No, No, Yes), then the comment to judge.
USER_TEMPLATE = """
r/{row.subreddit}
Rule: {row.rule}

1) {row.positive_example_1}
Violation: Yes

2) {row.negative_example_1}
Violation: No

3) {row.negative_example_2}
Violation: No

4) {row.positive_example_2}
Violation: Yes

5) {row.body}
"""


def build_prompt(row):
    """Render the full chat prompt for one test row.

    The system and user messages are passed through the tokenizer's chat
    template (untokenized, with the generation prompt appended), and the
    literal "Answer:" is added at the very end of the rendered text.
    """
    chat = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": USER_TEMPLATE.format(row=row)},
    ]
    rendered = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        tokenize=False,
    )
    return rendered + "Answer:"


# One prompt per row, in dataframe order; also kept on the frame for inspection.
prompts = [build_prompt(row) for _, row in df.iterrows()]
df["prompt"] = prompts

# Logits processor configured with the two answer choices.
mclp = MultipleChoiceLogitsProcessor(tokenizer, choices=['Yes','No'])

# Generate a single token per prompt and ask for the top-2 log-probabilities.
# NOTE(review): the reported logprobs may already be scaled by `temperature` in this
# engine version -- confirm if calibrated probabilities (not just ranking) matter.
sampling_params = vllm.SamplingParams(
    seed=777,
    temperature=0.1,
    skip_special_tokens=True,
    max_tokens=1,
    logits_processors=[mclp],
    logprobs=2,
)
adapter_request = LoRARequest("default", 1, LORA_PATH)

outputs = llm.generate(
    prompts,
    sampling_params,
    use_tqdm=True,
    lora_request=adapter_request,
)


def first_token_logprobs(request_output):
    """Return {decoded token text: logprob} for the first generated token.

    Only the first completion of the request is inspected.
    """
    first_step = request_output.outputs[0].logprobs[0]
    return {entry.decoded_token: entry.logprob for entry in first_step.values()}


logprobs = [first_token_logprobs(request_output) for request_output in outputs]

# Keep only the two answer columns; this raises KeyError if either token text
# never appears among the returned logprobs.
logit_matrix = pd.DataFrame(logprobs)[['Yes','No']]

# Column-wise concat aligns on the index, so `df` is expected to still carry
# the default 0..n-1 index it got from `read_csv`.
df = pd.concat([df, logit_matrix], axis=1)

2025-08-09 16:03:58.711476: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754755439.062407      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754755439.159582      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 08-09 16:04:13 [__init__.py:235] Automatically detected platform cuda.
INFO 08-09 16:04:29 [config.py:1604] Using max model len 4096
INFO 08-09 16:04:31 [llm_engine.py:228] Initializing a V0 LLM engine (v0.10.0) with config: model='/kaggle/input/qwen2-5-32b-instruct-gptq-int4', speculative_config=None, tokenizer='/kaggle/input/qwen2-5-32b-instruct-gptq-int4', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=gptq, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='xgrammar', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None,

2025-08-09 16:04:36.367218: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754755476.387766      83 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754755476.394048      83 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 08-09 16:04:41 [__init__.py:235] Automatically detected platform cuda.
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:04:43 [multiproc_worker_utils.py:226] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:04:43 [cuda.py:346] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:04:43 [cuda.py:395] Using XFormers backend.


[W809 16:04:54.901773298 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W809 16:04:54.266996866 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W809 16:05:04.912177055 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 08-09 16:05:14 [__init__.py:1375] Found nccl from library libnccl.so.2
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:05:14 [__init__.py:1375] Found nccl from library libnccl.so.2
INFO 08-09 16:05:14 [pynccl.py:70] vLLM is using nccl==2.26.2


[W809 16:05:14.922737988 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:05:14 [pynccl.py:70] vLLM is using nccl==2.26.2
INFO 08-09 16:05:14 [custom_all_reduce_utils.py:208] generating GPU P2P access cache in /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
INFO 08-09 16:05:38 [custom_all_reduce_utils.py:246] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:05:38 [custom_all_reduce_utils.py:246] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
INFO 08-09 16:05:38 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[1], buffer_handle=(1, 4194304, 6, 'psm_ef0d2286'), local_subscribe_addr='ipc:///tmp/6ef71f57-a815-4380-b6c0-d028de10abc8', remote_subscribe_addr=None, remote_addr_ipv6=False)
INFO 08-09 16:05:38 [parallel_state.py:1102] rank 0 in world size 2 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
[1;36m(VllmWorkerProcess pi

Loading safetensors checkpoint shards:   0% Completed | 0/5 [00:00<?, ?it/s]


[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:09:31 [default_loader.py:262] Loading weights took 232.02 seconds
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:09:31 [logger.py:65] Using PunicaWrapperGPU.
INFO 08-09 16:09:31 [default_loader.py:262] Loading weights took 232.16 seconds
INFO 08-09 16:09:31 [logger.py:65] Using PunicaWrapperGPU.
INFO 08-09 16:09:32 [model_runner.py:1115] Model loading took 9.2519 GiB and 232.805223 seconds
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:09:32 [model_runner.py:1115] Model loading took 9.2519 GiB and 232.682807 seconds
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:09:50 [worker.py:295] Memory profiling takes 17.62 seconds
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:09:50 [worker.py:295] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.95) = 14.00GiB
[1;36m(VllmWorkerProcess pid=83)[0;0m INFO 08-09 16:09:50 [worker.py:295] model weights take 9.25GiB; non_torch_

Adding requests:   0%|          | 0/10 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/10 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

In [4]:
# Turn the per-row ("Yes", "No") log-probabilities into probabilities that sum to 1.
# The softmax is computed for all rows in one vectorized call (axis=1 normalizes
# within each row) instead of invoking a Python lambda once per row via `apply`.
# NOTE: the log-prob columns are overwritten in place, so re-running this cell on
# its own would apply softmax to values that are already normalized.
yes_no_probs = softmax(df[['Yes', "No"]].to_numpy(), axis=1)
df[['Yes', "No"]] = yes_no_probs
# The prediction is the probability assigned to "Yes".
df["pred"] = df["Yes"]

### Postprocess (currently disabled: the cells below are commented out)

In [5]:
# df_tr = pd.read_csv("/kaggle/input/jigsaw-agile-community-rules/train.csv")
# df_te = pd.read_csv("/kaggle/input/jigsaw-agile-community-rules/test.csv")
# df_all = pd.concat([df_tr[df_te.columns], df_te]).reset_index(drop=True)

In [6]:
# df_pos_ex_1 = df_all[["rule",'subreddit',"positive_example_1"]].rename(columns={"positive_example_1": "body"}).copy()
# df_pos_ex_2 = df_all[["rule",'subreddit',"positive_example_2"]].rename(columns={"positive_example_2": "body"}).copy()
# df_pos_ex = pd.concat([df_pos_ex_1, df_pos_ex_2]).reset_index(drop=True)
# df_pos_ex['rule_violation_ex'] = 1
# df_neg_ex_1 = df_all[["rule", 'subreddit',"negative_example_1"]].rename(columns={"negative_example_1": "body"}).copy()
# df_neg_ex_2 = df_all[["rule", 'subreddit',"negative_example_2"]].rename(columns={"negative_example_2": "body"}).copy()
# df_neg_ex = pd.concat([df_neg_ex_1, df_neg_ex_2]).reset_index(drop=True)
# df_neg_ex['rule_violation_ex'] = 0
# df_ex = pd.concat([df_pos_ex, df_neg_ex]).drop_duplicates().reset_index(drop=True)
# df_ex_mean = df_ex.groupby(["rule","body"])["rule_violation_ex"].mean().reset_index()
# df = df.merge(df_ex_mean, on=["rule", "body"], how="left")
# df.loc[df["rule_violation_ex"].notnull(), "pred"] = df.loc[df["rule_violation_ex"].notnull(), "rule_violation_ex"]

In [7]:
# The submitted score is the "Yes" probability stored in `pred`.
df["rule_violation"] = df["pred"]

# Write only the id and score columns, without the dataframe index.
submission = df[["row_id", "rule_violation"]]
submission.to_csv("submission.csv", index=False)

In [8]:
# Show the first rows of the two submission columns as a quick sanity check.
df[['row_id', 'rule_violation']].head()

Unnamed: 0,row_id,rule_violation
0,2029,0.999999
1,2030,1.0
2,2031,0.884089
3,2032,0.999967
4,2033,1.0
