In [1]:
import os
# Force bitsandbytes to use the CUDA modules it finds in its own folder
os.environ["BNB_CUDA_VERSION"] = "121" # Or 118, depending on Kaggle's current CUDA

# Make the CUDA runtime directory visible through LD_LIBRARY_PATH.
# The variable may be unset (a bare `+=` would raise KeyError), and this cell
# may be re-run, so the directory is appended only when it is not listed yet.
CUDA_LIB_DIR = "/usr/local/cuda/lib64"
_ld_library_path = os.environ.get("LD_LIBRARY_PATH", "")
if CUDA_LIB_DIR not in _ld_library_path.split(":"):
    os.environ["LD_LIBRARY_PATH"] = (
        f"{_ld_library_path}:{CUDA_LIB_DIR}" if _ld_library_path else CUDA_LIB_DIR
    )

In [2]:
# ==========================================
# 1. PROVEN OFFLINE INSTALL (Original Method)
# ==========================================
import os
import torch
import gc
import site
import json
import pandas as pd
from tqdm import tqdm

# Cleanup memory.
# Collect garbage first so that tensors kept alive only by unreachable Python
# objects are handed back to PyTorch's caching allocator, and only then ask
# the allocator to release its cached blocks. The collection itself does not
# depend on CUDA, so it runs unconditionally.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

LIB_PATH = "offline_libs"
ZIP_PATH = "/kaggle/input/zno-libs-final/offline_libs.zip"

if not os.path.exists(LIB_PATH):
    if os.path.exists(ZIP_PATH):
        print("üì¶ Unzipping original libraries...")
        !unzip -q {ZIP_PATH} -d .
    elif os.path.exists("/kaggle/input/zno-libs-final/offline_libs"):
        LIB_PATH = "/kaggle/input/zno-libs-final/offline_libs"

print(f"üì¶ Installing from {LIB_PATH}...")
# Reverting to your exact install command
!pip install --no-index --find-links={LIB_PATH} bitsandbytes peft accelerate transformers > /dev/null

site.main()
print("‚úÖ Installation Complete!")

# ==========================================
# 2. CONFIGURATION & IMPORTS
# ==========================================
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

BATCH_SIZE = 1
MAX_CONTEXT_LEN = 1500   # Token budget per prompt (passed to the tokenizer as max_length)
BASE_MODEL_PATH = "/kaggle/input/qwen2.5/transformers/7b-instruct/1"
ADAPTER_PATH = "/kaggle/input/zno-my-adapter"

TEST_FILE_NAME = "zno.test.jsonl"


def find_file_under(root_dir, filename):
    """Return the path of the first file named `filename` below `root_dir`.

    The walk stops at the first hit instead of scanning the whole tree, and
    sub-directories are visited in sorted order so the result is deterministic.

    Args:
        root_dir: Directory to search recursively.
        filename: Exact file name to look for.

    Returns:
        The joined path of the first match, or None when nothing matches
        (including when `root_dir` does not exist).
    """
    for current_dir, subdirs, files in os.walk(root_dir):
        subdirs.sort()  # in-place sort steers os.walk's traversal order
        if filename in files:
            return os.path.join(current_dir, filename)
    return None


# Find test data path: prefer the expected location, otherwise search the
# whole input tree for the file name.
TEST_FILE_PATH = "/kaggle/input/zno-data/zno.test.jsonl"
if not os.path.exists(TEST_FILE_PATH):
    _located_test_file = find_file_under("/kaggle/input", TEST_FILE_NAME)
    if _located_test_file is not None:
        TEST_FILE_PATH = _located_test_file
    else:
        # Report now rather than only when the file is opened later on.
        print(f"WARNING: {TEST_FILE_NAME} not found under /kaggle/input; "
              f"keeping default path {TEST_FILE_PATH}")

# ==========================================
# 3. LOAD MODEL (Stable 4-bit)
# ==========================================
print("‚è≥ Loading Model (4-bit)...")

# Quantisation settings: 4-bit NF4 weights, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Base checkpoint is read from local files only, quantised on load, with
# device placement left to device_map="auto".
model_load_kwargs = dict(
    quantization_config=bnb_config,
    device_map="auto",
    local_files_only=True,
)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH, **model_load_kwargs)

# Tokenizer comes from the base checkpoint. Padding reuses the EOS token and
# goes on the left, so the last position of every row is a real prompt token.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, local_files_only=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

print("üîó Attaching Adapter...")
# Wrap the quantised base model with the adapter stored at ADAPTER_PATH and
# switch to evaluation mode.
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH, local_files_only=True)
model.eval()

# ==========================================
# 4. BALANCED LOGIT SCORING (Score Optimizer)
# ==========================================
# Answer letters offered to the model, in option order.
candidates = ["–ê", "–ë", "–í", "–ì", "–î"]

# Vocabulary id used to score each letter: the final token of the letter's
# encoding (encoded without special tokens).
candidate_ids = []
for letter in candidates:
    letter_token_ids = tokenizer.encode(letter, add_special_tokens=False)
    candidate_ids.append(letter_token_ids[-1])

# Position in `candidates` (and therefore in `candidate_ids`) -> answer letter.
answer_map = dict(enumerate(candidates))

# Few-shot block that create_prompt() inserts into every prompt.
# It holds four worked examples, each laid out as a question line, an options
# header, five lettered options and an answer line. The four answers are the
# 1st, 2nd, 3rd and 4th option letters, one each, which is meant to reduce
# letter bias. Note that the 5th letter is never the answer in these examples.
BALANCED_EXAMPLES = """
–ü–∏—Ç–∞–Ω–Ω—è: –°–ª–æ–≤–æ –∑ –æ—Ä—Ñ–æ–≥—Ä–∞—Ñ—ñ—á–Ω–æ—é –ø–æ–º–∏–ª–∫–æ—é —î –≤ —Ä—è–¥–∫—É
–í–∞—Ä—ñ–∞–Ω—Ç–∏:
–ê: –∫–æ–Ω—Ç—Ä–∞—Å—Ç–Ω–∏–π, –±–∞–ª–∞—Å—Ç–Ω–∏–π, —Ñ–æ—Ä–ø–æ—Å—Ç–Ω–∏–π
–ë: –ø–µ—Å—Ç—É–Ω–∏, —Ö–≤–∞—Å—Ç–ª–∏–≤–∏–π, –∫—ñ—Å—Ç–ª—è–≤–∏–π
–í: —Å—Ç—É–¥–µ–Ω—Ç—Å—å–∫–∏–π, –¥–∏—Ä–∏–≥–µ–Ω—Ç—Å—å–∫–∏–π, —Ç—É—Ä–∏—Å—Ç—Å—å–∫–∏–π
–ì: —Ç–∏–∂–Ω–µ–≤–∏–π, —Å–µ—Ä—Ü–µ–≤–∏–π, –∑–ª—ñ—Å–Ω–∏–π
–î: —É—á–∞—Å–Ω–∏–∫, —è—Ö—Ç—Å–º–µ–Ω, —Å—Ç—ñ–ª—å–Ω–∏–∫–æ–≤–∏–π
–í—ñ–¥–ø–æ–≤—ñ–¥—å: –ê

–ü–∏—Ç–∞–Ω–Ω—è: –ü—Ä–∞–≤–∏–ª—å–Ω–æ –Ω–∞–ø–∏—Å–∞–Ω–æ –≤—Å—ñ —Å–ª–æ–≤–∞ –≤ —Ä—è–¥–∫—É
–í–∞—Ä—ñ–∞–Ω—Ç–∏:
–ê: —Å—Ñ–æ—Ç–æ–≥—Ä–∞—Ñ—É–≤–∞—Ç–∏, —Ä–æ–∑–∂—É–≤–∞—Ç–∏, –ø—Ä–∏–ø—Ä—ñ–ª–∏–π
–ë: –±–µ–∑–∑–∞—Ö–∏—Å–Ω–∏–π, —Å—Ö–∏–±–∏—Ç–∏, —Ä–æ–∑—á–∏–Ω
–í: –ø–µ—Ä–µ–∫–æ—Ç–∏–ø–æ–ª–µ, –Ω–µ–∑—Ä—ñ–≤–Ω—è–Ω–∏–π, –ø—Ä–∏–ø–∏–Ω–∏—Ç–∏
–ì: –ø—Ä–µ–≤–µ–ª–µ–±–Ω–∏–π, —Å–∫–∞–∑–∞—Ç–∏, –±–µ–∑—Å–º–µ—Ä—Ç—è
–î: —Å–ø–∏—Ç–∞—Ç–∏, —Ä–æ–∑—á–µ—Å–∞—Ç–∏, –ø—Ä–∏—Ä–≤–∞
–í—ñ–¥–ø–æ–≤—ñ–¥—å: –ë

–ü–∏—Ç–∞–Ω–Ω—è: –£–∫–∞–∂—ñ—Ç—å —Ä—è–¥–æ–∫, —É —è–∫–æ–º—É –≤—Å—ñ —Å–ª–æ–≤–∞ –ø–∏—à—É—Ç—å—Å—è –∑ –≤–µ–ª–∏–∫–æ—ó –ª—ñ—Ç–µ—Ä–∏
–í–∞—Ä—ñ–∞–Ω—Ç–∏:
–ê: (–®,—à)–µ–≤—á–µ–Ω–∫—ñ–≤—Å—å–∫—ñ –≤—ñ—Ä—à—ñ, (–ö,–∫)–∏—ó–≤—Å—å–∫—ñ –≤—É–ª–∏—Ü—ñ
–ë: (–î,–¥)–Ω—ñ–ø—Ä–æ–≤—Å—å–∫—ñ —Ö–≤–∏–ª—ñ, (–õ,–ª)—å–≤—ñ–≤—Å—å–∫–∞ –∫–∞–≤–∞
–í: (–ü,–ø)—ñ–≤–¥–µ–Ω–Ω–∏–π (–ë,–±)—É–≥, (–ó,–∑)–æ–ª–æ—Ç—ñ (–í,–≤)–æ—Ä–æ—Ç–∞
–ì: (–ù,–Ω)–∞—Ü—ñ–æ–Ω–∞–ª—å–Ω–∏–π (–ë,–±)–∞–Ω–∫, (–í,–≤)–µ—Ä—Ö–æ–≤–Ω–∞ (–†,—Ä)–∞–¥–∞
–î: (–ú,–º)—ñ–Ω—ñ—Å—Ç–µ—Ä—Å—Ç–≤–æ (–û,–æ)—Å–≤—ñ—Ç–∏, (–ö,–∫)–∞–±–º—ñ–Ω
–í—ñ–¥–ø–æ–≤—ñ–¥—å: –í

–ü–∏—Ç–∞–Ω–Ω—è: –£–∫–∞–∂—ñ—Ç—å –ø—Ä–∞–≤–∏–ª—å–Ω–∏–π –≤–∞—Ä—ñ–∞–Ω—Ç –ø–æ—Å–ª—ñ–¥–æ–≤–Ω–æ–≥–æ –∑–∞–ø–æ–≤–Ω–µ–Ω–Ω—è –ø—Ä–æ–ø—É—Å–∫—ñ–≤
–í–∞—Ä—ñ–∞–Ω—Ç–∏:
–ê: –Ω–∞–π–±—ñ–ª—å—à —Ü—ñ–∫–∞–≤—ñ—à–∏–º, –ø–æ —Å–∞–º—ñ –≤—É—Ö–∞, –≤ –∞–Ω—Ñ–∞—Å
–ë: —è–∫–Ω–∞–π—Ü—ñ–∫–∞–≤—ñ—à–∏–º, –ø–æ –≤—É—Ö–∞, –≤ –∞–Ω—Ñ–∞—Å
–í: —Ü—ñ–∫–∞–≤—ñ—à–∏–º, –ø–æ —Å–∞–º—ñ –≤—É—Ö–∞, –∞–Ω—Ñ–∞—Å
–ì: –Ω–∞–π–±—ñ–ª—å—à —Ü—ñ–∫–∞–≤–∏–º, –ø–æ —Å–∞–º—ñ –≤—É—Ö–∞, –≤ –∞–Ω—Ñ–∞—Å
–î: –Ω–∞–π—Ü—ñ–∫–∞–≤—ñ—à–∏–º, –ø–æ –≤—É—Ö–∞, –∞–Ω—Ñ–∞—Å
–í—ñ–¥–ø–æ–≤—ñ–¥—å: –ì
"""

def _format_option(opt):
    """Render one answer option as 'marker: text'; non-dict options are stringified."""
    if isinstance(opt, dict):
        return f"{opt.get('marker', '')}: {opt.get('text', '')}"
    return str(opt)


def create_prompt(item, examples=None):
    """Build the chat-formatted scoring prompt for one test item.

    The prompt is a single user turn (instruction line, few-shot examples,
    then the item's question and options) followed by an assistant turn that
    already starts with the answer cue, so the model's next token is the
    answer letter.

    Args:
        item: Mapping with an optional 'question' string and optional
            'answers'. 'answers' is normally a list of dicts carrying
            'marker' and 'text'; None/missing yields an empty option block,
            and any other value is inserted via str().
        examples: Few-shot text placed between the instruction and the
            question. Defaults to the module-level BALANCED_EXAMPLES.

    Returns:
        The complete prompt string.
    """
    if examples is None:
        examples = BALANCED_EXAMPLES

    q = item.get('question', '')

    answers = item.get('answers')
    if answers is None:
        # Missing or null options: keep the prompt well-formed, just empty.
        opts = ""
    elif isinstance(answers, (list, tuple)):
        opts = "\n".join(_format_option(opt) for opt in answers)
    else:
        # Unstructured options (e.g. a pre-formatted string) are used verbatim.
        opts = str(answers)

    instruction = "–î–∞–π –≤—ñ–¥–ø–æ–≤—ñ–¥—å –±—É–∫–≤–æ—é-–≤–∞—Ä—ñ–∞–Ω—Ç–æ–º –≤—ñ–¥–ø–æ–≤—ñ–¥—ñ –∑ –Ω–∞–¥–∞–Ω–∏—Ö –≤–∞—Ä—ñ–∞–Ω—Ç—ñ–≤."
    return f"<|im_start|>user\n{instruction}\n\n{examples}\n\n–ü–∏—Ç–∞–Ω–Ω—è: {q}\n–í–∞—Ä—ñ–∞–Ω—Ç–∏:\n{opts}<|im_end|>\n<|im_start|>assistant\n–í—ñ–¥–ø–æ–≤—ñ–¥—å:"

# ==========================================
# 5. EXECUTION
# ==========================================
# Read the test set: one JSON document per line.
test_data = []
skipped_lines = 0
with open(TEST_FILE_PATH, "r", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        if not line.strip():
            continue  # blank lines carry no record
        try:
            test_data.append(json.loads(line))
        except json.JSONDecodeError as err:
            # Only malformed JSON is skipped, and never silently: a dropped
            # line means a question missing from the submission.
            skipped_lines += 1
            print(f"WARNING: skipping malformed JSON on line {line_no}: {err}")

if skipped_lines:
    print(f"WARNING: {skipped_lines} malformed line(s) skipped in {TEST_FILE_PATH}")

print(f"üöÄ Starting Optimized Run on {len(test_data)} items...")
results = []

# Scoring reads the logits at the final prompt position. If a prompt exceeds
# MAX_CONTEXT_LEN it must therefore lose tokens from the front; truncating on
# the right would cut off the trailing answer cue and score the wrong position.
tokenizer.truncation_side = "left"

for i in tqdm(range(0, len(test_data), BATCH_SIZE)):
    # Periodically release cached GPU memory.
    if i % 50 == 0: torch.cuda.empty_cache()

    batch_items = test_data[i : i + BATCH_SIZE]
    batch_prompts = [create_prompt(item) for item in batch_items]
    batch_ids = [item.get('id') for item in batch_items]

    # Move the encoded batch to the device the model reports, rather than a
    # hard-coded device string.
    inputs = tokenizer(
        batch_prompts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=MAX_CONTEXT_LEN
    ).to(model.device)

    with torch.inference_mode():
        outputs = model(**inputs)
        # Logits for the token that would follow the prompt, restricted to the
        # candidate answer letters; the highest-scoring letter wins.
        next_token_logits = outputs.logits[:, -1, :]
        candidate_scores = next_token_logits[:, candidate_ids]
        # .tolist() yields plain Python ints for the answer_map lookup.
        best_indices = torch.argmax(candidate_scores, dim=1).tolist()

    for q_id, idx in zip(batch_ids, best_indices):
        results.append({"id": q_id, "answer": answer_map[idx]})

# Write the predictions. The column list is given explicitly so the CSV keeps
# its "id,answer" header even when `results` is empty.
submission = pd.DataFrame(results, columns=["id", "answer"])
submission.to_csv("submission.csv", index=False)
print(f"‚úÖ Submission created with higher precision strategy!")

üì¶ Installing from /kaggle/input/zno-libs-final/offline_libs...
‚úÖ Installation Complete!


2026-01-13 21:18:09.536736: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768339089.722283      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768339089.772364      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768339090.198023      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768339090.198061      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768339090.198064      55 computation_placer.cc:177] computation placer alr

‚è≥ Loading Model (4-bit)...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

üîó Attaching Adapter...




üöÄ Starting Optimized Run on 751 items...


  2%|‚ñè         | 12/751 [00:29<29:51,  2.42s/it]


KeyboardInterrupt: 