## Phase 1: Data Generation ("The Professor")

In [None]:
import google.generativeai as genai
import json
import time
from tqdm import tqdm

# 1. SETUP
import os

# Prefer a key supplied through the environment so the secret does not have to
# be pasted into (and accidentally committed with) the notebook. The original
# placeholder is kept as the fallback, so editing the string still works.
API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_GEMINI_API_KEY")
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel('gemini-2.0-flash')

# 2. PROMPT
def generate_reasoning(q: str, options: str, correct_ans: str) -> str:
    """Build the tutor prompt that asks for a short justification of an answer.

    Args:
        q: The question text.
        options: The answer options, already rendered as text.
        correct_ans: Marker (letter) of the correct option.

    Returns:
        The full prompt string, including the required output format and one
        worked example in Ukrainian.
    """
    # The Ukrainian literals below were stored as mojibake (UTF-8 bytes decoded
    # with a legacy single-byte codec); they are restored to real Cyrillic so
    # the requested "Відповідь: [Letter]" format is actually readable.
    prompt = f"""
    You are a strict Ukrainian ZNO exam tutor.
    Question: {q}
    Options: {options}
    Correct Answer: {correct_ans}

    Task:
    1. Write a concise analysis (1-2 sentences) explaining WHY the correct answer is right and others are wrong.
    2. End strictly with the format: "Відповідь: [Letter]"

    Output example:
    Аналіз: Слово "форпостний" є винятком і пишеться без літери "т".
    Відповідь: А
    """
    return prompt

# 3. GENERATION LOOP
# Reads one JSON object per line, asks Gemini for a short justification of the
# correct answer, and writes the record back out with a 'reasoning_output' key.
input_file = "zno.train.jsonl"
output_file = "zno_reasoning_train.jsonl"

with open(input_file, 'r', encoding='utf-8') as fin, \
     open(output_file, 'w', encoding='utf-8') as fout:

    for line_no, line in enumerate(tqdm(fin), start=1):
        line = line.strip()
        if not line:
            continue  # tolerate blank lines instead of crashing in json.loads

        # Used in messages until (and unless) the record provides its own id.
        item_id = f"line {line_no}"

        # Parse and prepare the record. A single malformed record must not
        # abort the whole run, so these failures are reported and skipped.
        try:
            item = json.loads(line)
            item_id = item.get('id', item_id)

            # Prepare options string
            opts = "\n".join([f"{o['marker']}: {o['text']}" for o in item['answers']])
            correct = next((o['marker'] for o in item['answers'] if o['is_correct']), None)
        except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
            print(f"Error on {item_id}: {e}")
            continue

        if correct is None:
            # Without a correct option there is nothing to explain.
            print(f"Error on {item_id}: no answer is marked as correct")
            continue

        # Ask Gemini
        try:
            prompt = generate_reasoning(item['question'], opts, correct)
            response = model.generate_content(prompt)

            # Save new item with reasoning
            new_item = item.copy()
            new_item['reasoning_output'] = response.text.strip()
            fout.write(json.dumps(new_item, ensure_ascii=False) + "\n")
            fout.flush()  # keep finished records on disk if the run is interrupted
        except Exception as e:
            # Per-item boundary: any API or response failure is reported and
            # the loop moves on to the next record.
            print(f"Error on {item_id}: {e}")
        finally:
            # Rate limit safety: pause after every API attempt, including
            # failed ones, so errors do not turn into back-to-back requests.
            time.sleep(1)

## Phase 2: Fine-Tuning ("The Student")
Goal: Train Qwen 2.5 7B to mimic Gemini's reasoning using QLoRA. Environment: Kaggle or Colab (GPU T4 x2 or A100).

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
)
from trl import SFTTrainer

# 1. CONFIG
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"   # base checkpoint to fine-tune
NEW_MODEL_NAME = "zno-my-adapter"         # directory the trained adapter is saved to

# 2. LOAD MODEL (4-bit)
# NF4 4-bit weights with float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, quantization_config=bnb_config, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Only fall back to EOS when the tokenizer ships without a pad token.
# NOTE(review): overwriting an existing pad token with EOS can make a
# padding-aware loss mask hide the genuine end-of-turn token as well, so the
# model never learns to stop - confirm against the collator in use.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 3. PREPARE DATASET
def format_prompt(sample):
    """Render one training record as a two-turn chat transcript.

    The user turn holds the question followed by one "marker: text" line per
    answer option; the assistant turn holds the stored reasoning output.

    Returns:
        A dict with a single "text" key containing the full transcript.
    """
    question = sample['question']

    option_lines = []
    for option in sample['answers']:
        option_lines.append(f"{option['marker']}: {option['text']}")
    options_block = "\n".join(option_lines)

    turns = (
        "<|im_start|>user\n{}\n{}<|im_end|>\n".format(question, options_block),
        "<|im_start|>assistant\n{}<|im_end|>".format(sample['reasoning_output']),
    )
    return {"text": "".join(turns)}

# Load the annotated JSONL produced in Phase 1 and add the chat-formatted
# "text" field to every record.
dataset = load_dataset(
    "json",
    data_files="zno_reasoning_train.jsonl",
    split="train",
).map(format_prompt)

# 4. LoRA CONFIG
# Rank-16 adapters (alpha 16, dropout 0.05, no bias terms) attached to the
# seven projection modules listed in target_modules.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# 5. TRAIN
# Batch 4 with 4 accumulation steps gives 16 sequences per optimizer step per
# device; training stops after 200 optimizer steps.
training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    max_steps=200,  # Adjust based on data size (e.g., 1 epoch)
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    output_dir="outputs",
    optim="paged_adamw_8bit",
)

# NOTE(review): the tokenizer loaded earlier is not passed here, so the
# trainer presumably loads its own - confirm against the installed TRL version.
# NOTE(review): newer TRL releases are expected to take dataset_text_field and
# max_seq_length through their config object instead - verify before upgrading.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=1024,
    args=training_args,
    peft_config=peft_config,
)

trainer.train()
trainer.save_model(NEW_MODEL_NAME)
# The check-mark emoji was stored as mojibake; restored to the real character.
print("✅ Adapter Saved!")

## Phase 3: Offline Preparation
Goal: Download libraries to install them without internet during the exam. Environment: Local Machine (with Internet).

In [None]:
# Create folder (-p: do not fail when the folder is left over from an earlier run)
mkdir -p offline_libs

# Download wheels (ignoring dependencies to save space/conflicts)
# NOTE(review): pip selects wheels for the machine running this command. If
# that is not the same OS / Python version as the Kaggle image, the compiled
# packages may not install there - confirm, or pin the target with
# --platform / --python-version / --only-binary=:all:.
pip download -d offline_libs --no-deps bitsandbytes peft accelerate transformers tokenizers safetensors sentencepiece

# Zip it
zip -r offline_libs.zip offline_libs

Upload offline_libs.zip as a Dataset to Kaggle.

## Phase 4: Final Inference ("The Exam")

Goal: Offline inference using Logit Scoring (highest accuracy) and the Paper-Optimized Prompt. Environment: Kaggle Notebook (Offline, GPU P100). Config: Batch Size 1 (Stability), Logits Only (No Generation).

In [None]:
# ==========================================
# 1. SETUP & INSTALL
# ==========================================
import os
import sys
import gc
import torch

# Clean memory
import subprocess
import zipfile

# Collect garbage first so objects that are no longer referenced are released
# before the CUDA allocator is asked to hand its cached memory back.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Install libs offline
LIB_PATH = "offline_libs"
ZIP_PATH = "/kaggle/input/zno-libs-final/offline_libs.zip"
OFFLINE_PACKAGES = [
    "bitsandbytes", "peft", "accelerate", "transformers",
    "tokenizers", "safetensors", "sentencepiece",
]

# Unpack the wheel archive once; skipped when the folder already exists.
if not os.path.exists(LIB_PATH) and os.path.exists(ZIP_PATH):
    print("📦 Unzipping libraries...")
    with zipfile.ZipFile(ZIP_PATH) as archive:
        archive.extractall(".")

print("📦 Installing libraries...")
# Plain subprocess call instead of an IPython "!" magic so the exit status can
# be checked. Normal pip output stays hidden; errors are shown on failure.
install = subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        "--no-index", f"--find-links={LIB_PATH}",
        *OFFLINE_PACKAGES,
    ],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.PIPE,
    text=True,
)
if install.returncode == 0:
    print("✅ Done!")
else:
    # Report loudly but keep going: packages already present in the image may
    # still allow the rest of the notebook to run.
    print(f"❌ pip install failed (exit code {install.returncode}):")
    print(install.stderr)

# ==========================================
# 2. CONFIGURATION (STABLE + HIGH SCORE)
# ==========================================
# Input locations (Kaggle dataset / model mounts).
TEST_FILE_PATH = "/kaggle/input/zno-data/zno.test.jsonl"
ADAPTER_PATH = "/kaggle/input/zno-my-adapter"
BASE_MODEL_PATH = "/kaggle/input/qwen2.5/transformers/7b-instruct/1"

# Runtime limits.
MAX_CONTEXT_LEN = 1100  # token cap applied when prompts are tokenized
BATCH_SIZE = 1          # prompts scored per forward pass

# ==========================================
# 3. LOAD MODEL
# ==========================================
import pandas as pd
import json
import numpy as np
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# The status emoji in this cell were stored as mojibake; restored to the real
# characters.
if not torch.cuda.is_available():
    raise SystemError("❌ Turn on GPU!")

print("⏳ Loading Model...")
# NF4 4-bit weights with float16 compute, matching the training setup.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH, quantization_config=bnb_config, device_map="auto", local_files_only=True
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, local_files_only=True)
# Pad on the left so the last position of every row is the real end of the
# prompt, which is where the answer logits are read.
tokenizer.padding_side = "left"
# Truncate on the left for the same reason: an over-long prompt must keep its
# tail (the answer cue), otherwise the scored position is mid-question.
tokenizer.truncation_side = "left"
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned adapter on top of the quantized base model.
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH, local_files_only=True)
model.eval()

# ==========================================
# 4. LOGIT STRATEGY (Paper Optimized)
# ==========================================
# Cyrillic answer letters that are scored against each other. They were stored
# as mojibake, which made the looked-up token ids meaningless; restored to the
# real letters.
candidates = ["А", "Б", "В", "Г", "Д"]
# Token id scored for each letter: the last token of the letter's encoding.
# NOTE(review): the prompt ends with "Відповідь:" while the few-shot examples
# show "Відповідь: А" - if the tokenizer has separate space-prefixed letter
# tokens, those may be the ones the model actually predicts. Confirm.
candidate_ids = [tokenizer.encode(c, add_special_tokens=False)[-1] for c in candidates]
# Column index in the scored logits -> answer letter.
answer_map = dict(enumerate(candidates))

# Few-shot examples inserted verbatim into every inference prompt.
# The Ukrainian text was stored as mojibake and is restored to real Cyrillic.
EXAMPLES = """
Питання: Слово з орфографічною помилкою є в рядку
Варіанти:
А: контрастний, баластний, форпостний
Б: пестуни, хвастливий, кістлявий
В: студентський, диригентський, туристський
Г: тижневий, серцевий, злісний
Д: учасник, яхтсмен, стільниковий
Відповідь: А

Питання: Укажіть рядок, у якому всі слова пишуться з великої літери
Варіанти:
А: (Ш,ш)евченківські вірші, (К,к)иївські вулиці
Б: (Д,д)ніпровські хвилі, (Л,л)ьвівська кава
В: (П,п)івденний (Б,б)уг, (З,з)олоті (В,в)орота
Г: (Н,н)аціональний (Б,б)анк, (В,в)ерховна (Р,р)ада
Д: (М,м)іністерство (О,о)світи, (К,к)абмін
Відповідь: В
"""


def create_prompt(item):
    """Build the chat prompt whose next token is the answer letter.

    Args:
        item: Test record; 'question' and 'answers' are read with defaults,
            and each answer must provide 'marker' and 'text'.

    Returns:
        A prompt with the instruction, the few-shot examples and the question
        in the user turn, ending in an assistant turn opened with "Відповідь:".
    """
    q = item.get('question', '')
    # A missing or empty 'answers' value yields an empty options block.
    opts = "\n".join(f"{o['marker']}: {o['text']}" for o in item.get('answers') or [])

    # Instruction: "Answer with the letter of one of the given options."
    instruction = "Дай відповідь буквою-варіантом відповіді з наданих варіантів."
    return (
        f"<|im_start|>user\n{instruction}\n\nПриклади:\n{EXAMPLES}\n\n"
        f"Питання: {q}\nВаріанти:\n{opts}<|im_end|>\n"
        f"<|im_start|>assistant\nВідповідь:"
    )

# ==========================================
# 5. EXECUTION LOOP
# ==========================================
# Fall back to searching the mounted inputs when the configured path is absent.
if not os.path.exists(TEST_FILE_PATH):
    for root, _, files in os.walk("/kaggle/input"):
        if "zno.test.jsonl" in files:
            TEST_FILE_PATH = os.path.join(root, "zno.test.jsonl")

# Load the test set: one JSON object per line. Malformed lines are skipped,
# but counted and reported so missing submission rows do not go unnoticed.
test_data = []
skipped_lines = 0
with open(TEST_FILE_PATH, "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        try:
            test_data.append(json.loads(line))
        except json.JSONDecodeError:
            skipped_lines += 1
if skipped_lines:
    print(f"Warning: skipped {skipped_lines} malformed line(s) in {TEST_FILE_PATH}")

print(f"🚀 Starting Inference on {len(test_data)} items...")
results = []

for i in tqdm(range(0, len(test_data), BATCH_SIZE)):
    if i % 50 == 0:
        torch.cuda.empty_cache()  # Keep memory clean

    batch = test_data[i : i + BATCH_SIZE]
    prompts = [create_prompt(item) for item in batch]
    ids = [item['id'] for item in batch]

    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=MAX_CONTEXT_LEN).to("cuda")

    with torch.inference_mode():
        outputs = model(**inputs)
        # Logit scoring: compare the candidate letters' logits at the last
        # token position (one column per entry of candidate_ids).
        logits = outputs.logits[:, -1, candidate_ids]

        # Never predict a letter the question does not offer (e.g. the fifth
        # letter on a four-option question). If no offered marker matches a
        # known letter, the row is left untouched.
        for row, item in enumerate(batch):
            offered = {o.get('marker') for o in item.get('answers') or []}
            absent = [col for col, letter in answer_map.items() if letter not in offered]
            if absent and len(absent) < len(answer_map):
                logits[row, absent] = float("-inf")

        preds = torch.argmax(logits, dim=1).tolist()

    for q_id, idx in zip(ids, preds):
        results.append({"id": q_id, "answer": answer_map[idx]})

pd.DataFrame(results).to_csv("submission.csv", index=False)
print("✅ Submission Saved!")