# Inference Notebook Template

In [None]:
import os
# Configure PyTorch's CUDA caching allocator. This is set right after importing
# os, ahead of the later `import torch`, so the setting is already in the
# environment when torch is loaded.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

🔨 TO BE MODIFIED 🔨

In [None]:
# TO BE MODIFIED

!pip install transformers==4.48.3 tokenizers==0.21.0

## 📚 Helper: Save any results dict to JSON

In [None]:
import json, os

def save_results(data: "list | dict",
                 model_name: str,
                 variant: str,
                 task: str,
                 prompt_level: str,
                 run_count: str,
                 output_dir: str = "/kaggle/working/results"):
    """
    Serialise `data` as indented JSON under `<output_dir>/<model_name>/`.

    The file is named
    `RQ1_<model_name>_<variant>_<task>_<prompt_level>_<run_count>.json`.
    Missing directories are created. The JSON is first written to a sibling
    `.tmp` file and then moved over the target with `os.replace`, so a failure
    while serialising (or an interrupted kernel) never truncates a previously
    saved results file.

    Args:
        data: JSON-serialisable object to store (the inference loop passes a
            list of per-image result dicts).
        model_name: Model label; used as sub-directory and in the file name.
        variant: Model variant label used in the file name.
        task: Task label used in the file name.
        prompt_level: Prompt identifier used in the file name.
        run_count: Run identifier used in the file name.
        output_dir: Root directory for all result files.

    Raises:
        TypeError / ValueError: if `data` cannot be serialised by `json.dump`.
        OSError: if the directory or file cannot be written.
    """
    # Ensure nested directories are created
    model_dir = os.path.join(output_dir, model_name)
    os.makedirs(model_dir, exist_ok=True)

    fname    = f"RQ1_{model_name}_{variant}_{task}_{prompt_level}_{run_count}.json"  # Fixed name
    out_path = os.path.join(model_dir, fname)
    tmp_path = out_path + ".tmp"

    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)
        # Atomic swap: the old file stays intact until the new one is complete.
        os.replace(tmp_path, out_path)
    except BaseException:
        # Do not leave a half-written temp file behind, then re-raise.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    print(f"✅ Saved results to {out_path}")


🔨 TO BE MODIFIED 🔨

In [None]:
# TO BE MODIFIED


# Parameters for the output JSON (stored in every record and used to build
# the results file name)
MODEL_NAME = "Centurio Qwen"
VARIANT = "8B"
PROMPT_LEVEL = "prompt1"
RUN_COUNT = "r1"

# Root folder that is searched recursively for input files
folder_path = "/kaggle/input/streetartdata/StreetArtProject/RQ1"


# FIXED (NO MODIFICATION NEEDED)
TASK = "continent"

In [None]:
import os

def get_all_file_paths(root_dir):
    """
    Recursively collect the paths of all files below `root_dir`.

    The directory tree is walked iteratively with an explicit stack (no
    recursion). Every entry for which `is_file()` is true is collected,
    regardless of extension; every entry for which `is_dir()` is true is
    descended into.

    Args:
        root_dir: Directory to start from.

    Returns:
        list[str]: File paths in sorted order. `os.scandir` yields entries in
        arbitrary order, so the result is sorted to make the processing order
        (and therefore the saved results) reproducible across runs.

    Raises:
        OSError: propagated from `os.scandir`, e.g. if `root_dir` is missing.
    """
    file_paths = []
    pending_dirs = [root_dir]

    while pending_dirs:
        current_dir = pending_dirs.pop()
        with os.scandir(current_dir) as entries:
            for entry in entries:
                if entry.is_file():
                    file_paths.append(entry.path)
                elif entry.is_dir():
                    pending_dirs.append(entry.path)

    file_paths.sort()
    return file_paths

# Gather every file below the dataset root; these paths drive the inference loop.
image_paths = get_all_file_paths(root_dir=folder_path)

print(f"Found {len(image_paths)} files.")

In [None]:
# image_paths

# Variant: Centurio Qwen

https://huggingface.co/WueNLP/centurio_qwen

## 1️⃣ Prompt

🔨 TO BE MODIFIED 🔨

In [None]:
# TO BE MODIFIED

prompt = """
Analyze the image. Answer two things:

1) Which continent is this most likely from?

2) What is the most suitable label: vandalism, protest, decoration, advertisement, or heritage?
"""

## 2️⃣ Load Processor and Model

In [None]:
# from huggingface_hub import notebook_login
# notebook_login()

🔨 TO BE MODIFIED 🔨

In [None]:
# TO BE MODIFIED

from transformers import AutoModelForCausalLM, AutoProcessor
from glob import glob
from PIL import Image
import torch
import os

# Hugging Face Hub repository used for both the model and its processor.
repo_id = "WueNLP/centurio_qwen"

# trust_remote_code=True allows transformers to run Python code shipped in the
# repository; weights are loaded as bfloat16 and placed via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)


## 3️⃣ Inference

In [None]:
# import json
#
# # 1. Load your JSON file
# with open('/content/drive/MyDrive/StreetArtProject/results/Qwen2.5-VL/3B-Instruct_RQ1_continent_prompt1.json', 'r') as f:
#     data = json.load(f)
#
# # 2. Build a set of annotated paths from your JSON
# json_paths = { entry['image_path'] for entry in data }
#
# # 3. Filter your existing list
# paths_not_in_json = [p for p in image_paths if p not in json_paths]
#
# # Now `paths_not_in_json` contains only those files missing from your JSON annotations.
# print(f"{len(paths_not_in_json)} paths aren’t in the JSON.")

In [None]:
# image_paths = paths_not_in_json
# print(len(image_paths))

In [None]:
import tempfile

🔨 TO BE MODIFIED 🔨

In [None]:
# TO BE MODIFIED

def infer_img(image_path: str, prompt: str):
    """
    Run the globally loaded `model` / `processor` on one image and return the
    decoded answer.

    Steps: load the image as RGB, make sure the prompt contains the
    `<image_placeholder>` token (it is prepended on its own line if missing),
    render the chat template with a fixed system message, move the processor
    outputs to the model's device (floating-point tensors are cast to
    bfloat16), generate, strip the prompt tokens from the generated sequence
    and decode.

    Args:
        image_path: Path to the image file.
        prompt: User prompt text.

    Returns:
        str: Decoded model answer with surrounding whitespace removed.

    Raises:
        FileNotFoundError: if `image_path` does not exist.
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Image.open() keeps the file open until the image is closed; convert()
    # returns an independent in-memory copy, so the handle can be released
    # right away instead of leaking one descriptor per processed image.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    if "<image_placeholder>" not in prompt:
        prompt = "<image_placeholder>\n" + prompt

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    inputs = processor(
        text=[text],
        images=[image],
        return_tensors="pt"
    )

    # Place tensors on the device the model actually lives on (previously they
    # were first forced onto "cuda" and then moved again), and match the
    # bfloat16 dtype the model was loaded with for floating-point inputs.
    target_device = model.device
    for k, v in list(inputs.items()):
        if torch.is_tensor(v):
            v = v.to(target_device)
            if v.is_floating_point():
                v = v.to(torch.bfloat16)
            inputs[k] = v

    with torch.inference_mode():
        # NOTE(review): `temperature` only influences generation when sampling
        # is enabled (do_sample=True here or in the model's generation
        # config) — confirm which applies for this checkpoint.
        generated_ids = model.generate(**inputs, max_new_tokens=1024, temperature=0.3)
        # Drop the leading len(input_ids) tokens of each returned sequence
        # (assumes generate() echoes the prompt tokens first).
        trimmed = [o[len(i):] for i, o in zip(inputs.input_ids, generated_ids)]
        output = processor.batch_decode(trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    return output.strip()


## 4️⃣ Package & Save to JSON

In [None]:
import re
import json
import torch
import tempfile
from PIL import Image

def strip_code_fence(s: str) -> str:
    """
    Remove a leading and/or trailing triple-backtick fence and trim whitespace.

    The opening fence may be bare or carry a `json` tag (case-insensitive,
    optionally separated by one space). Non-string input is returned
    unchanged.

    Args:
        s: Text that may be wrapped in a Markdown code fence.

    Returns:
        The text without the fence markers and surrounding whitespace, or `s`
        itself when it is not a string.
    """
    if not isinstance(s, str):
        return s
    s = s.strip()
    # The `json` tag is optional so that a bare opening fence is removed too
    # (previously only fences tagged with `json` were stripped at the start).
    s = re.sub(r"^```(?: ?json)?\s*", "", s, flags=re.I)
    s = re.sub(r"```$", "", s)
    return s.strip()

def try_parse_json_from_string(s: str):
    """
    Try to parse JSON from a string. Returns (parsed_obj, error_message).

    The string is first cleaned with `strip_code_fence` and parsed as a whole.
    If that fails, the leftmost `{` (that has a `}` somewhere after it) or
    `[` (that has a `]` somewhere after it) is located and a single JSON value
    is decoded starting there; any text following that value is ignored.

    On success `error_message` is None. On failure `parsed_obj` is None and
    `error_message` describes why.
    """
    cleaned = strip_code_fence(s)
    try:
        return json.loads(cleaned), None
    except json.JSONDecodeError:
        pass

    # Fallback: find where an embedded object/array could start. Plain
    # find/rfind is linear in the text length; the earlier regex
    # `(?:.|\s)*` had overlapping alternatives and could backtrack
    # exponentially on outputs with an opener but no usable closer
    # (for example JSON cut off at the token limit).
    candidate_starts = []
    for opener, closer in (("{", "}"), ("[", "]")):
        start = cleaned.find(opener)
        if start != -1 and cleaned.rfind(closer) > start:
            candidate_starts.append(start)

    if not candidate_starts:
        return None, "no JSON found or JSON invalid"

    try:
        # raw_decode parses exactly one JSON value at the given offset and
        # tolerates trailing text after it.
        parsed, _end = json.JSONDecoder().raw_decode(cleaned, min(candidate_starts))
        return parsed, None
    except json.JSONDecodeError as e:
        return None, f"JSON decode failed for extracted substring: {e}"

def normalize(parsed):
    """
    Bring parsed JSON into a canonical shape:
      - anything that is not a list is returned unchanged
      - a list holding exactly one dict is unwrapped to that dict
      - every other list (empty, several items, or a single non-dict item)
        is wrapped as {"json_list": parsed}
    """
    if not isinstance(parsed, list):
        return parsed

    holds_single_dict = len(parsed) == 1 and isinstance(parsed[0], dict)
    return parsed[0] if holds_single_dict else {"json_list": parsed}

def _infer_with_oom_retry(image_path, prompt):
    """
    Call infer_img() on the full-resolution image; if that fails with a CUDA
    out-of-memory RuntimeError, retry once on a 448x448 JPEG copy.

    Returns the raw model output, or None when inference failed.
    """
    try:
        raw_output = infer_img(image_path, prompt)
        print(" → infer_img() succeeded (full-res).")
        return raw_output
    except RuntimeError as e:
        if "cuda out of memory" not in str(e).lower():
            print(f" ❌ RuntimeError on {image_path}: {e}")
            return None
        # Fall through: the retry is done after leaving this except clause.
    except Exception as e:
        print(f" ❌ Error processing {image_path}: {e}")
        return None

    # Only reached after an OOM. The retry deliberately runs outside the
    # except clause: while the exception is alive, its traceback keeps the
    # failed call's frames (and their tensors) referenced, so empty_cache()
    # could not release that memory.
    torch.cuda.empty_cache()
    print(f" ⚠️ OOM on full-res for {image_path}. Resizing to 448x448 and retrying…")
    tmp_path = None
    try:
        with Image.open(image_path) as source_image:
            img = source_image.convert("RGB").resize((448, 448))
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = tmp.name
        img.save(tmp_path, format="JPEG")
        raw_output = infer_img(tmp_path, prompt)
        print(" → infer_img() succeeded (resized).")
        return raw_output
    except Exception as e2:
        print(f" ❌ Failed even after resize: {e2}")
        return None
    finally:
        # delete=False leaves the file on disk; remove it once it was used.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)


def _to_output_value(raw_output):
    """
    Produce the single canonical 'output' value for one record:
      - None stays None
      - dict/list -> normalize()
      - str -> parsed and normalized JSON if parsing works, else the raw string
      - any other type is kept as-is
    """
    if raw_output is None:
        print(" → No output from model.")
        return None

    if isinstance(raw_output, (dict, list)):
        output_value = normalize(raw_output)
        print(f" → Model returned {type(raw_output).__name__}; stored as output.")
        return output_value

    if isinstance(raw_output, str):
        parsed, perr = try_parse_json_from_string(raw_output)
        if parsed is not None:
            output_value = normalize(parsed)
            print(" → Parsed JSON from model string; stored structured output.")
            return output_value
        # keep the original string (no duplication)
        print(f" → Could not parse JSON from model string: {perr!s}. Keeping raw string as output.")
        return raw_output

    # other types (bytes, numbers, etc.) — keep as-is
    print(f" → Model returned type {type(raw_output).__name__}; keeping as output.")
    return raw_output


results = []

for i, image_path in enumerate(image_paths):
    print(f"Processing {i+1}/{len(image_paths)}: {image_path}\n")

    # --- inference (try full-res, then resize retry on OOM) ----------------
    raw_output = _infer_with_oom_retry(image_path, prompt)
    output_value = _to_output_value(raw_output)

    # The record always refers to the original image path, even when the
    # answer came from the downsized retry.
    result = {
        "image_path":   image_path,
        "model":        MODEL_NAME,
        "variant":      VARIANT,
        "task":         TASK,
        "prompt_level": PROMPT_LEVEL,
        "run_count":    RUN_COUNT,
        "prompt":       prompt,
        "output":       output_value
    }
    results.append(result)
    print(f"\nOutput:\n{output_value}\n")

    # Save progress after every image so an interrupted run keeps its results;
    # a failed save is reported but does not stop the loop.
    try:
        save_results(results, MODEL_NAME, VARIANT, TASK, PROMPT_LEVEL, RUN_COUNT)

    except Exception as e:
        print(f" ❌ Failed to save results: {e}")

    # always clear GPU cache
    torch.cuda.empty_cache()
    print(f"Total files done: {i+1}")
    print("\n######################################################\n")
