In [None]:
!pip install git+https://github.com/huggingface/transformers accelerate datasets
!pip install num2words

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-28gay960
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-28gay960
  Resolved https://github.com/huggingface/transformers to commit 4279057d702adf033c1d40d60ecb8395fadd7d03
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collect

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the dataset root and the results
# directory used later in this notebook both live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os

def get_all_file_paths(root_dir, extensions=None):
    """Recursively collect the paths of all regular files under ``root_dir``.

    The tree is walked iteratively with an explicit stack of directories
    still to visit, so nesting depth is not limited by Python recursion.

    Args:
        root_dir: Directory to walk.
        extensions: Optional file extension (str) or iterable of extensions
            to keep, e.g. ``(".jpg", "png")``. Matching is case-insensitive
            and the leading dot is optional. ``None`` (the default) keeps
            every file, which is the original behaviour.

    Returns:
        list[str]: Matching file paths in sorted order. ``os.scandir`` yields
        entries in arbitrary order, so sorting is what makes repeated runs
        process files in the same sequence.

    Raises:
        OSError: If ``root_dir`` or one of its sub-directories cannot be listed.
    """
    if extensions is None:
        wanted = None
    else:
        if isinstance(extensions, str):
            # A bare string would otherwise be iterated character by character.
            extensions = (extensions,)
        wanted = tuple(
            ext.lower() if ext.startswith(".") else "." + ext.lower()
            for ext in extensions
        )

    file_paths = []
    stack = [root_dir]
    while stack:
        current_dir = stack.pop()
        with os.scandir(current_dir) as it:
            for entry in it:
                if entry.is_file():
                    # str.endswith accepts a tuple of suffixes.
                    if wanted is None or entry.name.lower().endswith(wanted):
                        file_paths.append(entry.path)
                elif entry.is_dir():
                    stack.append(entry.path)

    file_paths.sort()
    return file_paths


In [None]:
# Dataset root on the mounted Drive; every file found beneath it is later
# passed to the model as an image.
root = "/content/drive/MyDrive/RQ1"
image_paths = get_all_file_paths(root)

n_found = len(image_paths)
print(f"✅ Found {n_found} image files.")

✅ Found 149 image files.


In [None]:
# Closed set of candidate countries offered to the model in the A2 prompts.
# NOTE(review): "Morocco" was previously misspelled "Morroco" here; if any
# ground-truth labels elsewhere use the old spelling, normalise them to match.
country_list = [
    "South Africa", "Ethiopia", "Sudan", "Morocco", "Benin", "Senegal", "Zimbabwe",
    "Ukraine", "Switzerland", "United Kingdom", "Japan", "Malaysia", "Iran", "South Korea",
    "Taiwan", "China", "Hong Kong", "India", "Israel", "Thailand",
    "United States", "Cuba", "Mexico", "Panama", "Canada"
]
# Comma-separated form that is interpolated into the prompt text.
country_str = ", ".join(country_list)

In [None]:
# 📄 Prompt dictionary for B1, B2, A2 (Prompt1 & Prompt4 only)
# Fragments that appear verbatim in several prompts are defined once so the
# copies cannot drift apart. The assembled prompt strings are unchanged.

# Closing question of every free-text prompt ("prompt_1").
# NOTE(review): the wording repeats itself ("would it improve or harm ...,
# would urban life improve or harm?"); it is kept verbatim on purpose so the
# text sent to the model stays exactly as before.
_BAN_QUESTION = (
    "Finally, If this particular mural were banned, would it improve or harm urban life "
    "in *this location*, would urban life improve or harm? Briefly justify."
)

# JSON answer template appended to every structured prompt ("prompt_4").
_JSON_SCHEMA = """{
  "Location": "<country>",
  "Location_Reasoning": "<max 2 sentences>",
  "Location_Features": "<comma separated cues>",
  "Label": "<Art or Graffiti or Vandalism or Activism or Advertisement or Other>",
  "Label_Reasoning": "<max 2 sentences>",
  "Label_Features": "<comma separated cues>",
  "CulturalImpact": "<Enhances or Degrades or Neutral>",
  "CulturalImpact_Reasoning": "<max 2 sentences>",
  "CulturalImpact_Features": "<comma separated cues>"
}"""

# Group -> prompt key -> prompt text. Insertion order matters: the main loop
# iterates the inner dicts with .items().
#   B1: no location hint.
#   B2: the prompt states the continent (Africa).
#   A2: continent stated and the answer restricted to `country_str`.
# NOTE(review): `country_str` as defined in this notebook also lists
# non-African countries while B2/A2 say "Africa" — confirm this is intended.
prompt_groups = {
    "B1": {
        "prompt_1": "\n".join([
            "Based on this street art, which country is it most likely from?",
            "Then classify it with one word: art, vandalism, protest, decoration, advertisement, or other.",
            _BAN_QUESTION,
        ]),
        "prompt_4": (
            "You're analyzing a piece of street art as an Art Critic and Cultural Anthropologist. "
            "Name the country of origin. Respond in this JSON:\n\n"
            + _JSON_SCHEMA
        ),
    },
    "B2": {
        "prompt_1": "\n".join([
            "This artwork comes from continent **Africa**. Which country is it most likely from?",
            "Then classify it with one word: art, vandalism, protest, decoration, advertisement, or other.",
            _BAN_QUESTION,
        ]),
        "prompt_4": (
            "You're analyzing a piece of street art from Africa as an Art Critic and Cultural Anthropologist. "
            "Name the country of origin. Respond in this JSON:\n\n"
            + _JSON_SCHEMA
        ),
    },
    "A2": {
        "prompt_1": "\n".join([
            "This art is from continent **Africa**. Which country is it most likely from?",
            f"*(Only choose from the provided list: {country_str})*",
            "Classify it with one word: art, vandalism, protest, decoration, advertisement, or other.",
            _BAN_QUESTION,
        ]),
        "prompt_4": (
            "You're analyzing a piece of street art from Africa as an Art Critic and Cultural Anthropologist. "
            f"Country must be one of {country_str}. Respond in this JSON:\n\n"
            + _JSON_SCHEMA
        ),
    },
}

In [None]:
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
import torch

# The Hub repository id is "<MODEL_NAME>-<VARIANT>"; both parts are also
# stored with every result record and used in the output file names.
MODEL_NAME = "HuggingFaceTB/SmolVLM"
VARIANT = "Instruct"
repo_id = f"{MODEL_NAME}-{VARIANT}"

# The processor builds the chat-formatted prompt and the image tensors.
processor = AutoProcessor.from_pretrained(repo_id)

# Load the weights in bfloat16, then move the model to the GPU and put it in
# evaluation mode. Each call is written as its own step.
model = AutoModelForVision2Seq.from_pretrained(repo_id, torch_dtype=torch.bfloat16)
model = model.to("cuda")
model = model.eval()

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:  92%|#########2| 4.14G/4.49G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/136 [00:00<?, ?B/s]

In [None]:
# 🔮 Inference logic
def infer_img(image_path: str, prompt: str, max_new_tokens: int = 512):
    """Run the vision-language model on one image with one text prompt.

    Uses the module-level ``processor`` and ``model``.

    Args:
        image_path: Path of the image file on disk.
        prompt: Instruction text sent together with the image.
        max_new_tokens: Upper bound on the number of generated tokens
            (default 512, the value that used to be hard-coded).

    Returns:
        str: The decoded continuation only (prompt tokens are cut off before
        decoding), with surrounding whitespace stripped.
    """
    messages = [{
        "role": "user",
        "content": [{"type": "image", "image": f"file://{image_path}"}, {"type": "text", "text": prompt}]
    }]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Open inside a context manager so the file handle is released right away;
    # convert() returns a separate in-memory image that outlives the handle.
    with Image.open(image_path) as source_img:
        img = source_img.convert("RGB")

    # Follow the model's device instead of assuming "cuda".
    inputs = processor(text=[text], images=[img], return_tensors="pt").to(model.device)

    # The model weights are bfloat16, so float32 inputs are cast to match.
    # Integer tensors such as input_ids are left untouched.
    for key, value in list(inputs.items()):
        if torch.is_tensor(value) and value.dtype == torch.float32:
            inputs[key] = value.to(torch.bfloat16)

    with torch.inference_mode():
        out = model.generate(**inputs, max_new_tokens=max_new_tokens)
        # generate() returns prompt + continuation; drop the prompt part.
        trimmed = [o[len(i):] for i, o in zip(inputs.input_ids, out)]
        decoded = processor.batch_decode(trimmed, skip_special_tokens=True)[0]
    return decoded.strip()

# 🧹 Clean up backticks etc.
import re

def strip_code_fence(s: str) -> str:
    """Remove a Markdown code fence wrapped around ``s`` and trim whitespace.

    Handles an opening fence with or without a ``json`` language tag (any
    letter case, optional spaces before the tag) and a closing fence. Text
    without fences is returned unchanged apart from outer whitespace.

    NOTE(review): a later cell of this notebook defines ``strip_code_fence``
    again; once that cell runs, its definition replaces this one.
    """
    s = s.strip()
    # Opening fence: ``` + optional spaces + optional "json" tag + whitespace.
    # (The previous pattern used "\\s" in a raw string, which matches a
    # literal backslash, so the opening fence was never removed.)
    s = re.sub(r"^```[ \t]*(?:json)?\s*", "", s, flags=re.IGNORECASE)
    # Closing fence together with any whitespace in front of it.
    s = re.sub(r"\s*```$", "", s)
    return s.strip()

# 💾 Save results
import json

In [None]:
import os
import json
import torch
import re

def strip_code_fence(s: str) -> str:
    """Remove a Markdown code fence wrapped around ``s`` and trim whitespace.

    Handles an opening fence with or without a ``json`` language tag (any
    letter case, optional spaces before the tag) and a closing fence. Text
    without fences is returned unchanged apart from outer whitespace.
    """
    s = s.strip()
    # Opening fence: ``` + optional spaces + optional "json" tag + whitespace.
    # Making the tag optional also covers bare ``` fences, which previously
    # survived and made the later JSON parse fail.
    s = re.sub(r"^```[ \t]*(?:json)?\s*", "", s, flags=re.IGNORECASE)
    # Closing fence together with any whitespace in front of it.
    s = re.sub(r"\s*```$", "", s)
    return s.strip()

def save_results(data: list,
                 model_name: str,
                 variant: str,
                 group: str,
                 prompt_key: str,
                 output_dir: str = "/content/drive/MyDrive/StreetArtProject/results"):
    """Write one run's records to a JSON file and print a confirmation.

    The file is named ``<model>-<variant>_<group>_<prompt_key>_output.json``,
    where ``<model>`` is the part of ``model_name`` after its last ``/``.

    Args:
        data: JSON-serialisable list of result records.
        model_name: Model identifier, optionally prefixed with ``org/``.
        variant: Model variant, used in the file name.
        group: Prompt group, used in the file name.
        prompt_key: Prompt key, used in the file name.
        output_dir: Destination directory; created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)

    # A "/" in the name would be read as a directory separator, so keep only
    # the text after the last one (the whole name when there is none).
    safe_model_name = model_name.rpartition("/")[2]

    out_path = os.path.join(
        output_dir,
        f"{safe_model_name}-{variant}_{group}_{prompt_key}_output.json",
    )

    with open(out_path, "w") as handle:
        json.dump(data, handle, indent=4)

    print(f"✅ Saved {len(data)} results to {out_path}")

# 🧪 Main loop
output_dir = "/content/drive/MyDrive/StreetArtProject/results"


def _parse_model_output(raw_output) -> dict:
    """Turn one raw generation into the extra fields stored with its record.

    Returns the decoded JSON object when the (fence-stripped) text is a JSON
    object, otherwise ``{"raw_output": <cleaned text>}``. Valid JSON that is
    not an object (list, number, string, null) takes the fallback too,
    because it cannot be merged into a record as key/value pairs.
    """
    cleaned = strip_code_fence(raw_output) if isinstance(raw_output, str) else raw_output
    try:
        parsed = json.loads(cleaned)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError covers non-text input.
        return {"raw_output": cleaned}
    if not isinstance(parsed, dict):
        return {"raw_output": cleaned}
    return parsed


# Run every prompt of every group over all images; one JSON file is written
# per (group, prompt) pair once all of its images have been processed.
for group in ["B1", "B2", "A2"]:
    for prompt_key, prompt_text in prompt_groups[group].items():
        print(f"\n🚀 Running: {group} / {prompt_key}")
        results = []

        for image_path in image_paths:
            try:
                raw_output = infer_img(image_path, prompt_text)
            except Exception as e:
                # Deliberately best-effort: a single failing image is recorded
                # as "ERROR" instead of aborting the whole run.
                print(f"❌ Error on {image_path}: {e}")
                raw_output = "ERROR"

            record = {
                "image_path": os.path.basename(image_path),
                "model": MODEL_NAME,
                "variant": VARIANT,
                "prompt_group": group,
                "prompt_key": prompt_key,
                "prompt": prompt_text,
                "output": raw_output,
            }
            # Merge the parsed fields after the metadata. setdefault keeps the
            # metadata value if the model's JSON reuses one of the keys above,
            # so a generation can no longer overwrite e.g. "output" or "prompt".
            for key, value in _parse_model_output(raw_output).items():
                record.setdefault(key, value)
            results.append(record)

            torch.cuda.empty_cache()

        save_results(results, MODEL_NAME, VARIANT, group, prompt_key, output_dir=output_dir)



🚀 Running: B1 / prompt_1
✅ Saved 149 results to /content/drive/MyDrive/StreetArtProject/results/SmolVLM-Instruct_B1_prompt_1_output.json

🚀 Running: B1 / prompt_4
✅ Saved 149 results to /content/drive/MyDrive/StreetArtProject/results/SmolVLM-Instruct_B1_prompt_4_output.json

🚀 Running: B2 / prompt_1
✅ Saved 149 results to /content/drive/MyDrive/StreetArtProject/results/SmolVLM-Instruct_B2_prompt_1_output.json

🚀 Running: B2 / prompt_4
✅ Saved 149 results to /content/drive/MyDrive/StreetArtProject/results/SmolVLM-Instruct_B2_prompt_4_output.json

🚀 Running: A2 / prompt_1
✅ Saved 149 results to /content/drive/MyDrive/StreetArtProject/results/SmolVLM-Instruct_A2_prompt_1_output.json

🚀 Running: A2 / prompt_4
✅ Saved 149 results to /content/drive/MyDrive/StreetArtProject/results/SmolVLM-Instruct_A2_prompt_4_output.json
