In [124]:
!python.exe -m pip install --upgrade pip
!pip install requests pillow scikit-learn pandas



In [125]:
import os
import requests
import pandas as pd
import base64
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [126]:
HF_TOKEN = "..."

In [127]:
def encode_image(path):
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return f"data:image/jpeg;base64,{b64}"

In [128]:
def request_api(model, image_path):
    url = "https://router.huggingface.co/v1/chat/completions"

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": encode_image(image_path)
                        }
                    },
                    {
                        "type": "text",
                        "text": "Is the aircraft in this image military or civilian or not a plane? Answer only 'military' or 'civilian' or 'not a plane'."
                    }
                ]
            }
        ],
        "max_tokens": 10,
        "temperature": 0.0
    }

    try:
        r = requests.post(url, headers=headers, json=payload, timeout=45)
        print(f"→ Status: {r.status_code}")
        if r.status_code == 200:
            resp = r.json()
            content = resp["choices"][0]["message"]["content"].strip().lower()
            return content
        else:
            print(f"Error: {r.status_code} — {r.text[:300]}")
            return "error"
    except Exception as e:
        print(f"Exception: {e}")
        return "error"

In [129]:
def query_llava(image_path):
    model = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
    return request_api(model, image_path)

In [130]:
def query_qwen(image_path):
    model = "Qwen/Qwen3-VL-235B-A22B-Instruct"
    return request_api(model, image_path)

In [131]:
results = []
DATASET_DIR = "dataset"
files = [f for f in os.listdir(DATASET_DIR) if f.lower().endswith(('.jpg', '.jpeg'))]

for f in files:
    path = os.path.join(DATASET_DIR, f)
    if "mil" in f.lower():
        true_label = "military"
    elif "civ" in f.lower():
        true_label = "civilian"
    else:
        true_label = "not a plane"

    print("Processing:", f)

    pred_llava = query_llava(path)
    pred_qwen  = query_qwen(path)

    results.append({
        "file": f,
        "true": true_label,
        "llava": pred_llava,
        "qwen": pred_qwen
    })

Processing: civ_01.jpg
→ Status: 200
→ Status: 200
Processing: civ_11.jpg
→ Status: 200
→ Status: 200
Processing: civ_14.jpg
→ Status: 200
→ Status: 200
Processing: kits_01.jpg
→ Status: 200
→ Status: 200
Processing: kits_02.jpg
→ Status: 200
→ Status: 200
Processing: mil_02.jpg
→ Status: 200
→ Status: 200
Processing: mil_04.jpg
→ Status: 200
→ Status: 200
Processing: mil_06.jpg
→ Status: 200
→ Status: 200
Processing: mil_07.jpg
→ Status: 200
→ Status: 200
Processing: mil_12.jpg
→ Status: 402
Error: 402 — {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
→ Status: 402
Error: 402 — {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}


In [132]:
df = pd.DataFrame(results)
df

Unnamed: 0,file,true,llava,qwen
0,civ_01.jpg,civilian,civilian.,civilian
1,civ_11.jpg,civilian,civilian,civilian
2,civ_14.jpg,civilian,civilian.,civilian
3,kits_01.jpg,not a plane,not a plane,not a plane
4,kits_02.jpg,not a plane,not a plane,not a plane
5,mil_02.jpg,military,military,military
6,mil_04.jpg,military,military,military
7,mil_06.jpg,military,military,military
8,mil_07.jpg,military,military,military
9,mil_12.jpg,military,error,error


In [136]:
valid_mask = (df["llava"].isin(["military", "civilian", "not a plane"])) & \
             (df["qwen"].isin(["military", "civilian", "not a plane"])) & \
             (df["true"].isin(["military", "civilian", "not a plane"]))

true = df.loc[valid_mask, "true"]
llava = df.loc[valid_mask, "llava"]
qwen  = df.loc[valid_mask, "qwen"]

def metrics(y_true, y_pred):
    return {
        "accuracy":  accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average='weighted', zero_division=0),
        "recall":    recall_score(y_true, y_pred, average='weighted', zero_division=0),
        "f1":        f1_score(y_true, y_pred, average='weighted', zero_division=0),
    }

print("Llava:", metrics(true, llava))
print("Qwen:", metrics(true, qwen))

Llava: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Qwen: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}


In [137]:
metrics_df = pd.DataFrame([
    ["llava"] + list(metrics(true, llava).values()),
    ["qwen"] + list(metrics(true, qwen).values())
], columns=["Model", "Accuracy", "Precision", "Recall", "F1"])

metrics_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1
0,llava,1.0,1.0,1.0,1.0
1,qwen,1.0,1.0,1.0,1.0
