In [10]:
# =========================
# FULL EVAL SCRIPT (PyTorch)
# =========================

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torchvision import models, transforms
import random

# -------------------------
# SETTINGS
# -------------------------
EVAL_DIR = "eval"          # change as needed; evaluate_directory looks for PASS/ and FAIL/ subfolders inside it
MODEL_PATH = "resnet_30ep_mk1.pth"  #  change as needed
THRESHOLD_FAIL = 0.59               # FAIL is accepted only if the predicted confidence >= this threshold

# Must match the class order used during training!
# If unsure, use train_dataset.classes from the training notebook.
class_names = ["FAIL", "PASS"]

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------------
# TRANSFORMS (NO AUGMENTATION)
# -------------------------
# Inference-time preprocessing applied to every PIL image:
# resize, center-crop to 224x224, convert to a tensor, then normalize per channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics;
# confirm they match what was used during training.
infer_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# -------------------------
# MODEL LOADING
# -------------------------
def load_model_resnet18(model_path):
    """Build a ResNet-18 with a 2-output head and load weights from ``model_path``.

    The network is created without pretrained weights, its ``fc`` layer is
    replaced by ``Dropout(0.5) -> Linear(in_features, 2)``, and the state dict
    stored at ``model_path`` is loaded into it.

    Args:
        model_path: Path to a file that ``torch.load`` can read into a state dict.

    Returns:
        The model, moved to the module-level ``device`` and switched to eval mode.
    """
    net = models.resnet18(weights=None)

    # Swap the stock classifier for the dropout + 2-way linear head.
    head_inputs = net.fc.in_features
    net.fc = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(head_inputs, 2))

    # NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
    net.load_state_dict(torch.load(model_path, map_location=device))

    # .to() and .eval() both return the module itself, so they can be chained.
    return net.to(device).eval()

def load_model_efficientnet_b0(model_path):
    """Build an EfficientNet-B0 with a 2-output head and load weights from ``model_path``.

    The network is created without pretrained weights, its ``classifier`` is
    replaced by ``Dropout(0.5) -> Linear(num_features, 2)`` (``num_features`` is
    read from the original ``classifier[1]``), and the state dict stored at
    ``model_path`` is loaded into it.

    Args:
        model_path: Path to a file that ``torch.load`` can read into a state dict.

    Returns:
        The model, moved to the module-level ``device`` and switched to eval mode.
    """
    net = models.efficientnet_b0(weights=None)

    # Read the input width of the stock linear layer before replacing the head.
    head_inputs = net.classifier[1].in_features
    net.classifier = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(head_inputs, 2))

    # NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
    net.load_state_dict(torch.load(model_path, map_location=device))

    # .to() and .eval() both return the module itself, so they can be chained.
    return net.to(device).eval()

# -------------------------
# CHOOSE YOUR MODEL
# -------------------------
# If you trained ResNet18:
model = load_model_resnet18(MODEL_PATH)

# If you trained EfficientNet-B0, comment out the line above and uncomment this:
# model = load_model_efficientnet_b0(MODEL_PATH)

# -------------------------
# INFERENCE FOR ONE IMAGE
# -------------------------
def predict_label_with_threshold(image_path, model, threshold_fail=0.8):
    """Classify one image and apply the FAIL confidence threshold.

    The image is loaded as RGB, preprocessed with ``infer_transforms`` and run
    through ``model`` as a batch of one. The top softmax class is mapped to a
    name through ``class_names``.

    Args:
        image_path: Path to the image file.
        model: Model called on the preprocessed batch; its output is treated
            as logits with classes along dim 1.
        threshold_fail: Minimum top-class confidence required to keep a FAIL
            prediction; anything below it is reported as PASS.

    Returns:
        Tuple ``(final_pred, conf, raw_pred, probs)``:
        ``final_pred`` is "FAIL" or "PASS" after thresholding, ``conf`` is the
        probability of the top class (a float, not necessarily P(FAIL)),
        ``raw_pred`` is the un-thresholded class name, and ``probs`` is the
        softmax output tensor for the single-image batch.
    """
    # Close the file handle deterministically; convert() returns a converted
    # copy, so the image stays usable after the source file is closed.
    with Image.open(image_path) as src:
        image = src.convert("RGB")
    x = infer_transforms(image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(x)
        probs = F.softmax(logits, dim=1)
        conf, pred_idx = torch.max(probs, dim=1)

    conf = float(conf.item())
    pred_idx = int(pred_idx.item())
    raw_pred = class_names[pred_idx]

    # Rule: FAIL is accepted only when its confidence reaches the threshold.
    if raw_pred == "FAIL" and conf >= threshold_fail:
        final_pred = "FAIL"
    else:
        final_pred = "PASS"

    return final_pred, conf, raw_pred, probs

# -------------------------
# SCORING
# -------------------------
# Scoring rules:
# pred FAIL, true PASS -> -1
# pred PASS, true FAIL ->  0
# pred PASS, true PASS ->  0
# pred FAIL, true FAIL -> +1
def score_one(true_label, pred_label):
    """Return the points earned by one prediction.

    Only a predicted "FAIL" can change the score: +1 when the true label is
    "FAIL", -1 when it is "PASS". Every other combination scores 0.
    """
    # Anything other than a FAIL prediction is neutral.
    if pred_label != "FAIL":
        return 0
    if true_label == "FAIL":
        return 1
    return -1 if true_label == "PASS" else 0

# -------------------------
# EVALUATION OVER DIRECTORY
# -------------------------
def evaluate_directory(eval_dir, model):
    """Score every image under ``eval_dir/PASS`` and ``eval_dir/FAIL``.

    The subfolder name is the ground-truth label. Folders are visited in the
    order PASS, FAIL and files in sorted name order; files without a known
    image extension are skipped. A missing class folder prints a warning and
    is skipped.

    Args:
        eval_dir: Directory holding the ``PASS`` and ``FAIL`` subfolders.
        model: Model passed through to ``predict_label_with_threshold``.

    Returns:
        Tuple ``(total_score, details)``: the sum of per-image points and one
        dict per image with keys ``file``, ``gt``, ``raw_pred``,
        ``final_pred``, ``conf``, ``probs`` (NumPy array) and ``points``.
    """
    image_exts = (".jpg", ".jpeg", ".png", ".bmp", ".webp")
    details = []

    for true_label in ("PASS", "FAIL"):
        class_dir = os.path.join(eval_dir, true_label)
        if not os.path.isdir(class_dir):
            print(f"[WARN] Ne postoji folder: {class_dir} (preskačem)")
            continue

        # Keep only image files, in deterministic (sorted) order.
        image_names = [
            entry
            for entry in sorted(os.listdir(class_dir))
            if entry.lower().endswith(image_exts)
        ]

        for fname in image_names:
            final_pred, conf, raw_pred, probs = predict_label_with_threshold(
                os.path.join(class_dir, fname),
                model,
                threshold_fail=THRESHOLD_FAIL,
            )
            details.append({
                "file": fname,
                "gt": true_label,
                "raw_pred": raw_pred,
                "final_pred": final_pred,
                "conf": conf,
                # Per-class probabilities as a NumPy array for later analysis.
                "probs": probs.squeeze().cpu().numpy(),
                "points": score_one(true_label, final_pred),
            })

    total_score = sum(item["points"] for item in details)
    return total_score, details

# -------------------------
# RUN
# -------------------------
# Evaluate every image under EVAL_DIR, then print the total and a per-image summary.
total_score, details = evaluate_directory(EVAL_DIR, model)

print("\n=========================")
print(f"UKUPAN BROJ BODOVA: {total_score}")
print("=========================\n")

for record in details:
    # Show the file name without its extension.
    stem, _ext = os.path.splitext(record["file"])
    print(f"Slika: {stem}")
    print(f"Status: {record['final_pred']}")
    print(f"Confidence: {record['conf']:.2f}")
    print()



UKUPAN BROJ BODOVA: 15

Slika: IMG_5479_3
Status: FAIL
Confidence: 0.68

Slika: IMG_5480_3
Status: FAIL
Confidence: 0.70

Slika: IMG_5655
Status: FAIL
Confidence: 0.62

Slika: IMG_5656
Status: PASS
Confidence: 0.52

Slika: IMG_5657
Status: PASS
Confidence: 0.59

Slika: IMG_5672
Status: FAIL
Confidence: 0.93

Slika: IMG_5674
Status: PASS
Confidence: 0.57

Slika: IMG_5680
Status: PASS
Confidence: 0.77

Slika: IMG_5682
Status: FAIL
Confidence: 0.78

Slika: IMG_5683
Status: FAIL
Confidence: 0.72

Slika: IMG_5684
Status: PASS
Confidence: 0.55

Slika: IMG_5685
Status: PASS
Confidence: 0.60

Slika: IMG_5698
Status: PASS
Confidence: 0.58

Slika: IMG_5699
Status: PASS
Confidence: 0.51

Slika: IMG_5705
Status: PASS
Confidence: 0.52

Slika: IMG_5706
Status: FAIL
Confidence: 0.71

Slika: IMG_5707
Status: PASS
Confidence: 0.58

Slika: IMG_5347_2
Status: FAIL
Confidence: 0.84

Slika: IMG_5348_2
Status: FAIL
Confidence: 0.73

Slika: IMG_5355_2
Status: FAIL
Confidence: 0.60

Slika: IMG_5367_2
Status:

In [11]:
# Position of the FAIL class in the model output. Looked up from class_names
# instead of hard-coding 0, so it stays correct if the class order changes.
fail_idx = class_names.index("FAIL")

# Ground truth per image: 1 = FAIL, 0 = PASS.
y_true = [1 if d["gt"] == "FAIL" else 0 for d in details]

# Predicted probability of the FAIL class per image, i.e. P(FAIL).
y_score = [d["probs"][fail_idx] for d in details]


In [12]:
from sklearn.metrics import roc_curve, auc

# ROC curve of the P(FAIL) scores against the ground truth (1 = FAIL is the positive class),
# and the area under that curve.
fpr, tpr, thresholds = roc_curve(y_true, y_score)
roc_auc = auc(fpr, tpr)

print(f"AUC = {roc_auc:.3f}")


AUC = 0.828


In [13]:
import numpy as np

def compute_score(threshold, y_true, y_score):
    """Return the total score obtained when predicting FAIL for ``score >= threshold``.

    Each sample predicted FAIL adds +1 when its ground truth is 1 and -1 when
    it is 0; samples predicted PASS add nothing.

    Args:
        threshold: Minimum score for a sample to be predicted FAIL.
        y_true: Ground-truth labels, 1 = FAIL and 0 = PASS.
        y_score: Per-sample scores, paired with ``y_true`` by position.

    Returns:
        The summed score as an integer.
    """
    # Ground-truth labels of the samples that would be flagged as FAIL.
    flagged = (gt for gt, s in zip(y_true, y_score) if s >= threshold)
    return sum(1 if gt == 1 else -1 if gt == 0 else 0 for gt in flagged)

# Sweep 50 evenly spaced thresholds in [0.5, 0.99] and keep the best-scoring one.
# NOTE(review): the threshold is chosen on the same samples it is scored on, so
# the reported score is optimistic; confirm it on held-out data.
candidate_thresholds = np.linspace(0.5, 0.99, 50)
candidate_scores = [compute_score(t, y_true, y_score) for t in candidate_thresholds]

# max() returns the first maximal item, so ties resolve to the lowest threshold.
best_idx = max(range(len(candidate_scores)), key=candidate_scores.__getitem__)
best_threshold = candidate_thresholds[best_idx]
best_score = candidate_scores[best_idx]

print(f"Najbolji prag: {best_threshold:.2f}")
print(f"Score s tim pragom: {best_score}")


Najbolji prag: 0.59
Score s tim pragom: 15
