<a href="https://colab.research.google.com/github/FunkyDonkey065/Facade_aesthetic_evaluator/blob/main/ResNet_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!unzip data.zip

Archive:  data.zip
   creating: data/
   creating: data/51/
  inflating: data/51/frame_3.jpg     
  inflating: data/51/frame_2.jpg     
  inflating: data/51/frame_2_score.json  
  inflating: data/51/frame_1.jpg     
   creating: data/52/
  inflating: data/52/frame_3.jpg     
  inflating: data/52/frame_2.jpg     
  inflating: data/52/frame_2_score.json  
  inflating: data/52/frame_1.jpg     
   creating: data/24/
  inflating: data/24/frame_3.jpg     
  inflating: data/24/frame_2.jpg     
  inflating: data/24/frame_2_score.json  
  inflating: data/24/frame_1.jpg     
   creating: data/22/
  inflating: data/22/frame_3.jpg     
  inflating: data/22/frame_2.jpg     
  inflating: data/22/frame_2_score.json  
  inflating: data/22/frame_1.jpg     
   creating: data/41/
  inflating: data/41/frame_3.jpg     
  inflating: data/41/frame_2.jpg     
  inflating: data/41/frame_2_score.json  
  inflating: data/41/frame_1.jpg     
   creating: data/30/
  inflating: data/30/frame_3.jpg     
  inflating:

Import

In [2]:
import os
import json
from PIL import Image

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T
import torchvision.models as models



Config

In [3]:
BASE_DIR = "data"           # If the data lives under /content/data, change this to "/content/data"
JSON_NAME = "frame_2_score.json"
BATCH_SIZE = 8
NUM_EPOCHS = 4
LR = 1e-4
VAL_RATIO = 0.2
NUM_WORKERS = 2  # 2 works on Colab; can be raised on a local machine
SEED = 42

# Seed PyTorch's global RNG once, up front, so the random train/val split, the shuffle order and
# the freshly initialised regression head are repeatable after a kernel restart.
# (Some GPU kernels can still be non-deterministic.)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


# Fields to extract from each JSON file, as (group, key) pairs (8 dimensions)
LABEL_KEYS = [
    ("stimulus", "composition_and_proportion"),
    ("stimulus", "material_and_details"),
    ("stimulus", "color_harmony"),
    ("organism", "visual_comfort"),
    ("organism", "sense_of_order"),
    ("organism", "preference_score"),
    ("response", "visual_saliency"),
    ("response", "attention_attraction"),
]

Using device: cpu


In [4]:
class FacadeSORDataset(Dataset):
    """
    Dataset of facade samples, one sample per sub-folder of ``base_dir``.

    Each item is a tuple ``(concat_img, frame2_img, y)``:
    - concat_img: frame_1 / frame_2 / frame_3 pasted side by side, left to right
    - frame2_img: frame_2 on its own
    - y: float32 tensor of S-O-R labels read from the folder's JSON file, one
      value per ``(group, key)`` label pair

    Args:
        base_dir: directory whose sub-folders each hold the three frames and the JSON file.
        transform: optional callable applied to both images before they are returned.
        json_name: name of the label file inside each folder. Defaults to the
            module-level ``JSON_NAME``.
        label_keys: iterable of ``(group, key)`` pairs to read from the JSON.
            Defaults to the module-level ``LABEL_KEYS``.
    """

    FRAME_NAMES = ("frame_1.jpg", "frame_2.jpg", "frame_3.jpg")

    def __init__(self, base_dir, transform=None, json_name=None, label_keys=None):
        self.base_dir = base_dir
        self.transform = transform
        # Fall back to the notebook-level configuration only when the caller
        # did not pass explicit values (the globals are looked up lazily).
        self.json_name = JSON_NAME if json_name is None else json_name
        self.label_keys = list(LABEL_KEYS if label_keys is None else label_keys)

        # Candidate folders: real directories, skipping hidden ones
        # (e.g. .ipynb_checkpoints) and the Python cache.
        candidates = sorted(
            d for d in os.listdir(base_dir)
            if os.path.isdir(os.path.join(base_dir, d))
            and not d.startswith('.')
            and d != '__pycache__'
        )

        # Keep only folders that contain all three frames and the label file.
        required_files = [*self.FRAME_NAMES, self.json_name]
        self.folders = []
        for folder in candidates:
            folder_path = os.path.join(base_dir, folder)
            if all(os.path.exists(os.path.join(folder_path, f)) for f in required_files):
                self.folders.append(folder)
            else:
                print(f"Warning: Skipping folder '{folder}' - missing required files")

        print(f"Found {len(self.folders)} valid folders.")

    def __len__(self):
        """Return the number of valid sample folders."""
        return len(self.folders)

    def _load_label(self, json_path):
        """
        Read the configured label values from ``json_path``.

        Returns:
            1-D float32 tensor with one entry per label key, in key order.

        Raises:
            RuntimeError: if the file cannot be read or parsed, a key is
                missing, or a value cannot be converted to float.
        """
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            labels = [float(data[group][key]) for group, key in self.label_keys]
        except (OSError, KeyError, TypeError, ValueError) as e:
            # TypeError covers null scores and groups that are not mappings;
            # json.JSONDecodeError is a ValueError subclass.
            raise RuntimeError(f"Error loading labels from {json_path}: {e}") from e
        return torch.tensor(labels, dtype=torch.float32)

    @staticmethod
    def _open_rgb(path):
        """Decode ``path`` into an RGB image and close the underlying file."""
        with Image.open(path) as img:
            # convert() returns a new, fully loaded image, so it stays usable
            # after the file handle is closed.
            return img.convert("RGB")

    def _load_frames(self, paths):
        """Decode every path in ``paths`` as RGB; wrap I/O and decode failures in RuntimeError."""
        try:
            return [self._open_rgb(p) for p in paths]
        except OSError as e:
            # FileNotFoundError and Pillow's UnidentifiedImageError are both OSError subclasses.
            raise RuntimeError(f"Error loading images: {e}") from e

    @staticmethod
    def _concat_frames(frames):
        """Paste ``frames`` left to right on one canvas sized from the first frame."""
        w, h = frames[0].size
        canvas = Image.new("RGB", (w * len(frames), h))
        for i, frame in enumerate(frames):
            if frame.size != (w, h):
                # A frame with a different size would spill into its neighbour's
                # slot or leave black gaps; bring it to the reference size first.
                frame = frame.resize((w, h))
            canvas.paste(frame, (i * w, 0))
        return canvas

    def _load_concat(self, f1, f2, f3):
        """
        Load three frame files and return them concatenated horizontally.

        Raises:
            RuntimeError: if any frame is missing or cannot be decoded.
        """
        return self._concat_frames(self._load_frames((f1, f2, f3)))

    def __getitem__(self, idx):
        """Return ``(concat_img, frame2_img, y)`` for the ``idx``-th valid folder."""
        folder_path = os.path.join(self.base_dir, self.folders[idx])
        frame_paths = [os.path.join(folder_path, name) for name in self.FRAME_NAMES]
        json_path = os.path.join(folder_path, self.json_name)

        # Decode each frame once; frame_2 is reused as the stand-alone centre image.
        frames = self._load_frames(frame_paths)
        concat_img = self._concat_frames(frames)
        frame2_img = frames[1]

        # Apply transforms
        if self.transform is not None:
            concat_img = self.transform(concat_img)
            frame2_img = self.transform(frame2_img)

        # Load labels
        y = self._load_label(json_path)

        return concat_img, frame2_img, y

Center-Aware Model

In [5]:
class CenterAwareConcatModel(nn.Module):
    """
    Two-input regressor: a shared CNN backbone encodes both the concatenated
    three-frame strip and the centre frame; the two feature vectors are
    concatenated and mapped to ``num_outputs`` scores by a small MLP.

    Args:
        backbone_name: name of a torchvision constructor whose classifier is a
            linear ``fc`` layer (ResNet-style), e.g. ``"resnet18"``.
        num_outputs: number of regression targets.
        pretrained: load the ``IMAGENET1K_V1`` weights when True (default).
            Pass False when a checkpoint is loaded straight afterwards, to
            skip the download.

    Raises:
        ValueError: if ``backbone_name`` is not a torchvision constructor or
            the resulting network has no linear ``fc`` layer.
    """

    def __init__(self, backbone_name="resnet18", num_outputs=8, pretrained=True):
        super().__init__()

        # --- Backbone: selected by name (ResNet18 by default) ---
        builder = getattr(models, backbone_name, None)
        if not callable(builder):
            raise ValueError(f"Unknown torchvision backbone: {backbone_name!r}")
        backbone = builder(weights="IMAGENET1K_V1" if pretrained else None)
        if not isinstance(getattr(backbone, "fc", None), nn.Linear):
            raise ValueError(
                f"Backbone {backbone_name!r} has no linear 'fc' layer to replace"
            )
        in_features = backbone.fc.in_features
        # Drop the ImageNet classifier so the backbone returns pooled features.
        backbone.fc = nn.Identity()

        # Shared parameters: both attributes reference the same module. The
        # alias is kept because saved checkpoints hold keys under both names.
        self.backbone = backbone
        self.backbone_center = backbone

        # MLP head over the two concatenated feature vectors
        self.reg_head = nn.Sequential(
            nn.Linear(in_features * 2, 256),
            nn.ReLU(),
            nn.Linear(256, num_outputs)
        )

    def forward(self, concat_img, frame2_img):
        """
        Encode both inputs and regress the scores.

        Returns:
            Output of the regression head applied to ``[centre, concat]``
            features joined along dim 1.
        """
        f_concat = self.backbone(concat_img)
        f_center = self.backbone_center(frame2_img)

        # Order matters: the head was trained with centre features first.
        fused = torch.cat([f_center, f_concat], dim=1)
        return self.reg_head(fused)


Transform + DataLoader

In [6]:
# Shared preprocessing for both model inputs: resize to 224x224, convert to a
# tensor and normalise with the ImageNet channel statistics.
# Note that the fixed square resize also squashes the 3-frame-wide strip.
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
])

dataset = FacadeSORDataset(BASE_DIR, transform=transform)

val_size = int(len(dataset) * VAL_RATIO)
train_size = len(dataset) - val_size

# Split with a dedicated, seeded generator so the same folders land in the
# validation set on every run, regardless of how much global RNG state was
# consumed before this cell.
SPLIT_SEED = 42
train_ds, val_ds = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=NUM_WORKERS)

print(f"Train: {train_size}, Val: {val_size}")


Found 99 valid folders.
Train: 80, Val: 19


Train

In [7]:
# Build the model with one regression output per label key and move it to the selected device.
model = CenterAwareConcatModel(num_outputs=len(LABEL_KEYS)).to(device)
# Mean-squared-error loss between predicted and target scores.
criterion = nn.MSELoss()
# AdamW over all model parameters at the configured learning rate.
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

def run_epoch(loader, training=True):
    """
    Run one full pass over ``loader`` with the notebook-level ``model``,
    ``criterion`` and ``optimizer``.

    Args:
        loader: iterable yielding ``(concat_img, frame2_img, targets)`` batches.
        training: when True the model is put in train mode and its weights are
            updated after every batch; when False the model is put in eval
            mode and gradients are disabled.

    Returns:
        Mean loss per sample over the pass, or ``nan`` if the loader yielded
        no samples (e.g. an empty validation split on a very small dataset).
    """
    if training:
        model.train()
    else:
        model.eval()

    total_loss = 0.0
    n = 0

    for concat_img, frame2_img, targets in loader:
        concat_img = concat_img.to(device)
        frame2_img = frame2_img.to(device)
        targets = targets.to(device)

        if training:
            optimizer.zero_grad()

        with torch.set_grad_enabled(training):
            preds = model(concat_img, frame2_img)
            loss = criterion(preds, targets)
            if training:
                loss.backward()
                optimizer.step()

        # criterion returns a batch mean; weight it by batch size so a smaller
        # final batch does not skew the epoch average.
        batch_size = concat_img.size(0)
        total_loss += loss.item() * batch_size
        n += batch_size

    if n == 0:
        # Nothing was processed: report "no value" instead of dividing by zero.
        return float("nan")
    return total_loss / n

# One training pass followed by one validation pass per epoch; log the loss
# value that run_epoch returns for each.
for epoch in range(1, NUM_EPOCHS + 1):
    train_loss = run_epoch(train_loader, True)
    val_loss   = run_epoch(val_loader, False)
    print(f"Epoch {epoch} | Train {train_loss:.4f} | Val {val_loss:.4f}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 152MB/s]


Epoch 1 | Train 37.9079 | Val 29.1669
Epoch 2 | Train 21.2232 | Val 13.4033
Epoch 3 | Train 8.6400 | Val 4.1868
Epoch 4 | Train 2.1780 | Val 1.1153


Save

In [8]:
# Create the output directory if needed and persist only the model's state_dict
# (weights), not the whole module object.
os.makedirs("checkpoints", exist_ok=True)
torch.save(model.state_dict(), "checkpoints/sor_center_model.pth")
print("Model saved.")


Model saved.


In [9]:
!pip install torch.onnx

Collecting torch.onnx
  Downloading torch_onnx-0.1.25-py3-none-any.whl.metadata (3.1 kB)
Collecting onnxscript>=0.1.0.dev20240831 (from torch.onnx)
  Downloading onnxscript-0.5.7.dev20251119-py3-none-any.whl.metadata (13 kB)
Collecting onnx>=1.16 (from torch.onnx)
  Downloading onnx-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.0 kB)
Collecting onnx_ir<2,>=0.1.12 (from onnxscript>=0.1.0.dev20240831->torch.onnx)
  Downloading onnx_ir-0.1.12-py3-none-any.whl.metadata (3.2 kB)
Downloading torch_onnx-0.1.25-py3-none-any.whl (81 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.6/81.6 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (18.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m84.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnxscript-0.5.7.dev20251119-py3-none-any.whl (692 kB)
[2K   [90m━━━━━━━━━━━━━

ONNX export

In [10]:
# ===== ONNX Export =====

import torch.onnx

# Output dimension of the model (8 labels); LABEL_KEYS is defined in the config cell
NUM_OUTPUTS = len(LABEL_KEYS)

# Rebuild the model (the architecture must match the one used for training exactly)
export_model = CenterAwareConcatModel(num_outputs=NUM_OUTPUTS)
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code when it is loaded.
export_model.load_state_dict(
    torch.load("checkpoints/sor_center_model.pth", map_location="cpu", weights_only=True)
)
export_model.eval()

# Dummy inputs (shapes must match what is fed at real inference time)
dummy_concat = torch.randn(1, 3, 224, 224, requires_grad=False)
dummy_frame2 = torch.randn(1, 3, 224, 224, requires_grad=False)

# Export the ONNX model
torch.onnx.export(
    export_model,
    (dummy_concat, dummy_frame2),         # 2 inputs
    "facade_sor.onnx",                   # output file name
    input_names=["concat_img", "frame2_img"],
    output_names=["scores"],
    dynamic_axes=None,                   # fixed batch=1; more stable for use from Grasshopper
    opset_version=17                     # recommended ONNX opset
)

print("ONNX 模型导出成功: facade_sor.onnx")


  torch.onnx.export(


ONNX 模型导出成功: facade_sor.onnx


Test PyTorch model

In [None]:
def predict_one(idx):
    """
    Score the sample stored in ``BASE_DIR/<idx>`` with the in-memory PyTorch model.

    The three frames are loaded through ``dataset._load_concat`` (strip) and
    directly from disk (centre frame), preprocessed with ``transform`` and fed
    to ``model`` in eval mode without gradient tracking.

    Returns:
        Nested dict ``{group: {key: score}}`` following ``LABEL_KEYS`` order,
        with every score as a Python float.
    """
    sample_dir = os.path.join(BASE_DIR, str(idx))
    first_path, center_path, last_path = (
        os.path.join(sample_dir, name)
        for name in ("frame_1.jpg", "frame_2.jpg", "frame_3.jpg")
    )

    strip = dataset._load_concat(first_path, center_path, last_path)
    center = Image.open(center_path).convert("RGB")

    # Add the batch dimension the model expects and move both inputs to the device.
    strip_batch = transform(strip).unsqueeze(0).to(device)
    center_batch = transform(center).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        scores = model(strip_batch, center_batch)[0].cpu().numpy().tolist()

    # Regroup the flat score vector into {group: {key: value}}.
    grouped = {}
    for (group, key), score in zip(LABEL_KEYS, scores):
        grouped.setdefault(group, {})[key] = float(score)

    return grouped



In [11]:
# Smoke test on sample folder "1" under BASE_DIR. The cell that defines
# predict_one must have been executed first, otherwise this raises NameError.
print(predict_one(1))

NameError: name 'predict_one' is not defined

Test ONNX model

In [12]:
!pip install onnxruntime

Collecting onnxruntime
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (17.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m99.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected pack

In [14]:
# NOTE(review): model_inference_onnx is not defined anywhere in this notebook —
# presumably a project-local module that must be importable from the working
# directory; confirm it is uploaded before running this cell.
from model_inference_onnx import FacadeScoringEngineONNX

# Point the engine at the ONNX file written by the export cell.
engine = FacadeScoringEngineONNX("facade_sor.onnx")

# NOTE(review): these are bare file names, so they are presumably resolved
# against the current working directory rather than BASE_DIR — confirm the
# three frames exist there.
result = engine.predict("frame_1.jpg", "frame_2.jpg", "frame_3.jpg")

print(result)


{'stimulus': {'composition_and_proportion': 3.3126060962677, 'material_and_details': 2.7733500003814697, 'color_harmony': 4.18727970123291}, 'organism': {'visual_comfort': 4.167214870452881, 'sense_of_order': 3.7930731773376465, 'preference_score': 3.810302257537842}, 'response': {'visual_saliency': 3.0102109909057617, 'attention_attraction': 4.1726603507995605}}
