In [1]:
import json
import torch
import json
import os
from LayoutDM import CLDM
from torch.utils.data import DataLoader
from diffusers import DDPMScheduler
from PIL import Image
import numpy as np
from dataset import ImageLayout
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


## Load Model with Seperate Param dict

In [2]:
device = torch.device('cuda:5' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda:5


In [4]:
model_path = '/workspace/joonsm/City_Layout/log_dir/FPN[50]_freeze/checkpoints/checkpoint-250/pytorch_model.bin'
model =CLDM(use_temp=False, backbone_name='resnet50')
model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
model.to(device)
model.eval()

CLDM(
  (extractor): ImageFeatureExtractor(
    (body): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Module(
        (0): Module(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stat

In [5]:
val = ImageLayout(type='val')
dataset = DataLoader(val, batch_size=256)
diffusion = DDPMScheduler(num_train_timesteps=250, prediction_type='sample', clip_sample=True)

In [6]:
def seg_loss(predicted_box, sample, zero_count):
    src = sample['sr']
    src_list = []
    
    for i in src:
        src_ = Image.open(i)
        src_list.append(src_)
    
    box = predicted_box.cpu().numpy()
    box = (box + 1) / 2
    
    match_list = []

    for i in range(box.shape[0]):
        img = src_list[i]
        width, height = img.size
        cx, cy, w, h = box[i]
        x = int((cx - w / 2) * width)
        y = int((cy - h / 2) * height)
        x2 = int((cx + w / 2) * width)
        y2 = int((cy + h / 2) * height)
        boxes = (x, y, x2, y2)
        
        # 이미지 크롭
        crop = img.crop(boxes)
        crop = np.array(crop)


        if crop.size == 0:
            print(f"Warning: Crop size is zero for box {boxes}.")
            match_list.append(0)  
            zero_count+=1
            continue
        
        blue_channel = crop[:, :, 2]
        blue_channel_flatten = blue_channel.flatten()
        

        match_pixel_size = np.sum(blue_channel_flatten == 128) / blue_channel_flatten.size
        match_list.append(match_pixel_size)
    
    # NaN 방지를 위해 match_list가 비어 있지 않은지 확인
    if len(match_list) == 0:
        print("Error: All crops have zero size. Returning NaN.")
        return float('nan')
    
    value = sum(match_list) / len(match_list)
    print(value)
    return value, zero_count


### Eval|

In [7]:
zero_count = 0
batch_value = []
step = 0
with torch.no_grad():
    for step,batch in tqdm(enumerate(dataset), total=len(dataset)):
        shape = batch['box'].shape
        noisy_batch = {'image':batch['image'].to(device),
                'box': torch.rand(*shape, dtype=torch.float32, device=device)}
        for i in range(250)[::-1]:
            t = torch.tensor([i]*shape[0], device=device)
            with torch.no_grad():
                noise_pred = model(noisy_batch, timesteps=t)
                bbox_pred = diffusion.step(noise_pred, t[0].detach().item(),  noisy_batch['box'], return_dict=True)

                noisy_batch['box'] = bbox_pred.prev_sample
        predicted = bbox_pred.prev_sample
        value,zero_count = seg_loss(predicted, batch,zero_count)
        print(zero_count)
        batch_value.append(value)
        step +=1
final = sum(batch_value)/step

  0%|          | 0/9 [00:00<?, ?it/s]



 11%|█         | 1/9 [05:49<46:35, 349.38s/it]

0.3243056239872953
1


 22%|██▏       | 2/9 [11:31<40:16, 345.28s/it]

0.29779011474502187
1


 33%|███▎      | 3/9 [17:14<34:24, 344.11s/it]

0.29053383264595234
1


 44%|████▍     | 4/9 [23:00<28:43, 344.66s/it]

0.27240659075462625
1


 56%|█████▌    | 5/9 [28:39<22:50, 342.72s/it]

0.2674573326297178
1


 67%|██████▋   | 6/9 [34:20<17:06, 342.18s/it]

0.27073974537591994
1


 78%|███████▊  | 7/9 [40:03<11:24, 342.41s/it]

0.3323314039216478
2


 89%|████████▉ | 8/9 [43:26<04:57, 297.96s/it]

0.3215844844688416
2


100%|██████████| 9/9 [44:50<00:00, 298.96s/it]

0.3962326704799326
2





In [9]:
print('score:', final)  
print('zero_count:', zero_count)

score: 0.30815353322321726
zero_count: 2


### GT 값 구해놓기

tensor(0.1115)