Import necessary packages

In [2]:
import numpy as np
import json
from PIL import Image
import torch
from torchvision import transforms
from PIL import Image
from unet import UNet
import matplotlib.pyplot as plt

File path

In [2]:
pathTrain = 'data/food/train/'
pathVal = 'data/food/val/'
annotation_dir_train = pathTrain + "annotations.json"
objData_train = json.load(open(annotation_dir_train))

annotation_dir_val = pathVal + "annotations.json"
objData_val = json.load(open(annotation_dir_val))

Functions

In [3]:
# def crop_seg(coordinate_matrix, width, height):
def crop_seg(coordinate_matrix):
    new_list = []
    for i in range(0, len(coordinate_matrix)):
        for j in coordinate_matrix[i]:
            new_list.append(j)

    coordinate_matrix = np.array(new_list)
    x_coordinate = coordinate_matrix[::2]
    y_coordinate = coordinate_matrix[1::2]
    top_left = np.array([x_coordinate.min(), y_coordinate.min()])
    bottom_right = np.array([x_coordinate.max(), y_coordinate.max()])
    # temp = bottom_right*1.1
    # bottom_right[0] = min(temp[0], width)
    # bottom_right[1] = min(temp[1], height)

    return top_left, bottom_right

def crop_image_by_mask(mask, width, height):
    array_2d = mask.transpose(1,2,0).reshape(-1, mask.shape[0])
    # sau khi transpose x -> y, y -> x. Do đó Row = x, y = col

    row_array, col_array = np.where(array_2d == 1)
    x_array = row_array
    y_array = col_array
    if x_array.size == 0 or y_array.size == 0:
        return (0, 0, width, height)
    min_x, min_y = x_array.min(), y_array.min()
    max_x, max_y = x_array.max(), y_array.max()

    width_scale = width/512
    height_scale = height/512

    min_x = float(min_x*width_scale)
    min_y = float(min_y*height_scale)
    max_x = float(max_x*width_scale)
    max_y = float(max_y*height_scale)

    return (min_x, min_y, max_x, max_y)


def calculate_IOU(seg_box_1, seg_box_2):
    coor_x = np.sort(np.array([seg_box_1[0], seg_box_1[2], seg_box_2[0], seg_box_2[2]]))[1:3]
    coor_y = np.sort(np.array([seg_box_1[1], seg_box_1[3], seg_box_2[1], seg_box_2[3]]))[1:3]
    s_inter = (coor_x[0]-coor_x[1])*(coor_y[0]-coor_y[1])
    s_union = (seg_box_1[0]-seg_box_1[2])*(seg_box_1[1]-seg_box_1[3]) + \
        (seg_box_2[0]-seg_box_2[2])*(seg_box_2[1]-seg_box_2[3]) - s_inter
    iou = s_inter/s_union

    return iou


In [4]:
val_save_path = 'data/food_new_vrs/val/images/'
original_val_path = 'data/food/val/images/'

Load model

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model_pth = "./Segmentation_model/Unet/UNet-PyTorch/models/unet.pth"

model = UNet(in_channels=3, num_classes=1).to(device)
model.load_state_dict(torch.load(model_pth, map_location=torch.device(device)))
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor()])

Calculate IoU for valid dataset

In [6]:
num_image_val = 0
sum_iou_val = 0
for element in objData_val['images']:
    key = element['id']
    image_name = element['file_name']
    open_path = original_val_path + image_name
    get_image = Image.open(open_path)
    width, height = get_image.size
    coordinate_matrix = []
    for i in objData_val['annotations']:
        if i['image_id'] == key:
            coordinate_matrix.append(i['segmentation'][0])
    top_left, bottom_right = crop_seg(coordinate_matrix)
    crop_box_1 = (top_left[0], top_left[1], bottom_right[0], bottom_right[1])

    img = transform(get_image).float().to(device)
    img = img.unsqueeze(0)

    pred_mask = model(img)

    pred_mask = pred_mask.squeeze(0).cpu().detach()
    pred_mask = pred_mask.permute(1, 2, 0)
    pred_mask[pred_mask < 0] = 0
    pred_mask[pred_mask > 0] = 1

    mask = pred_mask.numpy()
    crop_box_2 = crop_image_by_mask(mask, width, height)
    iou = calculate_IOU(crop_box_1, crop_box_2)
    sum_iou_val += iou
    num_image_val +=1

avg_iou_val = sum_iou_val/num_image_val

In [7]:
print(avg_iou_val)

0.5057986669849921


Calculate IoU for train dataset

In [8]:
train_save_path = 'data/food_new_vrs/train/images/'
original_train_path = 'data/food/train/images/'

In [9]:
num_image_train = 0
sum_iou_train = 0
for element in objData_train['images']:
    key = element['id']
    image_name = element['file_name']
    open_path = train_save_path + image_name
    get_image = Image.open(open_path)
    width, height = get_image.size
    coordinate_matrix = []
    for i in objData_train['annotations']:
        if i['image_id'] == key:
            coordinate_matrix.append(i['segmentation'][0])
    top_left, bottom_right = crop_seg(coordinate_matrix)
    crop_box_1 = (top_left[0], top_left[1], bottom_right[0], bottom_right[1])

    img = transform(get_image).float().to(device)
    img = img.unsqueeze(0)

    pred_mask = model(img)

    pred_mask = pred_mask.squeeze(0).cpu().detach()
    pred_mask = pred_mask.permute(1, 2, 0)
    pred_mask[pred_mask < 0] = 0
    pred_mask[pred_mask > 0] = 1

    mask = pred_mask.numpy()
    crop_box_2 = crop_image_by_mask(mask, width, height)
    iou = calculate_IOU(crop_box_1, crop_box_2)
    sum_iou_train += iou
    num_image_train += 1

avg_iou = sum_iou_train/num_image_train

In [11]:
print(avg_iou)

0.419224194536759


data/food_new_vrs/train/images/135204.jpg

In [3]:
# img = transform(get_image).float().to(device)
# img = img.unsqueeze(0)
# pred_mask = model(img)
# pred_mask = pred_mask.squeeze(0).cpu().detach()
# pred_mask = pred_mask.permute(1, 2, 0)
# pred_mask[pred_mask < 0] = 0
# pred_mask[pred_mask > 0] = 1
# mask = pred_mask.numpy()

In [4]:
# plt.imshow(mask)
# plt.show()