## inference
This notebook runs inference for semantic segmentation and for instance segmentation.
 
Use the model to predict on the test set, compute the accuracy metrics (IoU, FP, FN, TP, TN, AP), and save the visualizations.

#### import

In [None]:
import cv2
import os
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import random
import csv

from PIL import Image, ImageDraw
from inference_sdk import InferenceHTTPClient
from inference_sdk import InferenceHTTPClient, InferenceConfiguration

import shapely.geometry as geom
from shapely.geometry import Polygon
from shapely.errors import TopologicalError

from collections import defaultdict

from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

from roboflow import Roboflow


from pycocotools.coco import COCO
from tensorflow.keras.metrics import Precision, Recall, IoU
from sklearn.metrics import confusion_matrix
import base64
import io



#### semantic segmentation

test on single image, semantic segmentation

In [None]:
# Smoke test: run semantic segmentation on a single image.
# Author's note: the model only works when it is configured exactly like this;
# the reason is unknown.
# NOTE(review): the Roboflow API keys in this cell are hard-coded; consider
# loading them from an environment variable before sharing the notebook.


# Semantic segmentation for the 2048-pixel tiles
# rf = Roboflow(api_key="uZgVV5Mu30Veqelqd61T")
# project = rf.workspace().project("ss-favela-2048to1024-only-mask") # input model id
# model = project.version("1").model # input model version

# image_path = 'demo/original_2048_RGB.jpg'

# # print(model.predict(image_path).json())
# prediction = model.predict(image_path)

# print(prediction.json())
# model.predict(image_path).save("demo/prediction_2048.jpg")


# Semantic segmentation for the 400-pixel tiles
rf = Roboflow(api_key="ukRLimOMKqvRJtEskQFC")
project = rf.workspace().project("semantic-segmentation-satellite") # input model id
model = project.version("1").model # input model version

image_path = 'demo/original.jpg'

# Run inference once and reuse the result for both the JSON dump and the saved
# plot, instead of sending the same image to the model a second time.
prediction = model.predict(image_path)

print(prediction.json())
prediction.save("demo/prediction_400.jpg")



In [None]:
import os
import cv2
import numpy as np
import base64
from PIL import Image, ImageDraw
import io
from roboflow import Roboflow
import matplotlib.pyplot as plt

# Predict one test tile and display the predicted mask blended over the image.
# Initialize Roboflow and model
# NOTE(review): the API key is hard-coded here; consider reading it from the environment.
rf = Roboflow(api_key="ukRLimOMKqvRJtEskQFC")
project = rf.workspace().project("semantic-segmentation-satellite")
model = project.version("1").model

# Input image path
image_path = 'SS_Rio_tile1024_stride0.v1i.coco-segmentation/test/000000005993_png.rf.3459be7a8ba7c2dc7f73edc5d5ea9a44.jpg'

# Perform prediction
prediction = model.predict(image_path)
prediction_data = prediction.json()

# Decode the predicted segmentation mask: the first prediction carries a
# base64-encoded image that is opened with PIL and converted to an array.
# NOTE(review): indexing [0] assumes the response contains at least one prediction.
segmentation_mask_b64 = prediction_data['predictions'][0]['segmentation_mask']
decoded_mask = base64.b64decode(segmentation_mask_b64)
pred_mask = np.array(Image.open(io.BytesIO(decoded_mask)))

# Load the original image
# NOTE(review): cv2.imread returns None for an unreadable path; there is no explicit check here.
original_image = cv2.imread(image_path)
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)  # Convert to RGB for visualization

# Print sizes of the mask and the original image
print(f"Original image size: {original_image.shape[:2]} (height, width)")
print(f"Predicted mask size: {pred_mask.shape[:2]} (height, width)")

# Resize the predicted mask to match the original image dimensions
# (nearest-neighbour, so mask values are copied rather than blended)
if pred_mask.shape[:2] != original_image.shape[:2]:
    print(f"Resizing predicted mask from {pred_mask.shape[:2]} to {original_image.shape[:2]}")
    pred_mask_resized = cv2.resize(pred_mask, (original_image.shape[1], original_image.shape[0]), interpolation=cv2.INTER_NEAREST)
else:
    pred_mask_resized = pred_mask

# Convert the predicted mask to an overlay: every non-zero mask pixel becomes red
# NOTE(review): the boolean index assumes the decoded mask is single-channel (H, W) — confirm.
overlay = np.zeros_like(original_image, dtype=np.uint8)
overlay[pred_mask_resized > 0] = [255, 0, 0]  # Red color for mask

# Blend the original image and the mask overlay (70% image, 30% overlay)
blended_image = cv2.addWeighted(original_image, 0.7, overlay, 0.3, 0)

# Visualization
plt.figure(figsize=(10, 10))
plt.imshow(blended_image)
plt.axis('off')
plt.title('Original Image with Predicted Mask Overlay')
plt.show()


In [None]:
from PIL import ImageFont, ImageDraw
import io
import os
import json
import csv
import base64
import numpy as np
from PIL import Image, ImageDraw
import cv2
from matplotlib import pyplot as plt
from roboflow import Roboflow
from sklearn.metrics import precision_recall_curve, average_precision_score

def add_labels(image, label, iou=None, font_size=30):
    """Return a copy of ``image`` with a black caption strip added above it.

    Parameters:
        image (PIL.Image): Image to caption; it is pasted unchanged below the strip.
        label (str): Caption text.
        iou (float, optional): When given, " (IoU: x.xxxx)" is appended to the caption.
        font_size (int): Font size requested for the caption; the strip is
            ``font_size + 20`` pixels tall.

    Returns:
        PIL.Image: A new RGBA image of size (width, height + font_size + 20).
    """
    # Create a new image with room for the caption strip above the original image
    width, height = image.size
    label_height = font_size + 20
    new_image = Image.new("RGBA", (width, height + label_height), (0, 0, 0, 255))  # Black background

    # Paste the original image below the caption strip
    new_image.paste(image, (0, label_height))

    draw = ImageDraw.Draw(new_image)

    # Prefer Calibri at the requested size; fall back to Pillow's built-in font
    # when the TTF file cannot be loaded. The fallback is requested without a
    # size, so it does not follow ``font_size``.
    try:
        font = ImageFont.truetype("calibri.ttf", font_size)
    except IOError:
        font = ImageFont.load_default()

    # Build the caption text
    text = label
    if iou is not None:
        text += f" (IoU: {iou:.4f})"

    # Measure the text. textbbox returns the box the glyphs would occupy when
    # drawn at (0, 0); its left/top are usually not zero, so they must be
    # subtracted from the drawing origin for the glyphs to be truly centered.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top
    text_position = (
        (width - text_width) // 2 - left,
        (label_height - text_height) // 2 - top,
    )

    # Draw the caption
    draw.text(text_position, text, fill="white", font=font)

    return new_image

def combine_images_with_labels(images, labels, iou=None, border_width=20, font_size=30, outer_margin=20):
    """
    Lay captioned images out in a single row on a black canvas.

    Each image is captioned with ``add_labels``; the captioned panels are placed
    left to right, top-aligned, separated by black gaps, and the whole row is
    surrounded by a black margin.

    Parameters:
        images (list): PIL.Image objects, in display order.
        labels (list): Caption for each image; must be as long as ``images``.
        iou (float, optional): IoU score shown only in the last image's caption.
        border_width (int): Width in pixels of the black gap between neighbours.
        font_size (int): Font size passed on to ``add_labels``.
        outer_margin (int): Black padding in pixels added on all four sides.

    Returns:
        PIL.Image: New RGBA image containing the captioned row plus the margin.
    """
    assert len(images) == len(labels), "Images and labels must have the same length."

    black = (0, 0, 0, 255)
    last_position = len(images) - 1

    # Caption every panel; only the final one carries the IoU score.
    panels = [
        add_labels(
            picture,
            labels[position],
            iou if position == last_position else None,
            font_size=font_size,
        )
        for position, picture in enumerate(images)
    ]

    # The row is as wide as all panels plus the gaps, and as tall as the tallest panel.
    row_width = sum(panel.width for panel in panels) + border_width * (len(panels) - 1)
    row_height = max(panel.height for panel in panels)
    row = Image.new("RGBA", (row_width, row_height), black)

    # Place panels left to right; the canvas colour shows through as the gap.
    cursor = 0
    for panel in panels:
        row.paste(panel, (cursor, 0))
        cursor += panel.width + border_width

    # Surround the row with the outer margin.
    framed = Image.new(
        "RGBA",
        (row.width + 2 * outer_margin, row.height + 2 * outer_margin),
        black,
    )
    framed.paste(row, (outer_margin, outer_margin))

    return framed

##### IOU calculation test using tf

In [None]:
import tensorflow as tf

# Create a MeanIoU metric instance for two classes
m = tf.keras.metrics.MeanIoU(num_classes=2)

# Update the metric state with labels, predictions and per-sample weights
m.update_state([0, 0, 1, 1], [0, 1, 0, 1], sample_weight=[0.3, 0.3, 0.3, 0.1])

# Compute and print the result
result = m.result().numpy()
print(result)

In [25]:
'''
Compute the mean IoU of every image separately.
The code below is for numeric comparison only; computing the score this way is flawed:
the final mean IoU must come from the confusion matrix accumulated over all images,
not from the average of the per-image mean IoU values.
'''

# Initialize Roboflow and model
# NOTE(review): the API key is hard-coded here; consider reading it from the environment.
rf = Roboflow(api_key="uZgVV5Mu30Veqelqd61T")
project = rf.workspace().project("ss_rio_tile1024_stride0") # input model id
model = project.version("1").model # input model version

# Path of the COCO dataset split
image_dir = 'SS_Rio_tile1024_stride0.v1i.coco-segmentation/valid'
coco_json_path = os.path.join(image_dir, "_annotations.coco.json")

# Load the COCO annotation file
with open(coco_json_path) as f:
    coco_data = json.load(f)

image_ids = [image['id'] for image in coco_data['images']]  # IDs of all images

# Initialize the IoU metrics
num_classes = 2  # Assumes 2 classes (background and target)
# MeanIoU is referenced through the `tf` module imported by the sanity-check
# cell above, because the notebook's import cell only brings in
# Precision, Recall and IoU.
mean_iou_metric = tf.keras.metrics.MeanIoU(num_classes=num_classes)
iou_metric = IoU(num_classes=num_classes, target_class_ids=[1]) 


# COCO helper used to rasterize the ground-truth annotations
coco = COCO(coco_json_path)

output_dir = os.path.join(image_dir, "segmentation_results_fp_fn")
os.makedirs(output_dir, exist_ok=True)  # the CSV below cannot be created in a missing folder
iou_csv_path = os.path.join(output_dir, "iou_results_tf.csv")
with open(iou_csv_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Image Name", "mIoU", "IOU for slum(id=1)","tn", "fp", "fn", "tp"])  # Header for CSV
    
    # Walk over all images: build the ground-truth mask and fetch the model prediction
    for image_id in image_ids:
        # Ground-truth annotations of this image
        anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))  # all annotations attached to the image ID
        image_info = coco.loadImgs(image_id)[0]
        image_file_name = image_info['file_name']
        image_path = os.path.join(image_dir, image_info['file_name'])

        # Read the image; only its height and width are used
        image = plt.imread(image_path)
        height, width, _ = image.shape

        # Rasterize the ground-truth mask: any annotated pixel becomes 1
        true_mask = np.zeros((height, width), dtype=np.uint8)
        for ann in anns:
            mask = coco.annToMask(ann)
            true_mask[mask == 1] = 1

        # Run the hosted model and decode the base64-encoded mask of the first prediction
        result = model.predict(image_path)
        prediction = result.json()
        segmentation_mask_b64 = prediction['predictions'][0]['segmentation_mask']
        mask_data = base64.b64decode(segmentation_mask_b64)
        mask_image = Image.open(io.BytesIO(mask_data)).convert("L")  # convert to grayscale
        predicted_mask = np.array(mask_image)
        # Nearest-neighbour resizing copies label values instead of blending
        # them, matching how the other cells of this notebook resize masks.
        predicted_mask_resized = cv2.resize(
            predicted_mask,
            (true_mask.shape[1], true_mask.shape[0]),
            interpolation=cv2.INTER_NEAREST,
        )

        conf_matrix = confusion_matrix(true_mask.flatten(), predicted_mask_resized.flatten(), labels=[0, 1])
        tn, fp, fn, tp = conf_matrix.ravel()

        # Compute IoU
        mean_iou_metric.update_state(true_mask, predicted_mask_resized)
        iou_metric.update_state(true_mask, predicted_mask_resized)

        image_iou = mean_iou_metric.result().numpy()
        iou = iou_metric.result().numpy()

        csv_writer.writerow([image_file_name, image_iou, iou,tn, fp, fn, tp])  # Write the image name and its IoU
        # Resetting after every image is what makes the values per-image; as
        # noted at the top of the cell, the dataset score should not be
        # computed image by image.
        mean_iou_metric.reset_state()
        iou_metric.reset_state()

    # # Compute the Mean IoU
    # mean_iou_value = mean_iou_metric.result().numpy()
    # print(f"Mean IoU: {mean_iou_value}")
print(f"IOU results saved to {iou_csv_path}")


loading Roboflow workspace...
loading Roboflow project...
loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
IOU results saved to SS_Rio_tile1024_stride0.v1i.coco-segmentation/valid\segmentation_results_fp_fn\iou_results_tf.csv


In [None]:
# Keras counters for false positives / false negatives (only instantiated in this cell).
false_positives = tf.keras.metrics.FalsePositives()
false_negatives = tf.keras.metrics.FalseNegatives()

In [None]:
# Debug check: shape and distinct values of the ground-truth mask currently in the kernel.
print(true_mask.shape)
print(np.unique(true_mask))

# Same check for the resized predicted mask currently in the kernel.
print(predicted_mask_resized.shape)
print(np.unique(predicted_mask_resized))

In [None]:
from sklearn.metrics import confusion_matrix

# Flatten both masks so each pixel is one sample
true_mask_flat = true_mask.flatten()
predicted_mask_flat = predicted_mask_resized.flatten()

# Compute the confusion matrix over labels 0 and 1
conf_matrix = confusion_matrix(true_mask_flat, predicted_mask_flat, labels=[0, 1])

print("Confusion Matrix:")
print(conf_matrix)

In [None]:
'''
For a whole set of images: the metrics are never reset inside the loop, so the
result is computed from the state accumulated over every image.
'''

# Initialize the IoU metrics
num_classes = 2  # Assumes 2 classes (background and target)
# MeanIoU is referenced through the `tf` module imported by an earlier cell,
# because the notebook's import cell only brings in Precision, Recall and IoU.
mean_iou_metric = tf.keras.metrics.MeanIoU(num_classes=num_classes)
iou = IoU(num_classes=num_classes, target_class_ids=[1]) 

# COCO helper used to rasterize the ground-truth annotations
coco = COCO(coco_json_path)

# output_dir = os.path.join(image_dir, "segmentation_results_fp_fn")
# iou_csv_path = os.path.join(output_dir, "iou_results_tf.csv")
# with open(iou_csv_path, mode='w', newline='') as csv_file:
#     csv_writer = csv.writer(csv_file)
#     csv_writer.writerow(["Image Name", "IoU"])  # Header for CSV
    
# Walk over all images: build the ground-truth mask and fetch the model prediction
for image_id in image_ids:
    # Ground-truth annotations of this image
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))  # all annotations attached to the image ID
    image_info = coco.loadImgs(image_id)[0]
    image_file_name = image_info['file_name']
    image_path = os.path.join(image_dir, image_info['file_name'])

    # Read the image; only its height and width are used
    image = plt.imread(image_path)
    height, width, _ = image.shape

    # Rasterize the ground-truth mask: any annotated pixel becomes 1
    true_mask = np.zeros((height, width), dtype=np.uint8)
    for ann in anns:
        mask = coco.annToMask(ann)
        true_mask[mask == 1] = 1

    # Run the hosted model and decode the base64-encoded mask of the first prediction
    result = model.predict(image_path)
    prediction = result.json()
    segmentation_mask_b64 = prediction['predictions'][0]['segmentation_mask']
    mask_data = base64.b64decode(segmentation_mask_b64)
    mask_image = Image.open(io.BytesIO(mask_data)).convert("L")  # convert to grayscale
    predicted_mask = np.array(mask_image)
    # Nearest-neighbour resizing copies label values instead of blending them,
    # matching how the other cells of this notebook resize masks.
    predicted_mask_resized = cv2.resize(
        predicted_mask,
        (true_mask.shape[1], true_mask.shape[0]),
        interpolation=cv2.INTER_NEAREST,
    )

    # Accumulate this image into both metrics
    mean_iou_metric.update_state(true_mask, predicted_mask_resized)
    iou.update_state(true_mask, predicted_mask_resized)
    # image_iou = mean_iou_metric.result().numpy()
    # csv_writer.writerow([image_file_name, image_iou])  # Write the image name and its IoU
    # mean_iou_metric.reset_state()

# Compute the Mean IoU and the IoU of the target class from the accumulated state
mean_iou_value = mean_iou_metric.result().numpy()
target_cate_iou = iou.result().numpy()
print(f"Mean IoU: {mean_iou_value} \n target_cate_iou:{target_cate_iou}")


IOU:

tf: 0.7343394756317139
- 但是对每张图片处理取平均是0.69？？ - solved 混淆矩阵累加
- 之前的方法算出来是0.56左右 - solved 只算了一类


算一类 -> 算多类 -> 混淆矩阵累加
roboflow: 0.77

In [None]:
# Clear the metric's accumulated state, then display result() of the freshly reset metric.
mean_iou_metric.reset_state()
mean_iou_metric.result()


In [None]:
# Mean IoU of one image. Reuses `coco`, `model`, `image_dir`, `coco_json_path`
# and `mean_iou_metric` defined by the cells above.
image_id = 1  # Pick a single image ID

with open(coco_json_path) as f:
    coco_data = json.load(f)
img_info = next((img for img in coco_data['images'] if img['id'] == image_id), None)

# Ground-truth annotations of this image
anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))  # all annotations attached to the image ID
image_info = coco.loadImgs(image_id)[0]
image_path = os.path.join(image_dir, image_info['file_name'])

# Read the image; only its height and width are used
image = plt.imread(image_path)
height, width, _ = image.shape

# Rasterize the ground-truth mask: any annotated pixel becomes 1
true_mask = np.zeros((height, width), dtype=np.uint8)
for ann in anns:
    mask = coco.annToMask(ann)
    true_mask[mask == 1] = 1

# Run the hosted model and decode the base64-encoded mask of the first prediction
result = model.predict(image_path)
prediction = result.json()
segmentation_mask_b64 = prediction['predictions'][0]['segmentation_mask']
mask_data = base64.b64decode(segmentation_mask_b64)
mask_image = Image.open(io.BytesIO(mask_data)).convert("L")  # convert to grayscale
predicted_mask = np.array(mask_image)
# Scale the predicted mask to the image size. Nearest-neighbour resizing copies
# label values instead of blending them, matching the other cells of this notebook.
predicted_mask_resized = cv2.resize(
    predicted_mask,
    (true_mask.shape[1], true_mask.shape[0]),
    interpolation=cv2.INTER_NEAREST,
)

# Start from a clean state so only this image contributes to the score
mean_iou_metric.reset_state()
mean_iou_metric.update_state(true_mask, predicted_mask_resized)

# Compute the Mean IoU
mean_iou_value = mean_iou_metric.result().numpy()
print(f"Mean IoU for image {image_id},{img_info['file_name']}: {mean_iou_value}")

In [None]:
def compute_iou_for_each_class(gt_mask, pred_mask, num_classes):
    """Return the mean IoU over the classes present in either mask.

    For every class id in ``range(num_classes)`` the IoU between the pixels
    labelled with that id in ``gt_mask`` and in ``pred_mask`` is computed.
    Classes that occur in neither mask have an empty union and therefore no
    defined IoU; they are left out of the average instead of being counted as
    0, which is also how ``tf.keras.metrics.MeanIoU`` averages. Without this,
    an image that correctly contains only background would score 0.5 for two
    classes instead of 1.0.

    Parameters:
        gt_mask (array-like): Ground-truth class id per pixel.
        pred_mask (array-like): Predicted class id per pixel, same shape as ``gt_mask``.
        num_classes (int): Number of class ids to evaluate (0 .. num_classes - 1).

    Returns:
        float: Mean IoU over the classes with a non-empty union, or 0.0 when no
        evaluated class occurs in either mask.
    """
    # Accept plain lists as well as arrays.
    gt_mask = np.asarray(gt_mask)
    pred_mask = np.asarray(pred_mask)

    class_ious = []
    for class_id in range(num_classes):
        gt_is_class = gt_mask == class_id
        pred_is_class = pred_mask == class_id

        union = np.logical_or(gt_is_class, pred_is_class).sum()
        if union == 0:
            # Class absent from both masks: nothing to score.
            continue

        intersection = np.logical_and(gt_is_class, pred_is_class).sum()
        class_ious.append(intersection / union)

    # mIoU is the average of the individual class IoUs.
    return float(np.mean(class_ious)) if class_ious else 0.0


In [None]:
# # for visulization only (improved) 



# # Initialize Roboflow and model
# # rf = Roboflow(api_key="uZgVV5Mu30Veqelqd61T")
# # project = rf.workspace().project("ss-tijuca2019_tile400_stride0")
# # model = project.version("1").model  # Specify version

# # # Input and output directories
# # image_dir = 'SS-Tijuca2019_tile400_stride0.v1i.coco-segmentation/test'


# # Initialize Roboflow and model
# rf = Roboflow(api_key="uZgVV5Mu30Veqelqd61T")
# project = rf.workspace().project("ss_rio_tile1024_stride0") # input model id
# model = project.version("1").model # input model version

# # Input and output directories
# image_dir = 'SS_Rio_tile1024_stride0.v1i.coco-segmentation/test'


# coco_json_path = os.path.join(image_dir, "_annotations.coco.json")
# output_dir = os.path.join(image_dir, "segmentation_results_fp_fn_portfolio")
# os.makedirs(output_dir, exist_ok=True)

# # Initialize CSV for IoU results
# iou_csv_path = os.path.join(output_dir, "iou_results.csv")
# with open(iou_csv_path, mode='w', newline='') as csv_file:
#     csv_writer = csv.writer(csv_file)
#     csv_writer.writerow(["Image Name", "IoU", "AP50", "AP75", "mAP", "fn_count", "fp_count", "tp_count","total_pixels"])

#     with open(coco_json_path) as f:
#         coco_data = json.load(f)

#     image_ids = [image['id'] for image in coco_data['images']]
#     threshold_list = np.linspace(0.5, 0.95, 10)  # IoU thresholds for mAP
    
#     for image_id in image_ids:
#         if image_id != -1: 
#             image_info = next(img for img in coco_data['images'] if img['id'] == image_id)
#             image_file_name = image_info['file_name']
#             img_path = os.path.join(image_dir, image_file_name)

#             # Load ground truth annotations
#             gt_annotations = [anno for anno in coco_data['annotations'] if anno['image_id'] == image_id]
#             im = cv2.imread(img_path)
#             gt_mask = np.zeros((im.shape[0], im.shape[1]), dtype=np.uint8)

#             for annotation in gt_annotations:
#                 segmentation = annotation['segmentation'][0]
#                 points = [(segmentation[i], segmentation[i + 1]) for i in range(0, len(segmentation), 2)]
#                 cv2.fillPoly(gt_mask, [np.array(points, dtype=np.int32)], 1)

#             # Perform inference
#             result = model.predict(img_path)
#             prediction = result.json()

#             if not prediction['predictions']:
#                 print(f"No predictions for {image_file_name}. Skipping.")
#                 csv_writer.writerow([image_file_name, "No predictions"])
#                 continue

#             # Decode prediction mask
#             segmentation_mask_b64 = prediction['predictions'][0]['segmentation_mask']
#             decoded_mask = base64.b64decode(segmentation_mask_b64)
#             pred_mask = np.array(Image.open(io.BytesIO(decoded_mask)))

#             # Resize pred_mask to match gt_mask dimensions
#             pred_mask_resized = cv2.resize(pred_mask, (gt_mask.shape[1], gt_mask.shape[0]), interpolation=cv2.INTER_NEAREST)
#             pred_mask_resized = (pred_mask_resized > 0).astype(np.uint8) 



#             # Flatten masks for precision-recall computation
#             gt_flat = (gt_mask.flatten() > 0).astype(np.uint8)  # Ensure ground truth values are {0, 1}
#             pred_flat = pred_mask_resized.flatten() / 255.0

#             # Calculate Precision-Recall curve
#             precision, recall, _ = precision_recall_curve(gt_flat, pred_flat)
#             ap50 = average_precision_score(gt_flat, pred_flat)  # AP50 calculation

#             # Calculate AP at each IoU threshold
#             ap_values = []
#             for threshold in threshold_list:
#                 pred_binary = (pred_flat >= threshold * 255).astype(np.uint8)  # Binarize predictions
#                 precision, recall, _ = precision_recall_curve(gt_flat, pred_binary)
#                 ap = average_precision_score(gt_flat, pred_binary)
#                 ap_values.append(ap)

#             # Extract AP50 and AP75
#             ap50 = ap_values[0]  # First value corresponds to IoU=0.5
#             ap75 = ap_values[5]  # Sixth value corresponds to IoU=0.75
#             mAP = np.mean(ap_values)  # Mean of all AP values

            
#             # Construct confusion matrix for binary segmentation
#             conf_matrix = np.zeros((2, 2), dtype=np.int32)
#             for gt, pred in zip(gt_flat, (pred_flat >= 0.5).astype(np.uint8)):
#                 conf_matrix[gt, pred] += 1

#             # Error mask
#             error_mask = cv2.bitwise_and(gt_mask, cv2.bitwise_not(pred_mask_resized))
#             fn_mask = cv2.bitwise_and(gt_mask, cv2.bitwise_not(pred_mask_resized))          # False Negative (FN): Ground Truth is 1, Prediction is 0
#             fp_mask = cv2.bitwise_and(pred_mask_resized, cv2.bitwise_not(gt_mask))        # False Positive (FP): Prediction is 1, Ground Truth is 0
#             tp_mask = cv2.bitwise_and(gt_mask, pred_mask_resized)

#             # IoU calculation
#             # gt_binary = (gt_mask > 0).astype(np.uint8)
#             # pred_binary = (pred_mask_resized > 0).astype(np.uint8)

#             # intersection = np.logical_and(gt_binary, pred_binary).sum()
#             # union = np.logical_or(gt_binary, pred_binary).sum()
#             # iou = intersection / union if union > 0 else 0

#             iou = compute_iou_for_each_class(gt_mask,pred_mask_resized,2)

#             # Calculate FN, FP, and TP counts
#             fn_count = np.sum(fn_mask)
#             fp_count = np.sum(fp_mask)
#             tp_count = np.sum(gt_mask & pred_mask_resized)
#             total_pixels = gt_mask.size

#             # Write to CSV
#             csv_writer.writerow([image_file_name, iou, ap50, ap75, mAP, fn_count, fp_count, tp_count, total_pixels])

#             # Visualization: overlay ground truth and prediction on the original image
#             original_image = Image.open(img_path).convert("RGBA")

#             # Ground truth overlay
#             overlay_gt = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
#             draw_gt = ImageDraw.Draw(overlay_gt)
#             for annotation in gt_annotations:
#                 segmentation = annotation['segmentation'][0]
#                 points = [(segmentation[i], segmentation[i + 1]) for i in range(0, len(segmentation), 2)]
#                 draw_gt.polygon(points, fill=(255, 255, 255, 100))  # Semi-transparent red for ground truth
#             combined_gt = Image.alpha_composite(original_image, overlay_gt)

#             # Prediction overlay
#             # Create a transparent RGBA image for the prediction overlay
#             overlay_pred = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
#             pred_mask_grayscale = Image.fromarray((pred_mask_resized * 255).astype(np.uint8), mode="L")  # 0 for background, 255 for prediction
#             pred_mask_colored = Image.new("RGBA", pred_mask_grayscale.size, (0, 0, 255, 30))  # Blue with transparency
#             overlay_pred = Image.composite(pred_mask_colored, overlay_pred, pred_mask_grayscale)
#             combined_pred = Image.alpha_composite(original_image, overlay_pred)

#             # # Create a PIL image with black background and white error regions
#             error_image = Image.fromarray(
#                 np.where(
#                     error_mask[..., None] > 0,  # Condition for error pixels
#                     [255, 255, 255, 255],         # White for error pixels
#                     [0, 0, 0, 255]                # Black for non-error pixels
#                 ).astype(np.uint8)
#             )

#             tp_image = Image.fromarray(
#                 np.where(
#                     tp_mask[..., None] > 0,  # Condition for error pixels
#                     [255, 255, 255, 255],         # White for error pixels
#                     [0, 0, 0, 255]                # Black for non-error pixels
#                 ).astype(np.uint8)
#             )


#             # Error overlay (FN in red, FP in blue)
#             overlay_error = Image.new("RGBA", original_image.size, (0, 0, 0, 255))
#             fn_pil = Image.fromarray((fn_mask > 0).astype(np.uint8) * 255).convert("L")
#             fp_pil = Image.fromarray((fp_mask > 0).astype(np.uint8) * 255).convert("L")
#             tp_pil = Image.fromarray((tp_mask > 0).astype(np.uint8) * 255).convert("L")
#             error_draw = ImageDraw.Draw(overlay_error)
#             error_draw.bitmap((0, 0), fn_pil, fill=(255, 255, 0, 100))  # Red for False Negative, (255, 0, 0, 100)   (50, 50, 50, 255)
#             error_draw.bitmap((0, 0), fp_pil, fill=(0, 255, 255, 100))  # Blue for False Positive,(0, 0, 255, 100)   (200, 200, 200, 255)
#             error_draw.bitmap((0, 0), tp_pil, fill=(0, 0, 0, 255)) 
#             combined_error = overlay_error

#             # Plot results
#             fig, axs = plt.subplots(1, 5, figsize=(20, 10))
#             axs[0].imshow(original_image)
#             axs[0].axis('off')
#             axs[0].set_title('Original Image')

#             axs[1].imshow(combined_gt)
#             axs[1].axis('off')
#             axs[1].set_title('Ground Truth Overlay')

#             axs[2].imshow(combined_pred)
#             axs[2].axis('off')
#             axs[2].set_title('Prediction Overlay')

#             # tp_mask
#             axs[3].imshow(np.array(tp_mask)) 
#             axs[3].axis('off')
#             axs[3].set_title(f'True Positive Overlay')

#             axs[4].imshow(np.array(combined_error))  # Error visualization (FN & FP) 
#             axs[4].axis('off')
#             axs[4].set_title("Error (IoU: {iou:.4f})")

#             # save img
#             output_path = os.path.join(output_dir, f"{image_file_name}_result.png")

#             images = [original_image, combined_gt, combined_pred, tp_image, combined_error]
#             labels = ["Original Image", "Ground Truth", "Prediction","True Positive Overlay", "Error Map"]
#             final_image = combine_images_with_labels(images, labels, iou=iou, border_width=30, font_size=30,outer_margin=30)
#             final_image.save(output_path)
            
#             # plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
#             # plt.close(fig)
#             plt.show()

#             print(f"Image: {image_file_name} | IoU: {iou:.4f} | AP50: {ap50:.4f} | AP75: {ap75:.4f} | mAP: {mAP:.4f} | mAP: {mAP:.4f} ")

# print(f"IoU results saved to {iou_csv_path}")

In [22]:
# improved 



# Initialize Roboflow and model
# rf = Roboflow(api_key="uZgVV5Mu30Veqelqd61T")
# project = rf.workspace().project("ss-tijuca2019_tile400_stride0")
# model = project.version("1").model  # Specify version

# # Input and output directories
# image_dir = 'SS-Tijuca2019_tile400_stride0.v1i.coco-segmentation/test'


# Initialize Roboflow and model
rf = Roboflow(api_key="uZgVV5Mu30Veqelqd61T")
project = rf.workspace().project("ss_rio_tile1024_stride0") # input model id
model = project.version("1").model # input model version

# Input and output directories
image_dir = 'SS_Rio_tile1024_stride0.v1i.coco-segmentation/test'


coco_json_path = os.path.join(image_dir, "_annotations.coco.json")
output_dir = os.path.join(image_dir, "segmentation_results_fp_fn")
os.makedirs(output_dir, exist_ok=True)

# Initialize CSV for IoU results
iou_csv_path = os.path.join(output_dir, "iou_results.csv")
with open(iou_csv_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Image Name", "IoU", "AP50", "AP75", "mAP", "fn_ratio", "fp_ratio", "tp_ratio"])

    with open(coco_json_path) as f:
        coco_data = json.load(f)

    image_ids = [image['id'] for image in coco_data['images']]
    threshold_list = np.linspace(0.5, 0.95, 10)  # IoU thresholds for mAP
    
    for image_id in image_ids:
        if image_id != -1: 
            image_info = next(img for img in coco_data['images'] if img['id'] == image_id)
            image_file_name = image_info['file_name']
            img_path = os.path.join(image_dir, image_file_name)

            # Load ground truth annotations
            gt_annotations = [anno for anno in coco_data['annotations'] if anno['image_id'] == image_id]
            im = cv2.imread(img_path)
            gt_mask = np.zeros((im.shape[0], im.shape[1]), dtype=np.uint8)

            for annotation in gt_annotations:
                segmentation = annotation['segmentation'][0]
                points = [(segmentation[i], segmentation[i + 1]) for i in range(0, len(segmentation), 2)]
                cv2.fillPoly(gt_mask, [np.array(points, dtype=np.int32)], 1)

            # Perform inference
            result = model.predict(img_path)
            prediction = result.json()

            if not prediction['predictions']:
                print(f"No predictions for {image_file_name}. Skipping.")
                csv_writer.writerow([image_file_name, "No predictions"])
                continue

            # Decode prediction mask
            segmentation_mask_b64 = prediction['predictions'][0]['segmentation_mask']
            decoded_mask = base64.b64decode(segmentation_mask_b64)
            pred_mask = np.array(Image.open(io.BytesIO(decoded_mask)))

            # Resize pred_mask to match gt_mask dimensions
            pred_mask_resized = cv2.resize(pred_mask, (gt_mask.shape[1], gt_mask.shape[0]), interpolation=cv2.INTER_NEAREST)
            pred_mask_resized = (pred_mask_resized > 0).astype(np.uint8) 



            # Flatten masks for precision-recall computation
            gt_flat = (gt_mask.flatten() > 0).astype(np.uint8)  # Ensure ground truth values are {0, 1}
            pred_flat = pred_mask_resized.flatten() / 255.0

            # Calculate Precision-Recall curve
            precision, recall, _ = precision_recall_curve(gt_flat, pred_flat)
            ap50 = average_precision_score(gt_flat, pred_flat)  # AP50 calculation

            # Calculate AP at each IoU threshold
            ap_values = []
            for threshold in threshold_list:
                pred_binary = (pred_flat >= threshold * 255).astype(np.uint8)  # Binarize predictions
                precision, recall, _ = precision_recall_curve(gt_flat, pred_binary)
                ap = average_precision_score(gt_flat, pred_binary)
                ap_values.append(ap)

            # Extract AP50 and AP75
            ap50 = ap_values[0]  # First value corresponds to IoU=0.5
            ap75 = ap_values[5]  # Sixth value corresponds to IoU=0.75
            mAP = np.mean(ap_values)  # Mean of all AP values

            
            # Construct confusion matrix for binary segmentation
            conf_matrix = np.zeros((2, 2), dtype=np.int32)
            for gt, pred in zip(gt_flat, (pred_flat >= 0.5).astype(np.uint8)):
                conf_matrix[gt, pred] += 1

            # Error mask
            error_mask = cv2.bitwise_and(gt_mask, cv2.bitwise_not(pred_mask_resized))
            fn_mask = cv2.bitwise_and(gt_mask, cv2.bitwise_not(pred_mask_resized))          # False Negative (FN): Ground Truth is 1, Prediction is 0
            fp_mask = cv2.bitwise_and(pred_mask_resized, cv2.bitwise_not(gt_mask))        # False Positive (FP): Prediction is 1, Ground Truth is 0
            tp_mask = cv2.bitwise_and(gt_mask, pred_mask_resized)

            # IoU calculation
            # gt_binary = (gt_mask > 0).astype(np.uint8)
            # pred_binary = (pred_mask_resized > 0).astype(np.uint8)

            # intersection = np.logical_and(gt_binary, pred_binary).sum()
            # union = np.logical_or(gt_binary, pred_binary).sum()
            # iou = intersection / union if union > 0 else 0

            iou = compute_iou_for_each_class(gt_mask,pred_mask_resized,2)

            # Calculate FN, FP, and TP counts
            fn_count = np.sum(fn_mask)
            fp_count = np.sum(fp_mask)
            tp_count = np.sum(gt_mask & pred_mask_resized)
            total_pixels = gt_mask.size

            # Calculate ratios
            fn_ratio = fn_count / total_pixels
            fp_ratio = fp_count / total_pixels
            tp_ratio = tp_count / total_pixels

            # Write to CSV
            csv_writer.writerow([image_file_name, iou, ap50, ap75, mAP, fn_ratio, fp_ratio, tp_ratio])

            # Visualization: overlay ground truth and prediction on the original image
            original_image = Image.open(img_path).convert("RGBA")

            # Ground truth overlay
            overlay_gt = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
            draw_gt = ImageDraw.Draw(overlay_gt)
            for annotation in gt_annotations:
                segmentation = annotation['segmentation'][0]
                points = [(segmentation[i], segmentation[i + 1]) for i in range(0, len(segmentation), 2)]
                draw_gt.polygon(points, fill=(255, 255, 255, 100))  # Semi-transparent red for ground truth
            combined_gt = Image.alpha_composite(original_image, overlay_gt)

            # Prediction overlay
            overlay_pred = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
            draw_pred = ImageDraw.Draw(overlay_pred)
            pred_mask_pil = Image.fromarray(pred_mask_resized)
            draw_pred.bitmap((0, 0), pred_mask_pil, fill=(0, 0, 255, 30))  # Semi-transparent blue for prediction
            combined_pred = Image.alpha_composite(original_image, overlay_pred)



            # # Create a PIL image with black background and white error regions
            # error_image = Image.fromarray(
            #     np.where(
            #         error_mask[..., None] == 255,  # Condition for error pixels
            #         [255, 255, 255, 255],         # White for error pixels
            #         [0, 0, 0, 255]                # Black for non-error pixels
            #     ).astype(np.uint8)
            # )

            # Error overlay (FN in red, FP in blue)
            overlay_error = Image.new("RGBA", original_image.size, (0, 0, 0, 255))
            fn_pil = Image.fromarray((fn_mask > 0).astype(np.uint8) * 255).convert("L")
            fp_pil = Image.fromarray((fp_mask > 0).astype(np.uint8) * 255).convert("L")
            tp_pil = Image.fromarray((tp_mask > 0).astype(np.uint8) * 255).convert("L")
            error_draw = ImageDraw.Draw(overlay_error)
            error_draw.bitmap((0, 0), fn_pil, fill=(255, 0, 0, 100))  # Red for False Negative, (255, 0, 0, 100)   (50, 50, 50, 255)
            error_draw.bitmap((0, 0), fp_pil, fill=(0, 0, 255, 100))  # Blue for False Positive,(0, 0, 255, 100)   (200, 200, 200, 255)
            error_draw.bitmap((0, 0), tp_pil, fill=(255, 255, 255, 255)) 
            combined_error = overlay_error

            # Plot results
            # fig, axs = plt.subplots(1, 4, figsize=(20, 10))
            # axs[0].imshow(original_image)
            # axs[0].axis('off')
            # axs[0].set_title('Original Image')

            # axs[1].imshow(combined_gt)
            # axs[1].axis('off')
            # axs[1].set_title('Ground Truth Overlay')

            # axs[2].imshow(combined_pred)
            # axs[2].axis('off')
            # axs[2].set_title('Prediction Overlay')

            # # axs[3].imshow(error_image)
            # # Error Map (False Positive & False Negative)
            # axs[3].imshow(np.array(combined_error))  # Error visualization (FN & FP)
            # axs[3].axis('off')
            # axs[3].set_title(f'Error (IoU: {iou:.4f})')

            # save img
            # output_path = os.path.join(output_dir, f"{image_file_name}_result.png")

            # images = [original_image, combined_gt, combined_pred, combined_error]
            # labels = ["Original Image", "Ground Truth", "Prediction", "Error Map"]
            # final_image = combine_images_with_labels(images, labels, iou=iou, border_width=30, font_size=30,outer_margin=30)
            # final_image.save(output_path)
            
            # plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
            # plt.close(fig)
            # plt.show()

            print(f"Image: {image_file_name} | IoU: {iou:.4f} | AP50: {ap50:.4f} | AP75: {ap75:.4f} | mAP: {mAP:.4f} | mAP: {mAP:.4f} | fn_ratio: {fn_ratio:.4f} | fp_ratio: {fp_ratio:.4f} |  tp_ratio: {tp_ratio:.4f}")

print(f"IoU results saved to {iou_csv_path}")

loading Roboflow workspace...


loading Roboflow project...
Image: 000000004768_png.rf.0e5109576c9b35e2f8179ae3f50a7efd.jpg | IoU: 0.6881 | AP50: 0.1096 | AP75: 0.1096 | mAP: 0.1096 | mAP: 0.1096 | fn_ratio: 0.0346 | fp_ratio: 0.0505 |  tp_ratio: 0.0750
Image: 000000003270_png.rf.1cd5cf5aefd4008f661f1b4d4af28c15.jpg | IoU: 0.5846 | AP50: 0.6322 | AP75: 0.6322 | mAP: 0.6322 | mAP: 0.6322 | fn_ratio: 0.2131 | fp_ratio: 0.0470 |  tp_ratio: 0.4191
Image: 000000003257_png.rf.04268d1dedf15db0fc3ff73bcf7ffb42.jpg | IoU: 0.4603 | AP50: 0.1844 | AP75: 0.1844 | mAP: 0.1844 | mAP: 0.1844 | fn_ratio: 0.0349 | fp_ratio: 0.2968 |  tp_ratio: 0.1495
Image: 000000005520_png.rf.0c1fcc9956dcdf55591ae5e3ca926ac3.jpg | IoU: 0.7694 | AP50: 0.0963 | AP75: 0.0963 | mAP: 0.0963 | mAP: 0.0963 | fn_ratio: 0.0326 | fp_ratio: 0.0123 |  tp_ratio: 0.0637
Image: 000000005558_png.rf.10669832edfb0b67fa5937b3fe5b20d2.jpg | IoU: 0.7685 | AP50: 0.1692 | AP75: 0.1692 | mAP: 0.1692 | mAP: 0.1692 | fn_ratio: 0.0616 | fp_ratio: 0.0062 |  tp_ratio: 0.1075
Im

In [None]:
# Debug check: show the Python type of `gt_mask`.
# This cell does not define `gt_mask`; it relies on the variable still being
# present in the kernel from a previously executed evaluation cell.
type(gt_mask)

In [None]:
# Debug dump: shape and distinct values of the arrays left in the kernel by the
# evaluation cell (they are not defined in this cell).
for debug_array in (fn_mask, fp_mask, gt_mask, pred_mask_resized, tp_count):
    print(debug_array.shape)
    print(np.unique(debug_array))

# Total number of pixels in the ground-truth mask.
print(gt_mask.size)

In [None]:
# Debug: false-negative and false-positive totals from the last evaluated image,
# one value per line.
print(fn_count, fp_count, sep="\n")

In [None]:
# Debug: true-positive total and pixel count from the last evaluated image,
# one value per line.
print(tp_count, total_pixels, sep="\n")

In [None]:
# improved 



# Initialize Roboflow and model.
# The API key is read from the environment so the secret is not stored in the
# notebook; export ROBOFLOW_API_KEY before starting the kernel.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY")
if not roboflow_api_key:
    raise RuntimeError("Set the ROBOFLOW_API_KEY environment variable before running this cell.")

rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace().project("ss_rio_tile1024_stride0")  # input model id
model = project.version("1").model  # input model version

# Alternative dataset used previously (swap both the project id and image_dir):
#   project id: "ss-tijuca2019_tile400_stride0", version "1"
#   image_dir = 'SS-Tijuca2019_tile400_stride0.v1i.coco-segmentation/test'

# Input and output directories
image_dir = 'SS_Rio_tile1024_stride0.v1i.coco-segmentation/test'

coco_json_path = os.path.join(image_dir, "_annotations.coco.json")
output_dir = os.path.join(image_dir, "segmentation_results_fp_fn")
os.makedirs(output_dir, exist_ok=True)

# Evaluate the model on every test image.
# For each image this cell rasterizes the COCO ground truth, runs inference,
# computes IoU / AP / FN-FP-TP ratios, appends one CSV row and saves a
# four-panel comparison image to `output_dir`.
iou_csv_path = os.path.join(output_dir, "iou_results.csv")
with open(iou_csv_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Image Name", "IoU", "AP50", "AP75", "mAP", "fn_ratio", "fp_ratio", "tp_ratio"])

    with open(coco_json_path) as f:
        coco_data = json.load(f)

    # Index annotations by image once; re-scanning the whole annotation list
    # for every image is quadratic in the dataset size.
    annotations_by_image = {}
    for anno in coco_data['annotations']:
        annotations_by_image.setdefault(anno['image_id'], []).append(anno)

    # Score thresholds 0.50, 0.55, ..., 0.95 applied to the prediction mask.
    threshold_list = np.linspace(0.5, 0.95, 10)

    for image_info in coco_data['images']:
        image_id = image_info['id']
        image_file_name = image_info['file_name']
        img_path = os.path.join(image_dir, image_file_name)

        # Collect every polygon of every annotation (an annotation may hold
        # several polygons). Assumes polygon-format segmentations, i.e. flat
        # [x0, y0, x1, y1, ...] lists; polygons with fewer than 3 vertices
        # are dropped because they cannot be filled.
        gt_annotations = annotations_by_image.get(image_id, [])
        gt_polygons = [
            list(zip(coords[0::2], coords[1::2]))
            for annotation in gt_annotations
            for coords in annotation['segmentation']
            if len(coords) >= 6
        ]

        im = cv2.imread(img_path)
        if im is None:
            # cv2.imread returns None instead of raising on a missing/corrupt file.
            print(f"Could not read {image_file_name}. Skipping.")
            csv_writer.writerow([image_file_name, "Unreadable image"])
            continue

        # Ground-truth mask: 255 inside any annotated polygon, 0 elsewhere.
        gt_mask = np.zeros((im.shape[0], im.shape[1]), dtype=np.uint8)
        for points in gt_polygons:
            cv2.fillPoly(gt_mask, [np.array(points, dtype=np.int32)], 255)

        # Perform inference
        result = model.predict(img_path)
        prediction = result.json()

        if not prediction['predictions']:
            print(f"No predictions for {image_file_name}. Skipping.")
            csv_writer.writerow([image_file_name, "No predictions"])
            continue

        # Decode prediction mask (base64-encoded image in the response)
        segmentation_mask_b64 = prediction['predictions'][0]['segmentation_mask']
        decoded_mask = base64.b64decode(segmentation_mask_b64)
        pred_mask = np.array(Image.open(io.BytesIO(decoded_mask)))

        # Resize pred_mask to match gt_mask dimensions, then binarize to {0, 255}
        pred_mask_resized = cv2.resize(pred_mask, (gt_mask.shape[1], gt_mask.shape[0]), interpolation=cv2.INTER_NEAREST)
        pred_mask_resized = (pred_mask_resized > 0).astype(np.uint8) * 255

        # Flatten masks for precision-recall computation
        gt_flat = (gt_mask.flatten() > 0).astype(np.uint8)  # ground truth in {0, 1}
        pred_flat = pred_mask_resized.flatten() / 255.0      # prediction scores in [0, 1]

        # Precision-Recall curve of the raw prediction scores
        precision, recall, _ = precision_recall_curve(gt_flat, pred_flat)

        # AP at each score threshold. `pred_flat` lives in [0, 1], so the
        # threshold must be used as-is (scaling it by 255 made every
        # prediction all-zero and collapsed AP to the positive-pixel ratio).
        # NOTE: the mask is binarized above, so every threshold currently
        # selects the same pixels and AP50 / AP75 / mAP coincide; the loop
        # only differentiates them if a soft (probability) mask is used.
        ap_values = []
        for threshold in threshold_list:
            pred_binary = (pred_flat >= threshold).astype(np.uint8)
            ap_values.append(average_precision_score(gt_flat, pred_binary))

        ap50 = ap_values[0]  # threshold 0.50
        ap75 = ap_values[5]  # threshold 0.75
        mAP = np.mean(ap_values)  # mean over all thresholds

        # 2x2 confusion matrix (rows: ground truth, columns: prediction),
        # built in one vectorized pass instead of a per-pixel Python loop.
        pred_labels = (pred_flat >= 0.5).astype(np.intp)
        conf_matrix = np.bincount(
            gt_flat.astype(np.intp) * 2 + pred_labels, minlength=4
        ).reshape(2, 2).astype(np.int32)

        # Per-pixel outcome masks (values 0 / 255)
        fn_mask = cv2.bitwise_and(gt_mask, cv2.bitwise_not(pred_mask_resized))  # GT is 1, prediction is 0
        fp_mask = cv2.bitwise_and(pred_mask_resized, cv2.bitwise_not(gt_mask))  # prediction is 1, GT is 0
        tp_mask = cv2.bitwise_and(gt_mask, pred_mask_resized)                   # both are 1

        iou = compute_iou_for_each_class(gt_mask, pred_mask_resized, 2)

        # Count pixels, not intensities: the masks store 255 for "set", so
        # summing them directly would inflate every count by a factor of 255.
        fn_count = np.sum(fn_mask > 0)
        fp_count = np.sum(fp_mask > 0)
        tp_count = np.sum(tp_mask > 0)
        total_pixels = gt_mask.size

        # Fractions of the image area
        fn_ratio = fn_count / total_pixels
        fp_ratio = fp_count / total_pixels
        tp_ratio = tp_count / total_pixels

        # Write to CSV
        csv_writer.writerow([image_file_name, iou, ap50, ap75, mAP, fn_ratio, fp_ratio, tp_ratio])

        # Visualization: overlay ground truth and prediction on the original image
        original_image = Image.open(img_path).convert("RGBA")

        # Ground truth overlay (semi-transparent white polygons)
        overlay_gt = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
        draw_gt = ImageDraw.Draw(overlay_gt)
        for points in gt_polygons:
            draw_gt.polygon(points, fill=(255, 255, 255, 100))
        combined_gt = Image.alpha_composite(original_image, overlay_gt)

        # Prediction overlay (semi-transparent blue)
        overlay_pred = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
        draw_pred = ImageDraw.Draw(overlay_pred)
        pred_mask_pil = Image.fromarray(pred_mask_resized)
        draw_pred.bitmap((0, 0), pred_mask_pil, fill=(0, 0, 255, 30))
        combined_pred = Image.alpha_composite(original_image, overlay_pred)

        # Error map on a black background: FN in red, FP in blue, TP in white
        overlay_error = Image.new("RGBA", original_image.size, (0, 0, 0, 255))
        fn_pil = Image.fromarray((fn_mask > 0).astype(np.uint8) * 255).convert("L")
        fp_pil = Image.fromarray((fp_mask > 0).astype(np.uint8) * 255).convert("L")
        tp_pil = Image.fromarray((tp_mask > 0).astype(np.uint8) * 255).convert("L")
        error_draw = ImageDraw.Draw(overlay_error)
        error_draw.bitmap((0, 0), fn_pil, fill=(255, 0, 0, 100))
        error_draw.bitmap((0, 0), fp_pil, fill=(0, 0, 255, 100))
        error_draw.bitmap((0, 0), tp_pil, fill=(255, 255, 255, 255))
        combined_error = overlay_error

        # Plot results inline
        fig, axs = plt.subplots(1, 4, figsize=(20, 10))
        axs[0].imshow(original_image)
        axs[0].axis('off')
        axs[0].set_title('Original Image')

        axs[1].imshow(combined_gt)
        axs[1].axis('off')
        axs[1].set_title('Ground Truth Overlay')

        axs[2].imshow(combined_pred)
        axs[2].axis('off')
        axs[2].set_title('Prediction Overlay')

        axs[3].imshow(np.array(combined_error))
        axs[3].axis('off')
        axs[3].set_title(f'Error (IoU: {iou:.4f})')

        # Save the labelled four-panel image
        output_path = os.path.join(output_dir, f"{image_file_name}_result.png")

        images = [original_image, combined_gt, combined_pred, combined_error]
        labels = ["Original Image", "Ground Truth", "Prediction", "Error Map"]
        final_image = combine_images_with_labels(images, labels, iou=iou, border_width=30, font_size=30, outer_margin=30)
        final_image.save(output_path)

        plt.show()
        # One figure is created per image; close it so they do not accumulate.
        plt.close(fig)

        print(f"Image: {image_file_name} | IoU: {iou:.4f} | AP50: {ap50:.4f} | AP75: {ap75:.4f} | mAP: {mAP:.4f}")

print(f"IoU results saved to {iou_csv_path}")

Roboflow mIoU calculation method: the mean of the per-class IoU values derived from a confusion matrix

In [None]:
import numpy as np

def calculate_miou(actual, predicted, num_classes):
    """
    Calculate Mean Intersection over Union (mIoU).

    A ``num_classes x num_classes`` confusion matrix is accumulated from the
    two label maps (rows: ground truth, columns: prediction). The IoU of class
    ``c`` is ``cm[c, c] / (row_sum[c] + col_sum[c] - cm[c, c])``. Classes that
    occur in neither input are left out of the mean.

    Parameters:
    - actual: Ground Truth labels (integer array, any shape)
    - predicted: Predicted labels (integer array, same number of elements)
    - num_classes: Total number of classes; labels must lie in [0, num_classes)

    Returns:
    - mIoU: Mean IoU over the classes present, or NaN if no class is present
      (e.g. empty inputs)

    Raises:
    - ValueError: if the two inputs do not contain the same number of labels
    - IndexError: if any label is outside [0, num_classes)
    """
    # Flatten both label maps; int64 avoids overflow in the index arithmetic
    # below when the inputs are small unsigned types such as uint8.
    actual_flat = np.asarray(actual).ravel().astype(np.int64)
    predicted_flat = np.asarray(predicted).ravel().astype(np.int64)

    if actual_flat.size != predicted_flat.size:
        raise ValueError(
            f"actual and predicted must have the same number of labels "
            f"({actual_flat.size} != {predicted_flat.size})"
        )

    if actual_flat.size:
        lowest = min(actual_flat.min(), predicted_flat.min())
        highest = max(actual_flat.max(), predicted_flat.max())
        # Reject negative labels explicitly: plain array indexing would wrap
        # them around to the last classes and silently corrupt the matrix.
        if lowest < 0 or highest >= num_classes:
            raise IndexError(
                f"labels must be in [0, {num_classes}); found range [{lowest}, {highest}]"
            )

    # Encode each (actual, predicted) pair as one flat index and count them in
    # a single vectorized pass instead of a per-pixel Python loop.
    pair_counts = np.bincount(
        actual_flat * num_classes + predicted_flat,
        minlength=num_classes * num_classes,
    ).reshape(num_classes, num_classes)

    intersection = np.diag(pair_counts)
    union = pair_counts.sum(axis=1) + pair_counts.sum(axis=0) - intersection

    # Only classes that appear in at least one of the inputs contribute.
    present = union > 0
    if not present.any():
        return np.nan

    return np.mean(intersection[present] / union[present])

# Example usage
# Worked example: two 4x4 label maps that disagree on a few pixels.
num_classes = 6  # Assuming labels are 0-5

actual = np.array([
    [0, 1, 1, 2],
    [0, 0, 2, 2],
    [3, 3, 4, 4],
    [3, 5, 5, 5],
])
predicted = np.array([
    [0, 1, 0, 2],
    [0, 0, 2, 1],
    [3, 3, 4, 4],
    [3, 5, 5, 0],
])

miou = calculate_miou(actual, predicted, num_classes)
print(f"Mean IoU: {miou:.4f}")

change detection

In [None]:
# change detection


import io
import os
import json
import csv
import base64
import numpy as np
from PIL import Image, ImageDraw
import cv2
from matplotlib import pyplot as plt
from roboflow import Roboflow
from sklearn.metrics import precision_recall_curve, average_precision_score

# Initialize Roboflow and model.
# The API key is read from the environment so the secret is not stored in the
# notebook; export ROBOFLOW_API_KEY before starting the kernel.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY")
if not roboflow_api_key:
    raise RuntimeError("Set the ROBOFLOW_API_KEY environment variable before running this cell.")

rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace().project("ss_rio_tile1024_stride0")  # input model id
model = project.version("1").model  # input model version

# Alternative dataset used previously (swap both the project id and image_dir):
#   project id: "ss-tijuca2019_tile400_stride0", version "1"
#   image_dir = 'SS-Tijuca2019_tile400_stride0.v1i.coco-segmentation/test'

# Input and output directories
image_dir = 'SS_Rio_tile1024_stride0.v1i.coco-segmentation/test-new'

coco_json_path = os.path.join(image_dir, "_annotations.coco.json")
output_dir = os.path.join(image_dir, "segmentation_results_fp_fn")
os.makedirs(output_dir, exist_ok=True)

# Evaluate the model on every image of the change-detection test set.
# For each image this cell rasterizes the COCO ground truth, runs inference,
# computes IoU / AP, appends one CSV row, saves a four-panel comparison image
# and stores the predicted and ground-truth masks as PNG files for the
# mask-difference step.
iou_csv_path = os.path.join(output_dir, "iou_results.csv")
with open(iou_csv_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Image Name", "IoU", "AP50", "AP75", "mAP"])

    with open(coco_json_path) as f:
        coco_data = json.load(f)

    # Index annotations by image once; re-scanning the whole annotation list
    # for every image is quadratic in the dataset size.
    annotations_by_image = {}
    for anno in coco_data['annotations']:
        annotations_by_image.setdefault(anno['image_id'], []).append(anno)

    # Score thresholds 0.50, 0.55, ..., 0.95 applied to the prediction mask.
    threshold_list = np.linspace(0.5, 0.95, 10)

    for image_info in coco_data['images']:
        image_id = image_info['id']
        image_file_name = image_info['file_name']
        img_path = os.path.join(image_dir, image_file_name)

        # Collect every polygon of every annotation (an annotation may hold
        # several polygons). Assumes polygon-format segmentations, i.e. flat
        # [x0, y0, x1, y1, ...] lists; polygons with fewer than 3 vertices
        # are dropped because they cannot be filled.
        gt_annotations = annotations_by_image.get(image_id, [])
        gt_polygons = [
            list(zip(coords[0::2], coords[1::2]))
            for annotation in gt_annotations
            for coords in annotation['segmentation']
            if len(coords) >= 6
        ]

        im = cv2.imread(img_path)
        if im is None:
            # cv2.imread returns None instead of raising on a missing/corrupt file.
            print(f"Could not read {image_file_name}. Skipping.")
            csv_writer.writerow([image_file_name, "Unreadable image"])
            continue

        # Ground-truth mask: 255 inside any annotated polygon, 0 elsewhere.
        gt_mask = np.zeros((im.shape[0], im.shape[1]), dtype=np.uint8)
        for points in gt_polygons:
            cv2.fillPoly(gt_mask, [np.array(points, dtype=np.int32)], 255)

        # Perform inference
        result = model.predict(img_path)
        prediction = result.json()

        if not prediction['predictions']:
            print(f"No predictions for {image_file_name}. Skipping.")
            csv_writer.writerow([image_file_name, "No predictions"])
            continue

        # Decode prediction mask (base64-encoded image in the response)
        segmentation_mask_b64 = prediction['predictions'][0]['segmentation_mask']
        decoded_mask = base64.b64decode(segmentation_mask_b64)
        pred_mask = np.array(Image.open(io.BytesIO(decoded_mask)))

        # Resize pred_mask to match gt_mask dimensions, then binarize to {0, 255}
        pred_mask_resized = cv2.resize(pred_mask, (gt_mask.shape[1], gt_mask.shape[0]), interpolation=cv2.INTER_NEAREST)
        pred_mask_resized = (pred_mask_resized > 0).astype(np.uint8) * 255

        # IoU of the foreground class
        intersection = np.logical_and(gt_mask, pred_mask_resized).sum()
        union = np.logical_or(gt_mask, pred_mask_resized).sum()
        iou = intersection / union if union > 0 else 0

        # Flatten masks for precision-recall computation
        gt_flat = (gt_mask.flatten() > 0).astype(np.uint8)  # ground truth in {0, 1}
        pred_flat = pred_mask_resized.flatten() / 255.0      # prediction scores in [0, 1]

        # Precision-Recall curve of the raw prediction scores
        precision, recall, _ = precision_recall_curve(gt_flat, pred_flat)

        # AP at each score threshold. `pred_flat` lives in [0, 1], so the
        # threshold must be used as-is (scaling it by 255 made every
        # prediction all-zero and collapsed AP to the positive-pixel ratio).
        # NOTE: the mask is binarized above, so every threshold currently
        # selects the same pixels and AP50 / AP75 / mAP coincide; the loop
        # only differentiates them if a soft (probability) mask is used.
        ap_values = []
        for threshold in threshold_list:
            pred_binary = (pred_flat >= threshold).astype(np.uint8)
            ap_values.append(average_precision_score(gt_flat, pred_binary))

        ap50 = ap_values[0]  # threshold 0.50
        ap75 = ap_values[5]  # threshold 0.75
        mAP = np.mean(ap_values)  # mean over all thresholds

        # Write results to CSV
        csv_writer.writerow([image_file_name, iou, ap50, ap75, mAP])

        # Visualization: overlay ground truth and prediction on the original image
        original_image = Image.open(img_path).convert("RGBA")

        # Ground truth overlay (semi-transparent white polygons)
        overlay_gt = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
        draw_gt = ImageDraw.Draw(overlay_gt)
        for points in gt_polygons:
            draw_gt.polygon(points, fill=(255, 255, 255, 100))
        combined_gt = Image.alpha_composite(original_image, overlay_gt)

        # Prediction overlay (semi-transparent blue)
        overlay_pred = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
        draw_pred = ImageDraw.Draw(overlay_pred)
        pred_mask_pil = Image.fromarray(pred_mask_resized)
        draw_pred.bitmap((0, 0), pred_mask_pil, fill=(0, 0, 255, 30))
        combined_pred = Image.alpha_composite(original_image, overlay_pred)

        # False Negative (FN): Ground Truth is 1, Prediction is 0
        fn_mask = cv2.bitwise_and(gt_mask, cv2.bitwise_not(pred_mask_resized))

        # False Positive (FP): Prediction is 1, Ground Truth is 0
        fp_mask = cv2.bitwise_and(pred_mask_resized, cv2.bitwise_not(gt_mask))

        # Error map on a black background: FN in white, FP in mid-grey
        overlay_error = Image.new("RGBA", original_image.size, (0, 0, 0, 255))
        fn_pil = Image.fromarray(fn_mask).convert("L")
        fp_pil = Image.fromarray(fp_mask).convert("L")
        error_draw = ImageDraw.Draw(overlay_error)
        error_draw.bitmap((0, 0), fn_pil, fill=(255, 255, 255, 255))
        error_draw.bitmap((0, 0), fp_pil, fill=(128, 128, 128, 255))
        combined_error = overlay_error

        # Plot results inline
        fig, axs = plt.subplots(1, 4, figsize=(20, 10))
        axs[0].imshow(original_image)
        axs[0].axis('off')
        axs[0].set_title('Original Image')

        axs[1].imshow(combined_gt)
        axs[1].axis('off')
        axs[1].set_title('Ground Truth Overlay')

        axs[2].imshow(combined_pred)
        axs[2].axis('off')
        axs[2].set_title('Prediction Overlay')

        axs[3].imshow(np.array(combined_error))
        axs[3].axis('off')
        axs[3].set_title(f'Error (IoU: {iou:.4f})')

        output_path = os.path.join(output_dir, f"{image_file_name}_result.png")

        # Add labels to each panel and stitch them side by side
        original_with_label = add_labels(original_image, "Original Image")
        gt_with_label = add_labels(combined_gt, "Ground Truth")
        pred_with_label = add_labels(combined_pred, "Prediction")
        error_with_label = add_labels(combined_error, "Error Map", iou=iou)

        output_image = np.hstack([
            np.array(original_with_label),
            np.array(gt_with_label),
            np.array(pred_with_label),
            np.array(error_with_label)
        ])
        final_image = Image.fromarray(output_image)
        final_image.save(output_path)

        plt.show()
        # One figure is created per image; close it so they do not accumulate.
        plt.close(fig)

        # Save both masks as grayscale PNG images for the mask-difference step
        pred_mask_image = Image.fromarray(pred_mask_resized)
        gt_mask_image = Image.fromarray(gt_mask)

        mask_output_path = os.path.join(output_dir, f"{image_file_name}_pred_mask.png")
        pred_mask_image.save(mask_output_path, "PNG")

        gt_mask_output_path = os.path.join(output_dir, f"{image_file_name}_gt_mask.png")
        gt_mask_image.save(gt_mask_output_path, "PNG")

        print(f"Saved predicted mask to {mask_output_path}")

        print(f"Image: {image_file_name} | IoU: {iou:.4f} | AP50: {ap50:.4f} | AP75: {ap75:.4f} | mAP: {mAP:.4f}")

print(f"IoU results saved to {iou_csv_path}")

In [None]:
import numpy as np
from PIL import Image
import cv2

# Change detection: pixel-wise difference between two predicted masks.

# Define directory and file paths
changedetection_image_dir = 'SS_Rio_tile1024_stride0.v1i.coco-segmentation/test-new/segmentation_results_fp_fn'
mask1_path = os.path.join(changedetection_image_dir, "000000003537_png.rf.47308245be44358eabfd94bd7f4b857b.jpg_pred_mask.png")
mask2_path = os.path.join(changedetection_image_dir, "3537-2020-12-16-resize.png_pred_mask.png")

# Check that both files exist and report exactly which ones are missing
missing_mask_paths = [path for path in (mask1_path, mask2_path) if not os.path.exists(path)]
if missing_mask_paths:
    raise FileNotFoundError(f"One or both mask files are missing! Missing: {missing_mask_paths}")

# Load masks; the context managers release the file handles once the pixel
# data has been copied into numpy arrays.
with Image.open(mask1_path) as mask1_image:
    mask1 = np.array(mask1_image)
with Image.open(mask2_path) as mask2_image:
    mask2 = np.array(mask2_image)

# cv2.absdiff needs identically shaped inputs; fail with a readable message
# instead of an OpenCV assertion if the two tiles differ in size or channels.
if mask1.shape != mask2.shape:
    raise ValueError(f"Mask shapes differ: {mask1.shape} vs {mask2.shape}")

# Ensure masks are binary (0 / 255)
mask1 = (mask1 > 0).astype(np.uint8) * 255
mask2 = (mask2 > 0).astype(np.uint8) * 255

# 255 where exactly one of the two masks is set, 0 elsewhere
mask_diff = cv2.absdiff(mask1, mask2)

# Save difference
diff_output_path = os.path.join(changedetection_image_dir, "mask_difference.png")
Image.fromarray(mask_diff).save(diff_output_path)

print(f"Saved mask difference to {diff_output_path}")

In [None]:
# RGBA to RGB: inspect the colour mode of a change-detection tile.

# Path relative to the notebook's working directory, matching the dataset
# directory used by the other cells (an absolute, machine-specific path would
# only work on the original author's computer).
img_path = os.path.join('SS_Rio_tile1024_stride0.v1i.coco-segmentation', 'test-new', '3537-2020-12-16.png')

img = Image.open(img_path)
print(img.mode)

# To convert an RGBA tile to RGB in place, uncomment:
# if img.mode == "RGBA":
#     img = img.convert("RGB")
#
# # Save it back to ensure compatibility
# img.save(img_path)

#### instance segmentation

test on single image, instance segmentation

In [None]:
# Test instance segmentation on a single image through the hosted inference API
# (the same check can also be done on the Roboflow platform).

# The API key is read from the environment instead of being stored in the
# notebook. This cell was written with a different key than the
# semantic-segmentation cells, so a dedicated variable is checked first and
# ROBOFLOW_API_KEY is used as a fallback.
inference_api_key = os.environ.get("ROBOFLOW_INFERENCE_API_KEY") or os.environ.get("ROBOFLOW_API_KEY")
if not inference_api_key:
    raise RuntimeError(
        "Set the ROBOFLOW_INFERENCE_API_KEY (or ROBOFLOW_API_KEY) environment variable before running this cell."
    )

# initialize the client
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=inference_api_key
)

custom_configuration = InferenceConfiguration(confidence_threshold=0.1)  # the confidence threshold can be set here

# infer on a local image
image_path = 'data/000000000004_png.rf.d8640e1758f706d916fa47416a5b1735.jpg'

with CLIENT.use_configuration(custom_configuration):
    result = CLIENT.infer(image_path, model_id="is-favela-400-only-mask-wnngf/1")

# Load the original image in RGBA mode so a transparent overlay can be composited on it
image = Image.open(image_path).convert("RGBA")
overlay = Image.new("RGBA", image.size, (255, 255, 255, 0))
draw = ImageDraw.Draw(overlay)

# Draw the polygon mask and class label of every prediction
for prediction in result['predictions']:
    print(prediction['confidence'])

    # Class label of this instance
    label = prediction['class']

    # Polygon outline of the instance mask
    points = [(point['x'], point['y']) for point in prediction['points']]
    if not points:
        # Nothing to draw; indexing points[0] below would raise IndexError.
        continue
    draw.polygon(points, fill=(0, 0, 255, 30))  # fill the mask area in semi-transparent blue

    # Write the class label at the first vertex of the mask
    draw.text(points[0], label, fill="white")

combined = Image.alpha_composite(image, overlay)

# Show the result without axes or borders
plt.imshow(combined)
plt.axis("off")
plt.show()

combined_output_path = "demo/segmentation_result_combined.png"
# Make sure the target folder exists before saving.
os.makedirs(os.path.dirname(combined_output_path), exist_ok=True)
combined.save(combined_output_path)

In [None]:
# Define the merged_mask and create_masks helper functions
def merged_mask(masks):
    """Collapse a stack of masks into a single 2-D mask.

    :param masks: numpy array that is either a single 2-D mask or a 3-D stack
        whose third axis indexes the individual masks
    :return: the input itself when it is already 2-D; an all-zero uint8 mask
        of the same height and width when the stack holds no masks; otherwise
        the element-wise sum over the third axis cast to uint8. Overlapping
        masks therefore add up rather than being clipped to 1.
    :raises ValueError: if ``masks`` has fewer than two dimensions
    """
    if masks.ndim < 2:
        raise ValueError(f"masks must be at least 2-D, got {masks.ndim} dimension(s)")
    if masks.ndim == 2:
        return masks
    if masks.shape[2] == 0:
        # An empty stack has no layer 0 to return; indexing it would raise
        # IndexError, so hand back an empty (all-background) mask instead.
        return np.zeros(masks.shape[:2], dtype=np.uint8)
    return np.sum(masks, axis=2).astype(np.uint8)

def create_masks(polygons, shape):
    """
    Build one mask layer per polygon.

    :param polygons: iterable of polygon objects exposing ``exterior.coords``
    :param shape: shape of each mask as (height, width)
    :return: list of uint8 masks, one per polygon, with the polygon's exterior
        filled with 1 and everything else 0
    """
    def _rasterize(polygon):
        # Coordinates are cast to int32 because cv2.fillPoly works on integer pixels.
        canvas = np.zeros(shape, dtype=np.uint8)
        outline = np.array(list(polygon.exterior.coords), dtype=np.int32)
        cv2.fillPoly(canvas, [outline], 1)  # fill the polygon with the value 1
        return canvas

    return [_rasterize(polygon) for polygon in polygons]

In [None]:
# for pt file
# Evaluate a local YOLO segmentation checkpoint on the COCO test split:
# compute a per-image pixel IoU against the ground truth, write it to a CSV
# file, and show a three-panel visual check for every image.

import csv
import os
import json
from collections import defaultdict

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import cv2
from ultralytics import YOLO

# Initialise the YOLO model (replace with your own checkpoint path).
model = YOLO("yolo model/best.pt")

# Test split location and its COCO annotation file.
image_dir = 'IS_Rio_tile1024_stride0.v1i.coco-segmentation/test-new'
coco_json_path = os.path.join(image_dir, "_annotations.coco.json")

with open(coco_json_path) as f:
    coco_data = json.load(f)

output_dir = os.path.join(image_dir, "segmentation_results_fp_fn")
os.makedirs(output_dir, exist_ok=True)

# Index images and annotations once instead of rescanning the lists per image.
images_by_id = {}
for image in coco_data['images']:
    images_by_id.setdefault(image['id'], image)

annotations_by_image = defaultdict(list)
for anno in coco_data['annotations']:
    annotations_by_image[anno['image_id']].append(anno)

image_ids = [image['id'] for image in coco_data['images']]

# The CSV file stays open for the whole loop so every image's IoU is actually
# written (previously only the header row was saved).
iou_csv_path = os.path.join(output_dir, "iou_results.csv")
with open(iou_csv_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Image Name", "IoU"])

    # Use random.sample(image_ids, min(4, len(image_ids))) to spot-check 4 random images.
    for image_id in image_ids:
        image_info = images_by_id[image_id]
        image_file_name = image_info['file_name']
        img_path = os.path.join(image_dir, image_file_name)

        # cv2.imread returns None (instead of raising) for missing/unreadable files.
        original_image = cv2.imread(img_path)
        if original_image is None:
            print(f"Skipping {image_file_name}: image could not be read")
            continue
        height, width = original_image.shape[:2]
        overlay = original_image.copy()

        # Ground truth: every polygon part of every annotation of this image.
        gt_polygons = []
        for annotation in annotations_by_image.get(image_id, []):
            segmentation = annotation['segmentation']
            if not isinstance(segmentation, list):
                continue  # not a polygon list (e.g. RLE); cannot be drawn here
            for ring in segmentation:
                pts = np.asarray(ring, dtype=np.float64).reshape(-1, 2)
                if len(pts) >= 3:
                    gt_polygons.append(np.rint(pts).astype(np.int32))

        # Inference. masks.xy holds one (x, y) polygon per instance in
        # original-image pixel coordinates, so no resizing or axis swapping
        # is needed (masks.data is at the network's inference resolution).
        result = model(img_path)[0]
        pred_polygons = []
        if result.masks is not None:
            for segment in result.masks.xy:
                if len(segment) >= 3:
                    pred_polygons.append(np.rint(segment).astype(np.int32))

        # Rasterise the ground truth and outline it on the overlay (blue; OpenCV colours are BGR).
        gt_mask = np.zeros((height, width), dtype=np.uint8)
        for polygon in gt_polygons:
            cv2.fillPoly(gt_mask, [polygon], 1)
            cv2.polylines(overlay, [polygon], isClosed=True, color=(255, 0, 0), thickness=2)

        # Rasterise the predictions and outline them on the overlay (green).
        pred_mask = np.zeros((height, width), dtype=np.uint8)
        for polygon in pred_polygons:
            cv2.fillPoly(pred_mask, [polygon], 1)
            cv2.polylines(overlay, [polygon], isClosed=True, color=(0, 255, 0), thickness=2)

        # Pixel-level IoU between the merged ground-truth and prediction masks.
        intersection = np.logical_and(gt_mask, pred_mask).sum()
        union = np.logical_or(gt_mask, pred_mask).sum()
        iou = intersection / union if union > 0 else 0

        # Save the IoU result.
        csv_writer.writerow([image_file_name, iou])

        # Error map, built directly in RGB so the colours match the labels.
        gt_fg = gt_mask == 1
        pred_fg = pred_mask == 1
        error_visualization = np.zeros((height, width, 3), dtype=np.uint8)
        error_visualization[gt_fg & ~pred_fg] = [255, 0, 0]    # Red for FN
        error_visualization[pred_fg & ~gt_fg] = [0, 255, 0]    # Green for FP
        error_visualization[gt_fg & pred_fg] = [255, 255, 0]   # Yellow for TP

        fig, axs = plt.subplots(1, 3, figsize=(15, 10))
        axs[0].imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
        axs[0].axis('off')
        axs[0].set_title('Original Image')

        # The overlay was drawn with OpenCV (BGR); convert before handing it to matplotlib.
        axs[1].imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
        axs[1].axis('off')
        axs[1].set_title('Overlay (Ground Truth + Predictions)')

        axs[2].imshow(error_visualization)
        axs[2].axis('off')
        axs[2].set_title(f'Error Visualization (IoU: {iou:.2f})')

        plt.show()
        plt.close(fig)  # release the figure; one is created per image

print(f"IoU results saved to {iou_csv_path}")


In [None]:
# test on folder, improved
# For every image of the COCO test split: run hosted inference, compare the
# predicted polygons with the ground truth at pixel level, write the IoU to a
# CSV file and save a labelled four-panel image
# (original | ground truth | prediction | error map).

CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    # NOTE(review): an API key is committed in this notebook. ROBOFLOW_API_KEY
    # takes precedence when set; the literal is kept only as a fallback so
    # existing runs keep working. It should be rotated and removed.
    api_key=os.environ.get("ROBOFLOW_API_KEY", "uZgVV5Mu30Veqelqd61T"),
)

# confidence
custom_configuration = InferenceConfiguration(confidence_threshold=0.3)  # the threshold can be tuned here
# use_configuration() is a context manager and has no effect outside a `with`
# block; configure() applies the configuration to the client persistently.
CLIENT.configure(custom_configuration)

# model
# CLIENT.select_model(model_id="is-favela-400-only-mask-wnngf/1")
# image_dir = 'IS-favela-400-only mask.v1i.coco/test' # input
# CLIENT.select_model(model_id="is-favela-2048to1024-only-mask-drguu/1")  #is-favela-2048to1024-maual/1 # this model does not work well

CLIENT.select_model(model_id="is_rio_tile1024_stride0/1")
image_dir = 'IS_Rio_tile1024_stride0.v1i.coco-segmentation/test-new'

coco_json_path = os.path.join(image_dir, "_annotations.coco.json")

with open(coco_json_path) as f:
    coco_data = json.load(f)

output_dir = os.path.join(image_dir, "segmentation_results_fp_fn")
os.makedirs(output_dir, exist_ok=True)

# Index images and annotations once instead of rescanning the lists per image.
images_by_id = {}
for image_entry in coco_data['images']:
    images_by_id.setdefault(image_entry['id'], image_entry)

annotations_by_image = defaultdict(list)
for anno in coco_data['annotations']:
    annotations_by_image[anno['image_id']].append(anno)

# Initialize CSV file for storing IoU results
iou_csv_path = os.path.join(output_dir, "iou_results.csv")
with open(iou_csv_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Image Name", "IoU"])

    image_ids = [image_entry['id'] for image_entry in coco_data['images']]

    for image_id in image_ids:  # slice with [:5] to process only the first 5 images
        image_info = images_by_id[image_id]
        image_file_name = image_info['file_name']
        img_path = os.path.join(image_dir, image_file_name)

        # Open the image once; alpha_composite returns a new image, so the
        # same base can back both the ground-truth and the prediction panel.
        original_image = Image.open(img_path).convert("RGBA")
        width, height = original_image.size
        mask_shape = (height, width)

        # Ground-truth overlay: every polygon part of every annotation.
        overlay = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
        draw = ImageDraw.Draw(overlay)

        gt_polygons = []
        for annotation in annotations_by_image.get(image_id, []):
            segmentation = annotation['segmentation']
            if not isinstance(segmentation, list):
                continue  # not a polygon list (e.g. RLE); cannot be drawn here
            for ring in segmentation:
                points = [(ring[i], ring[i + 1]) for i in range(0, len(ring) - 1, 2)]
                if len(points) < 3:
                    continue  # a polygon needs at least three vertices
                gt_polygons.append(Polygon(points))
                draw.polygon(points, fill=(255, 255, 255, 100))  # Ground Truth overlay

        combined_gt = Image.alpha_composite(original_image, overlay)

        result = CLIENT.infer(img_path)

        # Prediction overlay.
        overlay_pred = Image.new("RGBA", original_image.size, (255, 255, 255, 0))
        draw_pred = ImageDraw.Draw(overlay_pred)

        pred_polygons = []
        for prediction in result['predictions']:
            points = [(point['x'], point['y']) for point in prediction.get('points', [])]
            if len(points) < 3:
                continue  # degenerate prediction; shapely and PIL both reject it
            pred_polygons.append(Polygon(points))
            draw_pred.polygon(points, fill=(0, 0, 255, 30))  # Prediction overlay

        combined_pred = Image.alpha_composite(original_image, overlay_pred)

        # IoU computation.
        # TBD: is IoU an appropriate metric for instance segmentation? It can be
        # compared with the semantic-segmentation results later. An earlier
        # generated version averaged per-pair IoUs; a weighted (pixel-level)
        # IoU is what is actually wanted.

        # One mask layer per ground-truth / predicted polygon.
        gt_masks = create_masks(gt_polygons, mask_shape)
        pred_masks = create_masks(pred_polygons, mask_shape)

        if gt_masks:  # np.stack fails on an empty list
            merged_gt_mask = merged_mask(np.stack(gt_masks, axis=-1))
        else:
            merged_gt_mask = np.zeros(mask_shape, dtype=np.uint8)

        if pred_masks:
            merged_pred_mask = merged_mask(np.stack(pred_masks, axis=-1))
        else:
            merged_pred_mask = np.zeros(mask_shape, dtype=np.uint8)

        # Foreground is any non-zero pixel; comparing with > 0 (not == 1) keeps
        # pixels covered by several overlapping instances.
        gt_fg = merged_gt_mask > 0
        pred_fg = merged_pred_mask > 0

        # Pixel-level IoU of the merged masks.
        intersection = np.logical_and(gt_fg, pred_fg).sum()  # overlap area
        union = np.logical_or(gt_fg, pred_fg).sum()          # combined area

        iou = intersection / union if union > 0 else 0

        # False Negative (FN): Ground Truth is foreground, Prediction is background
        fn_mask = np.logical_and(gt_fg, ~pred_fg).astype(np.uint8) * 255
        # False Positive (FP): Prediction is foreground, Ground Truth is background
        fp_mask = np.logical_and(pred_fg, ~gt_fg).astype(np.uint8) * 255

        # Combined error map on a black background: FN in white, FP in grey.
        error_visualization = np.zeros((height, width, 3), dtype=np.uint8)
        error_visualization[fn_mask == 255] = [255, 255, 255]
        error_visualization[fp_mask == 255] = [128, 128, 128]
        combined_error = Image.fromarray(error_visualization)

        # Write IoU to CSV file
        csv_writer.writerow([image_file_name, iou])

        output_path = os.path.join(output_dir, f"{image_file_name}_result.png")

        # Add labels to each image.
        # NOTE(review): add_labels is defined outside this cell. The first three
        # panels are RGBA and the error map is RGB; np.hstack below needs all
        # four results to share height and channel count - confirm add_labels
        # normalises the mode.
        original_with_label = add_labels(original_image, "Original Image")
        gt_with_label = add_labels(combined_gt, "Ground Truth")
        pred_with_label = add_labels(combined_pred, "Prediction")
        error_with_label = add_labels(combined_error, "Error Map", iou=iou)

        output_image = np.hstack([
            np.array(original_with_label),
            np.array(gt_with_label),
            np.array(pred_with_label),
            np.array(error_with_label)
        ])
        final_image = Image.fromarray(output_image)
        final_image.save(output_path)

        print(f"Image {image_file_name} accuracy (IoU): {iou:.4f}")
print(f"IoU results saved to {iou_csv_path}")


In [None]:
# # test on folder

# CLIENT = InferenceHTTPClient(
#     api_url="https://detect.roboflow.com",
#     api_key="uZgVV5Mu30Veqelqd61T"
# )

# # confidence
# custom_configuration = InferenceConfiguration(confidence_threshold=0.1) # 可以在这里设置theshold
# CLIENT.use_configuration(custom_configuration)

# # model
# # CLIENT.select_model(model_id="is-favela-400-only-mask-wnngf/1")
# # image_dir = 'IS-favela-400-only mask.v1i.coco/test' # input
# # CLIENT.select_model(model_id="is-favela-2048to1024-only-mask-drguu/1")  #is-favela-2048to1024-maual/1 #这个模型不行

# CLIENT.select_model(model_id="is_rio_tile1024_stride0/1")  
# image_dir = 'IS_Rio_tile1024_stride0.v1i.coco-segmentation/test' 

# coco_json_path = os.path.join(image_dir, "_annotations.coco.json")

# with open(coco_json_path) as f:
#     coco_data = json.load(f)

# output_dir = os.path.join(image_dir, "segmentation_results")
# os.makedirs(output_dir, exist_ok=True)

# # Initialize CSV file for storing IoU results
# iou_csv_path = os.path.join(output_dir, "iou_results.csv")
# with open(iou_csv_path, mode='w', newline='') as csv_file:
#     csv_writer = csv.writer(csv_file)
#     csv_writer.writerow(["Image Name", "IoU"]) 

#     image_ids = [image['id'] for image in coco_data['images']]

#     for image_id in image_ids:  # [:5]， Process first 5 images
#         image_info = next(img for img in coco_data['images'] if img['id'] == image_id)
#         image_file_name = image_info['file_name']
#         img_path = os.path.join(image_dir, image_file_name)

#         gt_annotations = [anno for anno in coco_data['annotations'] if anno['image_id'] == image_id]

#         # Read image and initialize overlay
#         im = cv2.imread(img_path)
#         overlay = Image.new("RGBA", (im.shape[1], im.shape[0]), (255, 255, 255, 0))
#         draw = ImageDraw.Draw(overlay)

#         gt_polygons = []
#         for annotation in gt_annotations:
#             segmentation = annotation['segmentation'][0]
#             points = [(segmentation[i], segmentation[i + 1]) for i in range(0, len(segmentation), 2)]
#             gt_polygons.append(Polygon(points))
#             draw.polygon(points, fill=(255, 255, 255, 100))  # Ground Truth overlay

#         image = Image.open(img_path).convert("RGBA")
#         gt_combined = Image.alpha_composite(image, overlay)

        
#         result = CLIENT.infer(img_path)

#         overlay_pred = Image.new("RGBA", (im.shape[1], im.shape[0]), (255, 255, 255, 0))
#         draw_pred = ImageDraw.Draw(overlay_pred)

#         pred_polygons = []
#         for prediction in result['predictions']:
#             points = [(point['x'], point['y']) for point in prediction['points']]
#             pred_polygons.append(Polygon(points))
#             draw_pred.polygon(points, fill=(0, 0, 255, 30))  # Prediction overlay

#         pred_image = Image.open(img_path).convert("RGBA")
#         pred_combined = Image.alpha_composite(pred_image, overlay_pred)

#         # Create masks
#         gt_mask = np.zeros((im.shape[0], im.shape[1]), dtype=np.uint8)
#         pred_mask = np.zeros((im.shape[0], im.shape[1]), dtype=np.uint8)

#         for poly in gt_polygons:
#             pts = np.array(list(poly.exterior.coords), dtype=np.int32)
#             cv2.fillPoly(gt_mask, [pts], 255)

#         for poly in pred_polygons:
#             pts = np.array(list(poly.exterior.coords), dtype=np.int32)
#             cv2.fillPoly(pred_mask, [pts], 255)

#         # Calculate error mask
#         error_mask = cv2.bitwise_and(gt_mask, cv2.bitwise_not(pred_mask))

#         error_image = Image.fromarray(np.where(error_mask[..., None] == 255, [255, 255, 255, 255], [0, 0, 0, 255]).astype(np.uint8)) # 黑底白块
#         # error_image = Image.fromarray(np.where(error_mask[..., None] == 255, [0, 0, 0, 255], [255, 255, 255, 255]).astype(np.uint8)) # 白底黑块


#         # IoU computation, TBD: instance segmentation用IOU是合适的吗？可以之后跟semantic segmentation进行比较；GPT也不是很靠谱一开始生成的是平均IOU，实际应该用加权的
#         total_intersection = 0
#         total_union = 0

#         for gt_polygon in gt_polygons:
#             if not gt_polygon.is_valid:  # 跳过无效的多边形
#                 print(f"Skipping invalid ground truth polygon in {image_file_name}")
#                 continue
#             for pred_polygon in pred_polygons:
#                 if not pred_polygon.is_valid:  # 跳过无效的多边形
#                     print(f"Skipping invalid prediction polygon in {image_file_name}")
#                     continue
#                 if gt_polygon.intersects(pred_polygon):
#                     try:
#                         intersection_area = gt_polygon.intersection(pred_polygon).area
#                         union_area = gt_polygon.union(pred_polygon).area
#                         total_intersection += intersection_area
#                         total_union += union_area
#                     except TopologicalError:
#                         print(f"Skipping union calculation due to TopologyException for {image_file_name}")


#         weighted_iou = total_intersection / total_union if total_union > 0 else 0
#         # Write IoU to CSV file
#         csv_writer.writerow([image_file_name, weighted_iou])

#         fig, axs = plt.subplots(1, 3, figsize=(16, 8))
#         axs[0].imshow(gt_combined)
#         axs[0].axis('off')
#         axs[0].set_title('Ground Truth')
        
#         axs[1].imshow(pred_combined)
#         axs[1].axis('off')
#         axs[1].set_title('Prediction')

#         axs[2].imshow(error_image)
#         axs[2].axis('off')
#         axs[2].set_title(f'Error (IoU: {weighted_iou:.4f})')

#         plt.figtext(0.5, 0.2, image_file_name, ha='center', fontsize=12)

#         output_path = os.path.join(output_dir, f"{image_file_name}_result.png")
#         plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
        
#         # plt.close(fig) 
#         plt.show()

#         print(f"Image {image_file_name} accuracy (IoU): {weighted_iou:.4f}")
# print(f"IoU results saved to {iou_csv_path}")


In [None]:
# download dataset
# Fetch version 1 of the Roboflow project in COCO format.
import os

from roboflow import Roboflow

# NOTE(review): an API key is committed in this notebook. ROBOFLOW_API_KEY
# takes precedence when set; the literal is kept only as a fallback so
# existing runs keep working. It should be rotated and removed.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "sRBH2PAnkPFSD53Ai589"))
project = rf.workspace("my-first-workspace-list1").project("is-favela-2048to1024-only-mask-drguu")
version = project.version(1)
dataset = version.download("coco")