In [1]:
pip install segmentation-models-pytorch

Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.3.4-py3-none-any.whl.metadata (30 kB)
Collecting efficientnet-pytorch==0.7.1 (from segmentation-models-pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting timm==0.9.7 (from segmentation-models-pytorch)
  Downloading timm-0.9.7-py3-none-any.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting munch (from pretrainedmodels==0.7.4->segmentation-models-pytorch)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading segm

In [2]:
import os
from google.colab import drive
drive.mount('/content/drive')
os.chdir('/content/drive/MyDrive/IP/final') #切換該目錄

Mounted at /content/drive


In [3]:
from segmentation_models_pytorch import Unet
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import torchvision.transforms.functional as TF
import random
from torch.optim.lr_scheduler import ReduceLROnPlateau
import re

#### 初始化一些參數

輸入正確的檔案path 即可 run

In [4]:
### init parameter
# Constants for UNet model training process
BATCH_SIZE = 4
IMG_HEIGHT = 512
IMG_WIDTH = 512
NUM_EPOCHS = 200
Learning_Ratio = 2e-3
THRESOLD = 0.5

# Load data (please change)
test_img_dir = r'testing_dataset/image'
test_mask_dir = r'testing_dataset/mask'
test_mask_output_dir = r'testing_dataset/output'
train_img_dir = r'training_dataset/image'
train_mask_dir = r'training_dataset/mask'
train_mask_output_dir = r'training_dataset/output'
model_load_file = r'unet_25_test1_iou_0632_origin'

In [5]:
def calculate_iou_tensor(image1, image2, threshold=THRESOLD):
    """
    計算兩個 PyTorch Tensor 的交集比（IoU）。

    :param image1: 第一張影像（PyTorch Tensor）
    :param image2: 第二張影像（PyTorch Tensor）
    :param threshold: 灰階二值化的閾值（預設 128）
    :return: IoU 值（介於 0 到 1）
    """
    # 確保兩張影像的形狀相同

    if image1.shape != image2.shape:
        raise ValueError("兩張影像必須具有相同的尺寸！")
    # 將灰階影像二值化
    binary1 = (image1 >= threshold).to(torch.uint8)
    binary2 = (image2 >= threshold).to(torch.uint8)

    # 計算交集和聯集
    intersection = torch.sum(binary1 & binary2).item()
    union = torch.sum(binary1 | binary2).item()

    # 防止分母為零
    if union == 0:
        return 0.0

    # 計算 IoU
    iou = intersection / union
    return iou

In [6]:
def extract_number(filename):
    """
    從檔名中提取第一個出現的數字並轉換為整數。
    如果找不到數字，則返回一個很大的數，使該檔案排在最後。
    """
    name, _ = os.path.splitext(filename)
    match = re.search(r'\d+', name)
    if match:
        return int(match.group())
    else:
        return float('inf')  # 沒有數字的檔案將排在最後

class customDataSet(Dataset):
  def __init__(self, img_dir, mask_dir, transform_img,transform_mask):
    super().__init__()
    self.img_dir = img_dir
    self.mask_dir = mask_dir
    self.transform_img = transform_img
    self.transform_mask = transform_mask
    self.images = [f for f in os.listdir(img_dir)] #僅讀取副檔名為以下的
    self.images = sorted(self.images, key=extract_number)
    self.masks  = [f for f in os.listdir(mask_dir)]
    self.masks  = sorted(self.masks, key=extract_number)
  def __len__(self):
    return len(self.images)

  def __getitem__(self, idx):
    img_path = os.path.join(self.img_dir, self.images[idx])
    mask_path = os.path.join(self.mask_dir, self.masks[idx])
    image = Image.open(img_path).convert('RGB')
    mask = Image.open(mask_path).convert('L') # 轉成黑白圖片 0 或 255

    if self.transform_img and self.transform_mask:
       image = self.transform_img(image)
       mask  = self.transform_mask(mask)


    return image,mask

In [7]:
model = Unet(encoder_name="resnet34",
             encoder_weights=None,  # 不用pre-train的權重
             in_channels=3,
             classes=1,
             )

# Check the device we are using is GPU or CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
  print('Use the GPU to train')
else:
  device = torch.device('cpu')
  print('Use the CPU to train')
model = model.to(device)

transform_img = T.Compose([
                T.ToTensor(),                                  # 轉換為 Tensor
                # T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                T.Resize((IMG_HEIGHT, IMG_WIDTH))            # 調整圖片大小
            ])

# valid 都不做resize 直接算原始的 iou 大小
transform_test_img = T.Compose([T.ToTensor(),
                                # T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                                T.Resize((IMG_HEIGHT, IMG_WIDTH)),
                                T.GaussianBlur(kernel_size=3)
                                ])
transform_mask  = T.Compose([T.ToTensor(),
                                T.Resize((IMG_HEIGHT, IMG_WIDTH)),
                            ])
train_data    = customDataSet(train_img_dir, train_mask_dir,transform_img,transform_mask)
valid_dataset = customDataSet(test_img_dir, test_mask_dir ,transform_test_img,transform_mask)

train_loader  = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=False,pin_memory=True)
valid_loader  = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False,pin_memory=True)
loss_function = nn.BCEWithLogitsLoss() # 內部會幫忙做sigmoid
optimizer = optim.Adam(model.parameters(),lr=Learning_Ratio) # Choosing Adam as our optimizer
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5) # new_lr = lr*factor

Use the GPU to train


In [8]:
def load_model(model, load_path,device):
    """
    加載已保存的模型和優化器狀態。

    :param model: 模型實例
    :param optimizer: 優化器實例
    :param load_path: 模型保存的路徑
    :param device: 設備（CPU 或 GPU）
    :return: 加載後的模型、優化器、epoch 和 best_iou
    """
    checkpoint = torch.load(load_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    best_iou = checkpoint['best_iou']

    print(f"已加載模型於 epoch {epoch+1}，最佳 IoU: {best_iou:.4f}")
    return model

#### Loading model

In [9]:
if model_load_file is not None:
   model = load_model(model,model_load_file,device)

  checkpoint = torch.load(load_path, map_location=device)


已加載模型於 epoch 25，最佳 IoU: 0.6322


### show 出圖片 function

In [10]:
import matplotlib.pyplot as plt
import numpy as np
import torch

def show_image_and_mask(img, mask, origin):
    """
    顯示圖像及其對應的遮罩。

    :param img: 圖像 (torch.Tensor)
    :param mask: 遮罩 (torch.Tensor)
    """
    # 確保圖像是 NumPy 陣列，並將圖像轉換為 HWC 格式
    img = img.permute(1, 2, 0).cpu().detach().numpy()  # HWC 格式

    # 如果圖像是單通道，則轉換為灰度圖
    if img.shape[2] == 1:
        img = img.squeeze(-1)  # 去掉單通道維度 (變成 2D)

    # 確保 mask 是 NumPy 陣列
    mask = mask.cpu().detach().numpy()
    origin = origin.cpu().detach().numpy()
    # 如果 mask 是 3D，則選擇單通道
    if mask.ndim == 3:
        mask = mask.squeeze(0)  # 去掉最後一維 (如果是單通道)
    if origin.ndim == 3:
        origin = origin.squeeze(0)  # 去掉最後一維 (如果是單通道)
    # 創建畫布並顯示圖像與遮罩
    fig, ax = plt.subplots(1, 3, figsize=(12, 6))

    # 顯示圖像
    ax[0].imshow(img)
    ax[0].set_title("Image")
    ax[0].axis('off')

    # 顯示遮罩
    ax[1].imshow(mask, cmap='gray')  # 使用灰度色圖顯示遮罩
    ax[1].set_title("Predict")
    ax[1].axis('off')

    ax[2].imshow(origin, cmap='gray')  # 使用灰度色圖顯示遮罩
    ax[2].set_title("Origin")
    ax[2].axis('off')
    # 顯示圖像
    plt.show()


#### 計算train 、test 的 IOU 平均 (512X512)

In [11]:
model.eval()
iou_mean = 0
out_path = test_mask_output_dir
# model.eval()
total_img = 0
with torch.no_grad():
      for count, (x, y) in enumerate(valid_loader,start=0):
            x = x.to(device)
            y = y.to(device)


            predict_img = model(x)
            batch_size = x.size(0)
            for i in range(batch_size):

                  total_img += 1
                  input_img = x[i].squeeze(0)

                  origin_y = y[i]>=THRESOLD
                  origin_y = origin_y.squeeze(0)


                  probability = torch.sigmoid(predict_img[i])>=THRESOLD
                  predict_mask = probability.squeeze(0)
                  ###### save img
                   # 保存 predict_mask 512x512 寫進output
                  predict_mask_np = predict_mask.cpu().numpy().astype(np.uint8) * 255  # 转换为 0 和 255
                  mask_image = Image.fromarray(predict_mask_np)
                  mask_filename = os.path.join(out_path, f"{count*BATCH_SIZE+(i+1)}.png")
                  mask_image.save(mask_filename)
                  ###########
                  iou = calculate_iou_tensor(origin_y,predict_mask)
                  iou_mean += iou
                  # print(f"{count} : input_img : {input_img.size()} | predict_mask : {predict_mask.size()} | origin_mask : {origin_y.size()}")
                  # print(f"IOU : {iou}")
                  show_image_and_mask(input_img,predict_mask,origin_y)

      print(f"IOU_MEAN : {iou_mean/total_img}")



Output hidden; open in https://colab.research.google.com to view.

In [12]:
model.eval()
iou_mean = 0
out_dir = train_mask_output_dir
total_img_num=0
with torch.no_grad():
      for count, (x, y) in enumerate(train_loader,start=0):
            x = x.to(device)
            y = y.to(device)
            predict_img = model(x)
            batch_size = x.size(0)
            for i in range(batch_size):
                  total_img_num+=1
                  input_img = x[i].squeeze(0)

                  origin_y = y[i]>=THRESOLD
                  origin_y = origin_y.squeeze(0)


                  probability = torch.sigmoid(predict_img[i])>=THRESOLD
                  predict_mask = probability.squeeze(0)
                  ###### save img
                   # 保存 predict_mask
                  predict_mask_np = predict_mask.cpu().numpy().astype(np.uint8) * 255  # 转换为 0 和 255
                  mask_image = Image.fromarray(predict_mask_np)
                  mask_filename = os.path.join(out_dir, f"{count*BATCH_SIZE+(i+1)}.png")
                  mask_image.save(mask_filename)
                  ###########
                  iou = calculate_iou_tensor(origin_y,predict_mask)
                  iou_mean += iou
                  # print(f"input_img : {input_img.size()} | predict_mask : {predict_mask.size()} | origin_mask : {origin_y.size()}")
                  # print(f"IOU : {iou}")
                  show_image_and_mask(input_img,predict_mask,origin_y)

      print(f"IOU_MEAN : {iou_mean/total_img_num}")

Output hidden; open in https://colab.research.google.com to view.

#### 讀取資料夾裡面的output(512x512) 和對應的原圖做原始的 IOU
###### 會先對output 做Resize 到 一樣的大小，並且存起來。


In [13]:
import os
from PIL import Image
from torchvision import transforms as T
from torchvision.transforms import InterpolationMode
# path
train_origin = train_mask_dir
train_output = train_mask_output_dir

# get origin file name
origin = sorted([os.path.join(train_origin, f) for f in os.listdir(train_origin) ])
output = sorted([os.path.join(train_output, f) for f in os.listdir(train_output) ])

# cehck img size is same
if len(origin) != len(output):
    print(f"The number of origin : {len(origin)} and output files:{len(output)} does not match!")
    exit()

# transform tensor
transform_origin = T.Compose([
    T.ToTensor()
])

# 初始化 IoU
iou = 0

# count IoU
for origin_path, output_path in zip(origin, output):
    # img file open
    train_origin_mask = Image.open(origin_path).convert('L')  # 轉成黑白圖像
    train_output_mask = Image.open(output_path).convert('L')  # 轉成黑白圖像

    # transform tensor
    train_origin_mask_tensor = transform_origin(train_origin_mask) >= 0.3  # 轉成 0、1

    height = train_origin_mask_tensor.shape[1]
    width  = train_origin_mask_tensor.shape[2]

    transform_output = T.Compose([
                        T.ToTensor(),
                        T.Resize((height,width))
                    ])
    train_output_mask_tensor = transform_output(train_output_mask) >= 0.3  # 轉成 0、1
    ##################################
    predict_mask = train_output_mask_tensor.squeeze(0) #[1xheightxwidth] -> [heightxwidth]
    predict_mask_np = predict_mask.cpu().numpy().astype(np.uint8) * 255  # 轉成 0、25
    mask_image = Image.fromarray(predict_mask_np)


    mask_filename = f"{output_path}"
    mask_image.save(mask_filename)
    ####################################
    # count IoU
    iou += calculate_iou_tensor(train_origin_mask_tensor, train_output_mask_tensor)

# print IoU
print(f'TRAIN_Data - IOU_MEAN : {iou / len(origin):.4f}')


TRAIN_Data - IOU_MEAN : 0.6502


In [14]:
import os
from PIL import Image
from torchvision import transforms as T

# file path
test_origin = test_mask_dir
test_output = test_mask_output_dir

# get all file name
origin = sorted([os.path.join(test_origin, f) for f in os.listdir(test_origin) ])
output = sorted([os.path.join(test_output, f) for f in os.listdir(test_output) ])

# check size is same
if len(origin) != len(output):
    print(f"The number of origin : {len(origin)} and output files:{len(output)} does not match!")
    exit()

# transform tensor
transform_origin = T.Compose([
    T.ToTensor()
])

# init IoU
iou = 0

# count IoU
for origin_path, output_path in zip(origin, output):
    #open the mask file
    test_origin_mask = Image.open(origin_path).convert('L')  # 轉成黑白圖像
    test_output_mask = Image.open(output_path).convert('L')  # 轉成黑白圖像

    # transform to tensor
    test_origin_mask_tensor = transform_origin(test_origin_mask) >= 0.5  # 轉成 0、1

    height = test_origin_mask_tensor.shape[1]
    width  = test_origin_mask_tensor.shape[2]

    transform_output = T.Compose([
                            T.ToTensor(),
                            T.Resize((height,width))
                        ])
    test_output_mask_tensor = transform_output(test_output_mask) >= 0.5  # 轉成 0、1
    ##################################
    predict_mask = test_output_mask_tensor.squeeze(0) #[1xheightxwidth] -> [heightxwidth]
    predict_mask_np = predict_mask.cpu().numpy().astype(np.uint8) * 255  # 轉成 0、255
    # print(predict_mask_np.shape)
    mask_image = Image.fromarray(predict_mask_np)
    mask_filename = f"{output_path}"
    mask_image.save(mask_filename)
    ####################################
    # count IoU
    iou += calculate_iou_tensor(test_origin_mask_tensor, test_output_mask_tensor)

# print IoU
print(f'TEST_Data - IOU_MEAN : {iou / len(origin):.4f}')


TEST_Data - IOU_MEAN : 0.6209
