<a href="https://colab.research.google.com/github/KaiaLee/semantic-segmentation_practice/blob/main/cars_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount to Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Move to your current working directory
%cd drive/MyDrive/Colab\ Notebooks/Cars

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/Cars


In [2]:
# Import all the packages
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import Dataset
from PIL import Image
import numpy as np

In [16]:
# Build one of the main components - DoubleConv - for UNet
class DoubleConv(nn.Module):
  def __init__(self, in_channels, out_channels):
    super().__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, 1, 1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, 3, 1, 1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )
  def forward(self, x):
    return self.conv(x)

In [20]:
# Build UNet from scrach
class UNet(nn.Module):
  def __init__(self, in_channels=3, out_channels=1, features=[64, 128, 256, 512]):
    super().__init__()
    self.ups = nn.ModuleList()
    self.downs = nn.ModuleList()
    self.pool = nn.MaxPool2d(2, 2) ##
    # Down part of UNet
    for feature in features:
      self.downs.append(DoubleConv(in_channels, feature))
      in_channels = feature
    self.bottleneck = DoubleConv(features[-1], features[-1]*2)
    # Up part of UNet
    for feature in reversed(features):
      self.ups.append(nn.ConvTranspose2d(feature*2, feature, 2, 2))
      self.ups.append(DoubleConv(feature*2, feature))
    self.final_conv = nn.Conv2d(features[0], out_channels, 1)
  def forward(self, x):
    skip_connections = []
    for down in self.downs:
      x = down(x)
      skip_connections.append(x)
      x = self.pool(x)
    x = self.bottleneck(x)
    skip_connections.reverse() #直接在原始列表上進行反轉，而不會返回一個新的列表
    #skip_connections = skip_connections[::-1] #從列表的最後一個元素到第一個元素，步長為 -1，反轉列表
    for i in range(0, len(self.ups), 2): #生成一個從 0 到 self.ups 長度的範圍，每次增量為 2。這樣的循環可以用於遍歷上升（up）部分的每一對層。
      x = self.ups[i](x) #對於每個偶數索引 i，這行將 x 輸入到對應的上升層
      skip_connection = skip_connections[i//2] #這行從反轉的 skip_connections 列表中選取相應的跳接特徵圖，索引為 i//2，因為每次迭代中 i 增加 2(ConvTranspose+DoubleConv這兩個為一組)
      concat_skip = torch.cat((skip_connection, x), dim=1) # N xC x H x W
      x = self.ups[i+1](concat_skip)
    return self.final_conv(x)

In [21]:
# Create an UNet model object
model = UNet()
toy_data = torch.rand((16, 3, 160, 240))
output = model(toy_data)
print(output.shape)
# Move the model to GPU
#model = model.cuda()

torch.Size([16, 1, 160, 240])


In [43]:
# Build CustomDataset for loading data from Google Drive
class CustomDataset(Dataset):   #使用pytorch已經寫好的(可以丟到dataloader裡、讓data可以用minibanch gradient decent(banch size=16一次loading16張圖，丟給model train))
  def __init__(self, image_dir, mask_dir, transform=None):
    super().__init__()
    self.image_dir = image_dir
    self.mask_dir = mask_dir
    self.transform = transform
    self.images = os.listdir(image_dir)
  def __len__(self):
    print(f"Number of images: {len(self.images)}")  # 輸出圖像數量
    return len(self.images)

  def __getitem__(self, index):
    print(f"Fetching item at index: {index}")  # 輸出當前索引
    image_path = os.path.join(self.image_dir, self.images[index]) #資料夾, 檔名
    mask_path = os.path.join(self.mask_dir, self.images[index].replace('.jpg', '_mask.gif'))
    image = np.array(Image.open(image_path)) #pytorch在處理pyarray都會比較快，list轉tensor不會比np.array快，通常都先轉成np.array #一次開一張圖片不爆掉
    mask = np.array(Image.open(mask_path).convert('L')) #RGB轉L灰階影像(黑色0和白色255)
    return self.transform(image), self.transform(mask) #data pre-processing

    if self.transform:  # 確保 transform 不為 None
        image = self.transform(image)
        mask = self.transform(mask)
    return image, mask  # 返回處理後的數據

In [44]:
# Check the device we are using is GPU or CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print(device)

cpu


In [45]:
# Constants for UNet model training process
BATCH_SIZE = 16
NUM_EPOCHS = 3
IMG_WIDTH = 240
IMG_HEIGHT = 160

In [46]:
# Load data
all_data = CustomDataset('small_train', 'small_train_masks', T.Compose([T.ToTensor(), T.Resize((IMG_HEIGHT, IMG_WIDTH))]))
#pytorch裡的ToTensor會把所有數值除以255，所以不用變0跟1就可丟transform

In [53]:
# Split data into train and val
train_data, val_data = torch.utils.data.random_split(all_data, [0.7, 0.3])

Number of images: 1600
Number of images: 1600
Number of images: 1600
Number of images: 1600


In [48]:
# Create loader for mini-batch gradient descent
from torch.utils.data import DataLoader
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)

In [49]:
# The loss function for bianry classification
loss_function = nn.BCELoss()

# Choosing Adam as our optimizer
optimizer = optim.Adam(model.parameters())

In [50]:
def train(model, num_epochs, train_loader, optimizer, print_every=30):
  for epoch in range(num_epochs):
    for count, (x, y) in enumerate(train_loader):
      model.train()
      x = x.to(device)
      y = y.to(device)
      out = model(x)
      if count % print_every == 0:
        eval(model, val_loader, epoch)
      out = torch.sigmoid(out)
      loss = loss_function(out, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

In [51]:
def eval(model, val_loader, epoch):
  model.eval()
  num_correct = 0
  num_pixels = 0
  with torch.no_grad():
    for x, y in val_loader:
      x = x.to(device)
      y = y.to(device)
      out_img = model(x)
      probability = torch.sigmoid(out_img)
      predictions = probability>0.5
      num_correct += (predictions==y).sum()
      num_pixels += BATCH_SIZE*IMG_WIDTH*IMG_HEIGHT
  print(f'Epoch[{epoch+1}] Acc: {num_correct/num_pixels}')


In [None]:
train(model, NUM_EPOCHS, train_loader, optimizer)

Fetching item at index: 1218
Fetching item at index: 449
Fetching item at index: 53
Fetching item at index: 871
Fetching item at index: 875
Fetching item at index: 148
Fetching item at index: 136
Fetching item at index: 69
Fetching item at index: 609
Fetching item at index: 1088
Fetching item at index: 304
Fetching item at index: 1486
Fetching item at index: 612
Fetching item at index: 623
Fetching item at index: 1031
Fetching item at index: 432
