# 複数カーネルの効果検証をUNetで行う


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp
import glob
import os

In [None]:
import random
def set_seed(seed=0):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU, current CUDA device
    and all CUDA devices) with ``seed``, and sets the cuDNN flags
    ``deterministic=True`` / ``benchmark=False``.

    Args:
        seed: Value passed to every seeding function. Defaults to 0.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    # cuDNN configuration is independent of the seeding above.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(0)

In [None]:
# Load the CSV that defines the label classes; create_rgb_to_class_mapping()
# below reads columns 1-3 of each row as the class colour.
path = r"C:\Users\PC_User\Python\Semantic_Segmentation\data\archive\labels_class_dict.csv"
class_dict = pd.read_csv(path)
# Trailing expression: shown as the notebook cell output.
class_dict.values

上から順に class0 ~ 8

In [None]:
def create_rgb_to_class_mapping():
    """Build a lookup from a colour triple to its integer class id.

    Reads the module-level ``class_dict`` table. The class id of a colour is
    the zero-based position of its row, and columns 1, 2 and 3 of the row
    form the key tuple.

    Returns:
        dict mapping ``(row[1], row[2], row[3])`` to the row index.
    """
    return {
        (entry[1], entry[2], entry[3]): class_id
        for class_id, entry in enumerate(class_dict.values)
    }

class StanfordDataset(Dataset):
    """Dataset yielding ``(image, class_mask)`` NumPy pairs from two folders.

    Images are the ``*.jpg`` files of ``image_dir`` and labels the ``*.png``
    files of ``label_dir``; both lists are sorted and paired by position.
    NOTE(review): pairing relies on both folders sorting into the same order;
    file stems are not compared.

    Args:
        image_dir: Directory containing the input ``*.jpg`` images.
        label_dir: Directory containing the colour-coded ``*.png`` labels.
        unknown_class_id: Class id written for label colours that are not in
            the RGB -> class mapping. Defaults to 8 (the previous hard-coded
            value).
    """

    def __init__(self, image_dir, label_dir, unknown_class_id=8):
        self.image_paths = sorted(glob.glob(os.path.join(image_dir, "*.jpg")))
        self.label_paths = sorted(glob.glob(os.path.join(label_dir, "*.png")))

        # RGB -> class-id lookup used to decode the colour-coded labels.
        self.rgb_to_class = create_rgb_to_class_mapping()
        self.unknown_class_id = unknown_class_id

        # Message reads: "the number of images and labels does not match".
        assert len(self.image_paths) == len(self.label_paths), "画像とラベルの数が一致しません"

    def __len__(self):
        """Return the number of image/label pairs."""
        return len(self.image_paths)

    def _rgb_to_class_ids(self, mask):
        """Convert an ``(H, W, 3)`` RGB label array into an ``(H, W)`` id map.

        One vectorised comparison per known colour replaces the former
        per-pixel Python loop; pixels matching no known colour keep
        ``unknown_class_id``. The result dtype is ``uint8`` as before.
        """
        class_mask = np.full(mask.shape[:2], self.unknown_class_id, dtype=np.uint8)
        for rgb, class_id in self.rgb_to_class.items():
            # True where all three channels equal this class colour.
            matches = (mask == np.asarray(rgb)).all(axis=-1)
            class_mask[matches] = class_id
        return class_mask

    def __getitem__(self, idx):
        """Return ``(image, class_mask)`` for sample ``idx``.

        ``image`` is the RGB image as a NumPy array and ``class_mask`` the
        per-pixel class ids decoded from the colour label.
        """
        # Context managers close the file handles deterministically.
        with Image.open(self.image_paths[idx]) as image_file:
            image = np.array(image_file.convert('RGB'))
        with Image.open(self.label_paths[idx]) as label_file:
            mask = np.array(label_file.convert('RGB'))

        return image, self._rgb_to_class_ids(mask)

class DatasetWrapper(Dataset):
    """Apply a transform to every sample of an indexable dataset or subset.

    ``transform`` is called as ``transform(image=..., mask=...)`` and must
    return a mapping with ``'image'`` and ``'mask'`` entries.
    """

    def __init__(self, subset, transform):
        self.transform = transform
        self.subset = subset

    def __len__(self):
        """Return the number of samples in the wrapped subset."""
        return len(self.subset)

    def __getitem__(self, idx):
        """Return the transformed image and the transformed mask cast via ``.long()``."""
        raw_image, raw_mask = self.subset[idx]
        result = self.transform(image=raw_image, mask=raw_mask)
        return result['image'], result['mask'].long()

# --- datasets ---
image_path = r"C:\Users\PC_User\Python\Semantic_Segmentation\data\archive\images"
label_path = r"C:\Users\PC_User\Python\Semantic_Segmentation\data\archive\labels_colored"

full_dataset = StanfordDataset(image_dir=image_path, label_dir=label_path)

# --- 70% train, 20% val, remainder (about 10%) test ---
total_len = len(full_dataset)
train_len, val_len = int(total_len * 0.7), int(total_len * 0.2)
test_len = total_len - (train_len + val_len)

# Dedicated generator with a fixed seed so the split is the same on every run.
split_generator = torch.Generator().manual_seed(42)
train_subset, val_subset, test_subset = random_split(
    full_dataset,
    [train_len, val_len, test_len],
    generator=split_generator,
)

# Deterministic preprocessing, split into the two places it occurs in a pipeline.
resize_transform = [
    A.Resize(height=256, width=256),  # Fixed input size
]
tensor_transform = [
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # ImageNet
    ToTensorV2(),
]

# Shared by every split: contains NO random operations, so validation and test
# metrics are computed on un-augmented images and are repeatable.
common_transform = [*resize_transform, *tensor_transform]

# Random photometric augmentations. These used to live in `common_transform`
# and were therefore also applied to validation/test data; they are now
# restricted to the training pipeline.
color_augmentations = [
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    A.RandomGamma(gamma_limit=(80, 120), p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
]

# Training pipeline: same operation order as before
# (geometric -> resize -> photometric -> normalize -> tensor).
train_transform = A.Compose([
    A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.5),
    *resize_transform,
    *color_augmentations,
    *tensor_transform,
])

valid_transform = A.Compose([
    *common_transform
])

test_transform = A.Compose([
    *common_transform
])

# --- Attach the per-split transform pipeline to each subset ---
train_datasets = DatasetWrapper(subset=train_subset, transform=train_transform)
valid_datasets = DatasetWrapper(subset=val_subset, transform=valid_transform)
test_datasets = DatasetWrapper(subset=test_subset, transform=test_transform)

# --- DataLoader ---
# Only the training loader shuffles; validation and test keep dataset order.
batch_size = 16
train_loader = DataLoader(train_datasets, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(valid_datasets, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_datasets, batch_size=batch_size, shuffle=False)


## About This Dataset
https://www.kaggle.com/datasets/balraj98/stanford-background-dataset <br>
このデータセットは、LabelMe、MSRC、PASCAL VOC、Geometric Context などの既存の公開データセットから選ばれた 715 枚の画像で構成されている。

In [None]:
def denormalize(img_tensor, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Invert a per-channel normalisation: ``img_tensor * std + mean``.

    ``mean`` and ``std`` are turned into tensors on ``img_tensor``'s device
    and reshaped to ``(-1, 1, 1)`` so they broadcast over the trailing two
    dimensions.

    Args:
        img_tensor: Normalised image tensor.
        mean: Per-channel means used by the forward normalisation.
        std: Per-channel standard deviations used by the forward normalisation.

    Returns:
        The de-normalised tensor.
    """
    device = img_tensor.device
    channel_std = torch.tensor(std).to(device).view(-1, 1, 1)
    channel_mean = torch.tensor(mean).to(device).view(-1, 1, 1)
    return img_tensor * channel_std + channel_mean

def show_image(image, ax):
    """Draw a normalised image tensor on the matplotlib axes ``ax``.

    The tensor is de-normalised, clamped to ``[0, 1]``, and its first axis is
    moved to the last position before being handed to ``ax.imshow``.
    """
    restored = denormalize(image).clamp(0, 1)
    pixels = restored.permute(1, 2, 0).cpu().numpy()
    ax.imshow(pixels)


def show_mask(mask, ax):
    """Draw a class-id mask on ``ax`` and return the object ``ax.imshow`` returns.

    Uses the ``tab10`` colormap with a fixed value range of 0..8 so that every
    panel shares the same colour scale.
    """
    mask_on_cpu = mask.cpu()
    return ax.imshow(mask_on_cpu, cmap='tab10', vmin=0, vmax=8)

# 4x4 grid: rows 0-1 show eight training images, rows 2-3 the matching masks.
fig, ax = plt.subplots(nrows=4, ncols=4, figsize=(20, 20), constrained_layout=True)
cbar_img = None

# Only the first batch of the training loader is visualised.
for img_batch, mask_batch in train_loader:
    for i in range(8):
        # Samples 0-3 go to image row 0, samples 4-7 to image row 1;
        # the mask of a sample sits two rows below its image.
        row_img, col = divmod(i, 4)
        row_mask = row_img + 2

        img, mask = img_batch[i], mask_batch[i]
        show_image(img, ax[row_img, col])
        # The last drawn mask supplies the colour scale for the colorbar.
        cbar_img = show_mask(mask, ax[row_mask, col])
    break

# Extra axes to the right of the grid: [left, bottom, width, height].
cbar_ax = fig.add_axes([1.02, 0.15, 0.015, 0.7])
fig.colorbar(cbar_img, cax=cbar_ax)

plt.show()



## ブロックの実装

In [None]:
class ConvBlock(nn.Module):
    """Two stacked ``3x3 conv -> BatchNorm -> ReLU`` stages.

    The first stage maps ``in_channels`` to ``out_channels`` and the second
    keeps ``out_channels``. ``padding=1`` with a 3x3 kernel leaves the spatial
    size unchanged.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # Stage inputs: the block input first, then the first stage's output.
        for stage_in in (in_channels, out_channels):
            stages.extend([
                nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both stages."""
        return self.conv(x)


In [None]:
class BasicConv2d(nn.Module):
    """Two stacked ``conv -> BatchNorm -> ReLU`` stages with a configurable kernel.

    Both convolutions use the same ``kernel_size`` and ``padding``. The first
    maps ``in_channels`` to ``out_channels`` and the second keeps
    ``out_channels``.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: int):
        super().__init__()
        stages = []
        for stage_in in (in_channels, out_channels):
            stages.extend([
                nn.Conv2d(stage_in, out_channels, kernel_size=kernel_size, padding=padding),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both stages."""
        return self.conv(x)
        
class MultiConvBlock(nn.Module):
    """Four parallel convolution branches (1x1, 3x3, 5x5, 7x7) fused by a 1x1 conv.

    Every branch is a ``BasicConv2d`` producing ``mid_channels`` maps from the
    same input. The four outputs are concatenated along the channel axis and
    projected to ``out_channels`` by ``1x1 conv -> BatchNorm -> ReLU``.
    """

    def __init__(self, in_channels, mid_channels, out_channels):
        super().__init__()

        # Padding of k // 2 for kernel size k keeps all branch outputs the
        # same spatial size, which the channel concatenation requires.
        self.branch0 = BasicConv2d(in_channels, mid_channels, kernel_size=1, padding=0)
        self.branch1 = BasicConv2d(in_channels, mid_channels, kernel_size=3, padding=1)
        self.branch2 = BasicConv2d(in_channels, mid_channels, kernel_size=5, padding=2)
        self.branch3 = BasicConv2d(in_channels, mid_channels, kernel_size=7, padding=3)

        # Fusion head over the concatenated 4 * mid_channels maps.
        self.conv = nn.Conv2d(4 * mid_channels, out_channels, kernel_size=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply all branches to ``x``, concatenate, and fuse."""
        branches = (self.branch0, self.branch1, self.branch2, self.branch3)
        stacked = torch.cat([branch(x) for branch in branches], dim=1)
        return self.relu(self.bn(self.conv(stacked)))

In [None]:
class ConvBlockTranspose(nn.Module):
    """Transposed-conv upsampling followed by two ``3x3 conv -> BN -> ReLU`` stages.

    ``up`` is a ``ConvTranspose2d`` with kernel 2 and stride 2 that maps
    ``in_channels`` to ``out_channels``; the two refinement stages keep
    ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)

        refine = []
        for _ in range(2):
            refine.extend([
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*refine)

    def forward(self, x):
        """Upsample ``x`` and refine the result."""
        return self.conv(self.up(x))


In [None]:
class MultiConvBlockTranspose(nn.Module):
    """Transposed-conv upsampling followed by a four-branch multi-kernel block.

    ``up`` (kernel 2, stride 2) maps ``in_channels`` to ``mid_channels``. Four
    ``BasicConv2d`` branches (1x1, 3x3, 5x5, 7x7) each produce
    ``mid_channels // 2`` maps, and their concatenation is projected to
    ``out_channels`` by ``1x1 conv -> BatchNorm -> ReLU``.
    """

    def __init__(self, in_channels, mid_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, mid_channels, kernel_size=2, stride=2)

        branch_channels = mid_channels // 2
        self.branch0 = BasicConv2d(mid_channels, branch_channels, kernel_size=1, padding=0)
        self.branch1 = BasicConv2d(mid_channels, branch_channels, kernel_size=3, padding=1)
        self.branch2 = BasicConv2d(mid_channels, branch_channels, kernel_size=5, padding=2)
        self.branch3 = BasicConv2d(mid_channels, branch_channels, kernel_size=7, padding=3)

        # The fusion conv is declared with mid_channels * 2 inputs, which
        # equals the concatenated width only when mid_channels is even.
        self.conv = nn.Conv2d(mid_channels * 2, out_channels, kernel_size=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Upsample ``x``, apply all branches, concatenate, and fuse."""
        upsampled = self.up(x)
        branches = (self.branch0, self.branch1, self.branch2, self.branch3)
        stacked = torch.cat([branch(upsampled) for branch in branches], dim=1)
        return self.relu(self.bn(self.conv(stacked)))

## UNet

In [None]:
class UNet(nn.Module):
    """Baseline encoder-decoder segmentation network built from ``ConvBlock``s.

    Four encoder stages (64, 128, 256, 512 channels), a 1024-channel
    bottleneck and four ``ConvBlockTranspose`` decoder stages, followed by a
    1x1 convolution that outputs ``num_classes`` logits per pixel.

    In this implementation each skip connection carries the *pooled* encoder
    output, concatenated with the decoder input before that stage upsamples.
    """

    def __init__(self, in_channels: int, num_classes: int):
        super().__init__()

        self.enc1 = ConvBlock(in_channels, 64)
        self.enc2 = ConvBlock(64, 128)
        self.enc3 = ConvBlock(128, 256)
        self.enc4 = ConvBlock(256, 512)

        self.bottleneck = ConvBlock(512, 1024)

        # Decoder input width = previous feature width + skip width.
        self.dec4 = ConvBlockTranspose(1024 + 512, 512)
        self.dec3 = ConvBlockTranspose(512 + 256, 256)
        self.dec2 = ConvBlockTranspose(256 + 128, 128)
        self.dec1 = ConvBlockTranspose(128 + 64, 64)

        self.final_conv = nn.Conv2d(64, num_classes, kernel_size=1)  # logits

        self.pool = nn.MaxPool2d(2)

        self._reset_parameters()

    def _reset_parameters(self):
        """Kaiming-normal init for all (transposed) conv weights; biases set to zero."""
        conv_types = (nn.Conv2d, nn.ConvTranspose2d)
        for module in self.modules():
            if not isinstance(module, conv_types):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Map an input batch ``(B, in_channels, H, W)`` to logits ``(B, num_classes, H, W)``."""
        # Encoder: each stage is conv block + 2x2 max-pool; the pooled maps
        # are kept as skips -> (B, 64, H/2, W/2), (B, 128, H/4, W/4),
        # (B, 256, H/8, W/8), (B, 512, H/16, W/16).
        skips = []
        feat = x
        for encoder in (self.enc1, self.enc2, self.enc3, self.enc4):
            feat = self.pool(encoder(feat))
            skips.append(feat)

        # Bottleneck: (B, 1024, H/16, W/16)
        feat = self.bottleneck(feat)

        # Decoder: concatenate the same-resolution skip (deepest first), then
        # upsample -> (B, 512, H/8, W/8), (B, 256, H/4, W/4),
        # (B, 128, H/2, W/2), (B, 64, H, W).
        for decoder in (self.dec4, self.dec3, self.dec2, self.dec1):
            feat = decoder(torch.cat([feat, skips.pop()], dim=1))

        # Final segmentation map: (B, num_classes, H, W)
        return self.final_conv(feat)

## Multi-kernel UNet

In [None]:
class Multi_kernel_UNet(nn.Module):
    """Encoder-decoder segmentation network built from multi-kernel blocks.

    Same layout as the baseline network in this file (four encoder stages, a
    bottleneck, four decoder stages, 1x1 logit head), but every stage is a
    ``MultiConvBlock`` / ``MultiConvBlockTranspose`` that mixes 1x1, 3x3, 5x5
    and 7x7 convolutions.

    Each skip connection carries the *pooled* encoder output, concatenated
    with the decoder input before that stage upsamples.
    """

    def __init__(self, in_channels: int, num_classes: int):
        super().__init__()

        # (in, per-branch mid, out) channels per encoder stage.
        self.enc1 = MultiConvBlock(in_channels, 32, 64)
        self.enc2 = MultiConvBlock(64, 64, 128)
        self.enc3 = MultiConvBlock(128, 128, 256)
        self.enc4 = MultiConvBlock(256, 256, 512)

        self.bottleneck = MultiConvBlock(512, 512, 1024)

        # Decoder input width = previous feature width + skip width.
        self.dec4 = MultiConvBlockTranspose(1024 + 512, 512, 512)
        self.dec3 = MultiConvBlockTranspose(512 + 256, 256, 256)
        self.dec2 = MultiConvBlockTranspose(256 + 128, 128, 128)
        self.dec1 = MultiConvBlockTranspose(128 + 64, 64, 64)

        self.final_conv = nn.Conv2d(64, num_classes, kernel_size=1)  # logits

        self.pool = nn.MaxPool2d(2)

        self._reset_parameters()

    def _reset_parameters(self):
        """Kaiming-normal init for all (transposed) conv weights; biases set to zero."""
        conv_types = (nn.Conv2d, nn.ConvTranspose2d)
        for module in self.modules():
            if not isinstance(module, conv_types):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Map an input batch ``(B, in_channels, H, W)`` to logits ``(B, num_classes, H, W)``."""
        # Encoder: each stage is multi-kernel block + 2x2 max-pool; the pooled
        # maps are kept as skips -> (B, 64, H/2, W/2), (B, 128, H/4, W/4),
        # (B, 256, H/8, W/8), (B, 512, H/16, W/16).
        skips = []
        feat = x
        for encoder in (self.enc1, self.enc2, self.enc3, self.enc4):
            feat = self.pool(encoder(feat))
            skips.append(feat)

        # Bottleneck: (B, 1024, H/16, W/16)
        feat = self.bottleneck(feat)

        # Decoder: concatenate the same-resolution skip (deepest first), then
        # upsample -> (B, 512, H/8, W/8), (B, 256, H/4, W/4),
        # (B, 128, H/2, W/2), (B, 64, H, W).
        for decoder in (self.dec4, self.dec3, self.dec2, self.dec1):
            feat = decoder(torch.cat([feat, skips.pop()], dim=1))

        # Final segmentation map: (B, num_classes, H, W)
        return self.final_conv(feat)

## Loss function
IoU は割愛（次することがあったらちゃんと実装する）

In [None]:
class LossFunction(nn.Module):
    """Unweighted sum of cross-entropy loss and multiclass Dice loss.

    Args:
        ignore_index: Target value passed as ``ignore_index`` to both loss
            terms. Defaults to 8.
    """

    def __init__(self, ignore_index=8):
        super().__init__()
        shared = {'ignore_index': ignore_index}
        self.CELoss = nn.CrossEntropyLoss(**shared)
        self.DiceLoss = smp.losses.DiceLoss(mode='multiclass', **shared)

    def forward(self, pred, target):
        """Return ``CELoss(pred, target) + DiceLoss(pred, target)``."""
        ce_term = self.CELoss(pred, target)
        dice_term = self.DiceLoss(pred, target)
        return ce_term + dice_term


## Train

In [None]:
def train(model_type, num_epochs, train_loader, val_loader, device='cuda'):
    """Train a model and return a copy loaded with the best-validation weights.

    Optimises with Adam (lr=0.0005) on ``LossFunction`` and lowers the
    learning rate with ``ReduceLROnPlateau`` (patience 2, factor 0.1) driven by
    the validation loss. After training, the best weights are written to
    ``UNet_type{model_type}.pth``.

    Args:
        model_type: 1 for ``UNet``, 2 for ``Multi_kernel_UNet``.
        num_epochs: Number of training epochs.
        train_loader: Loader yielding ``(image, mask)`` training batches.
        val_loader: Loader yielding ``(image, mask)`` validation batches.
        device: Device used for the model, the loss and the batches.
            Defaults to ``'cuda'`` (the previous hard-coded value).

    Returns:
        ``(best_model, train_history, val_history)``: a freshly built model
        loaded with the best weights, and the per-epoch sample-weighted mean
        training and validation losses.

    Raises:
        ValueError: If ``model_type`` is neither 1 nor 2.
        RuntimeError: If no checkpoint was captured (``num_epochs`` <= 0 or
            the validation loss never compared below infinity, e.g. NaN).
    """
    import copy  # local import: only needed to snapshot the best weights

    def build_model():
        # Single place that maps model_type to an architecture.
        if model_type == 1:
            return UNet(in_channels=3, num_classes=9).to(device)
        if model_type == 2:
            return Multi_kernel_UNet(in_channels=3, num_classes=9).to(device)
        raise ValueError(f"Invalid model_type {model_type}. Expected 1 or 2")

    model = build_model()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
    criterion = LossFunction().to(device)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.1)

    train_history = []
    val_history = []
    best_val_loss = float('inf')
    best_model_state = None

    for epoch in range(num_epochs):
        model.train()
        train_loss, total_train_samples = 0, 0

        for img, mask in train_loader:
            img = img.to(device)
            mask = mask.to(device)

            optimizer.zero_grad()
            mask_pred = model(img)

            loss = criterion(mask_pred, mask)
            loss.backward()
            optimizer.step()

            # Weight by batch size so a smaller last batch is averaged correctly.
            train_loss += loss.item() * img.size(0)
            total_train_samples += img.size(0)

        train_loss /= total_train_samples

        train_history.append(train_loss)

        model.eval()
        with torch.no_grad():
            val_loss, total_val_samples = 0, 0

            for img, mask in val_loader:
                img = img.to(device)
                mask = mask.to(device)

                mask_pred = model(img)
                loss = criterion(mask_pred, mask)

                val_loss += loss.item() * img.size(0)
                total_val_samples += img.size(0)

            val_loss /= total_val_samples

            val_history.append(val_loss)

            scheduler.step(val_loss)

            print(f"[{epoch+1:02d}] Train Loss: {train_loss:.5f} | Valid Loss: {val_loss:.5f}")

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # state_dict() returns references to the live parameters, which
                # later optimizer steps overwrite in place. Deep-copy so the
                # snapshot really holds this epoch's weights.
                best_model_state = copy.deepcopy(model.state_dict())

    if best_model_state is None:
        raise RuntimeError(
            "No best model state was recorded; check num_epochs and that the "
            "validation loss is finite."
        )

    torch.save(best_model_state, f"UNet_type{model_type}.pth")
    print(f"Best model saved with validation loss: {best_val_loss:.5f}")

    best_model = build_model()
    best_model.load_state_dict(best_model_state)

    return best_model, train_history, val_history

In [None]:
def plot_history(model_type, num_epoch, train_history, val_history):
    """Plot per-epoch training and validation loss curves.

    Args:
        model_type: 1 for 'UNet', 2 for 'Multi kernel UNet'; selects the title.
        num_epoch: Number of epochs; the x axis runs from 1 to ``num_epoch``,
            so both histories must contain ``num_epoch`` values.
        train_history: Training loss per epoch.
        val_history: Validation loss per epoch.

    Raises:
        ValueError: If ``model_type`` is neither 1 nor 2. Previously this
            surfaced as an unbound-variable error after the figure had already
            been partly drawn.
    """
    # Validate before touching matplotlib so a bad argument leaves no figure behind.
    if model_type == 1:
        model = 'UNet'
    elif model_type == 2:
        model = 'Multi kernel UNet'
    else:
        raise ValueError(f"Invalid model_type {model_type}. Expected 1 or 2")

    train_history = np.array(train_history)
    val_history = np.array(val_history)
    epochs = np.arange(num_epoch) + 1  # 1-based epoch numbers

    plt.figure(figsize=(8, 5))
    plt.plot(epochs, train_history, label='Train Loss')
    plt.plot(epochs, val_history, label='Val Loss')
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.title(f'{model} Training History', fontsize=12)
    plt.xlim(1, num_epoch)
    plt.legend(loc='upper left')

In [None]:
# Number of training epochs, shared by both training runs and both history plots below.
num_epochs = 50

In [None]:
# Train the baseline network (model_type=1) and keep its loss histories.
# NOTE(review): this rebinds the name `UNet` from the class to the returned
# model instance. train() constructs the model via `UNet(in_channels=..., ...)`,
# so calling train(1, ...) again fails until the class-definition cell is re-run.
UNet, train_history1, val_history1 = train(1, num_epochs, train_loader, val_loader)

In [None]:
# Loss curves of the baseline UNet run.
plot_history(1, num_epochs, train_history1, val_history1);

In [None]:
# Train the multi-kernel network (model_type=2) and keep its loss histories.
# NOTE(review): this rebinds the name `Multi_kernel_UNet` from the class to the
# returned model instance, so calling train(2, ...) again fails until the
# class-definition cell is re-run.
Multi_kernel_UNet, train_history2, val_history2 = train(2, num_epochs, train_loader, val_loader)

In [None]:
# Loss curves of the multi-kernel UNet run.
plot_history(2, num_epochs, train_history2, val_history2);

## Test

In [None]:
def test(model):
    """Evaluate ``model`` on the module-level ``test_loader``.

    Computes the sample-weighted mean of ``LossFunction`` over all test
    batches on ``'cuda'`` and keeps the first batch for visualisation.

    Returns:
        ``(test_loss, first_batch_img, first_batch_true_mask,
        first_batch_pred_mask)``: the mean loss and, as NumPy arrays, the
        first batch's inputs, target masks and predicted class ids (argmax
        over dim 1 of the softmaxed logits).
    """
    criterion = LossFunction().to('cuda')
    model.eval()

    loss_sum, sample_count = 0, 0
    first_batch_img = first_batch_true_mask = first_batch_pred_mask = None

    with torch.no_grad():
        for batch_idx, (img, mask) in enumerate(test_loader):
            img, mask = img.to('cuda'), mask.to('cuda')

            logits = model(img)
            n_samples = img.size(0)
            loss_sum += criterion(logits, mask).item() * n_samples
            sample_count += n_samples

            if batch_idx == 0:
                # Snapshot the first batch for the qualitative plots.
                first_batch_img = img.cpu().numpy()
                first_batch_true_mask = mask.cpu().numpy()
                probabilities = torch.softmax(logits, dim=1)
                first_batch_pred_mask = torch.argmax(probabilities, dim=1).cpu().numpy()

        mean_loss = loss_sum / sample_count

    return mean_loss, first_batch_img, first_batch_true_mask, first_batch_pred_mask

In [None]:
# Evaluate both trained models on the test set. The first-batch image and
# true-mask variables are assigned by both calls; the second call's values
# remain (test_loader is built without shuffling in this file).
test_loss1, first_batch_img, first_batch_true_mask, first_batch_pred_mask1 = test(UNet)
test_loss2, first_batch_img, first_batch_true_mask, first_batch_pred_mask2 = test(Multi_kernel_UNet)

In [None]:
# Report both test losses with five decimals, matching the per-epoch training log.
# The former spec `05f` only requested a minimum width of 5 with zero padding
# (a no-op here) and printed six decimals.
print(f'UNet | Loss: {test_loss1:.5f} | Multi kernel UNet | Loss: {test_loss2:.5f}')

## まとめ
マルチカーネルを実装して精度が上がるかどうかは、カーネルサイズ次第だと考えられる。カーネルサイズが大きいもの（7×7）がかえってノイズになる可能性もある。<br>
学習の安定性に関して、カーネルサイズが大きいもの（7×7）がノイズになった結果、学習が普通の UNet よりも不安定になったのではないだろうか。思ったより精度が上がらなかったのもそのせいか。<br>
マルチスケールの特徴を集約することで 1 つの局所パターンに過度に依存するリスクが下がるため、過学習に強くなりやすいと思ったのだが（アンサンブル学習的な効果）、そのような効果は今回は確認できなかった。
### 精度向上の要因
- マルチスケール設計だけでなく、**「パラメータの増加」** は性能向上の大きな要因の一つだと考えられる。
- ただし、マルチスケール設計は **「多様な特徴抽出」** という付加価値があるので、単なるパラメータ数増加以上のメリットも期待できるはず。


In [None]:
# Pixels whose ground truth is class 8 (the loss's ignore index) are overwritten
# with 8 in both predictions, so the plots show them in the same colour as the truth.
ignore_mask = (first_batch_true_mask == 8)
for predicted in (first_batch_pred_mask1, first_batch_pred_mask2):
    predicted[ignore_mask] = 8

In [None]:
def denormalize(img_array, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """NumPy version: invert a per-channel normalisation, ``img_array * std + mean``.

    ``mean`` and ``std`` are reshaped to ``(-1, 1, 1)`` so they broadcast over
    the trailing two dimensions of ``img_array``.

    Returns:
        The de-normalised array.
    """
    channel_std = np.reshape(std, (-1, 1, 1))
    channel_mean = np.reshape(mean, (-1, 1, 1))
    return img_array * channel_std + channel_mean

def show_image(image, ax):
    """NumPy version: draw a normalised image array on the matplotlib axes ``ax``.

    The array is de-normalised, clipped to ``[0, 1]``, and its first axis is
    moved to the last position before being handed to ``ax.imshow``.
    """
    restored = np.clip(denormalize(image), 0, 1)
    pixels = restored.transpose(1, 2, 0)
    ax.imshow(pixels)

def plot_mask(mask):
    """Show the first eight masks of ``mask`` in a 2x4 grid with a shared colorbar.

    Every panel uses the ``tab10`` colormap with the fixed value range 0..8;
    the colorbar is built from the last panel drawn.
    """
    fig, grid = plt.subplots(2, 4, figsize=(20, 10), constrained_layout=True)
    last_artist = None
    for i, panel in enumerate(grid.flatten()):
        last_artist = panel.imshow(mask[i], cmap='tab10', vmin=0, vmax=8)

    # Extra axes to the right of the grid: [left, bottom, width, height].
    colorbar_axes = fig.add_axes([1.02, 0.15, 0.015, 0.7])
    fig.colorbar(last_artist, cax=colorbar_axes)
    plt.show()

## First batch image

In [None]:
# 2x4 grid with the first eight input images of the first test batch.
fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(20, 10))
ax = ax.flatten()
for i, panel in enumerate(ax):
    image = first_batch_img[i]
    show_image(image, panel)
plt.tight_layout()

## True mask

In [None]:
# Ground-truth masks of the first test batch.
plot_mask(first_batch_true_mask);

## UNet predict mask

In [None]:
# Baseline UNet predictions for the first test batch (ignored pixels already set to 8 above).
plot_mask(first_batch_pred_mask1);

## Multi kernel UNet predict mask

In [None]:
# Multi-kernel UNet predictions for the first test batch (ignored pixels already set to 8 above).
plot_mask(first_batch_pred_mask2);

## 感想
マルチカーネル設計のほうが精度がいいが、それでも精度が低い。精度よくやっている人たちはどんな工夫をしているか、損失関数含めて気になった。<br>
今度、ライブラリの内容を確認してみる。


## 🚘 自動運転でよく使われるセグメンテーションモデル

### ✅ 1. **DeepLab v3+**

* **特徴**：Atrous Convolution（空洞畳み込み） + Encoder-Decoder 構造
* **利点**：高精度。遠方の物体や細かい構造にも強い
* **用途**：自動運転の研究・プロトタイプによく使われる
* **実装**：`torchvision.models.segmentation.deeplabv3_resnet101` など

---

### ✅ 2. **PSPNet (Pyramid Scene Parsing Network)**

* **特徴**：ピラミッドプーリングモジュールで広範囲な文脈情報を取得
* **利点**：シーン全体を把握したセグメンテーションに強い
* **用途**：都市シーン理解（例：Cityscapes）に強い実績

---

### ✅ 3. **ENet**

* **特徴**：軽量なリアルタイムセグメンテーションモデル（Speed重視）
* **利点**：組み込みデバイス（NVIDIA Jetson等）に向く
* **用途**：実走行車両でのテストや小型システムに最適
* **フレームレート**：> 60 FPS on embedded devices

---

### ✅ 4. **BiSeNet (Bilateral Segmentation Network)**

* **特徴**：空間情報と文脈情報を並列に扱う2ブランチ構成
* **利点**：**リアルタイム性と高精度の両立**
* **BiSeNetV2** はさらに高効率設計

---

### ✅ 5. **SegFormer（NVIDIA, 2021）**

* **特徴**：**Transformerベース**。軽量 & 高精度
* **利点**：バックボーンにCNNを使わずにTransformerで高性能
* **用途**：SOTA性能 + 小型化（real-time対応）

---

## 📊 モデル選択の目安

| モデル         | 精度 | 推論速度 | 実用性 | 備考          |
| ----------- | -- | ---- | --- | ----------- |
| DeepLab v3+ | ◎  | △    | ◎   | 精度重視        |
| PSPNet      | ◎  | △    | ◎   | 都市風景向き      |
| ENet        | △  | ◎    | ◯   | 組み込み向け      |
| BiSeNetV2   | ◎  | ◎    | ◎   | 実車搭載の選択肢にも  |
| SegFormer   | ◎  | ◎    | ◎   | 最新。軽量 & 高性能 |

---

## 🏙️ よく使われる自動運転データセット

| データセット         | 特徴                |
| -------------- | ----------------- |
| **Cityscapes** | 都市部の高解像度画像（19クラス） |
| **CamVid**     | 道路走行シーン（車載カメラ映像）  |
| **KITTI**      | 車載センサーデータ（ラベル少なめ） |
| **BDD100K**    | 多様な時間帯・天候で大規模     |

---

### 📌 まとめ

* **精度重視（研究用途）**：DeepLab v3+, PSPNet
* **リアルタイム処理（実運用・組み込み）**：ENet, BiSeNetV2
* **次世代モデルに注目**：SegFormer
