In [1]:
import sys

sys.path.append('..')
from src.tools.print_sysinfo import print_env
print_env()

DATE : 2023-08-31
Pyton Version : 3.10.12
PyTorch Version : 2.0.1
OS : Linux 5.15.0-78-generic
CPU spec : x86_64
RAM spec : 122.84 GB
Device 0:
Name: NVIDIA GeForce RTX 3090
Total Memory: 24576.0 MB
Driver Version: 530.41.03
Device 1:
Name: NVIDIA GeForce RTX 3090
Total Memory: 24576.0 MB
Driver Version: 530.41.03


In [9]:
import os
import cv2
from PIL import Image
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from src.tools.rle_encoder import rle_encode
from src.data.dataset import SourceDataset, TargetDataset
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
import torch
import albumentations as A
from albumentations.core.transforms_interface import ImageOnlyTransform

def fisheye_circular_transform_torch(image, mask=None, fov_degree=200, focal_scale=4.5):
    _, h, w = image.shape
    
    # Convert degrees to radians using torch tensor
    radian_conversion = torch.tensor(np.pi/180, dtype=image.dtype, device=image.device)
    
    
    # Calculate the focal length using the given FOV
    f = w / (2 * torch.tan(0.5 * fov_degree * radian_conversion))
    f_scaled = f * focal_scale
    
    # Meshgrid for coordinates
    x = torch.linspace(-w//2, w//2, w).repeat(h, 1)
    y = torch.linspace(-h//2, h//2, h).unsqueeze(1).repeat(1, w)
    r = torch.sqrt(x*x + y*y)
    theta = torch.atan2(y, x)
    
    # Apply fisheye transformation
    r_fisheye = f_scaled * torch.atan(r / f_scaled)
    x_fisheye = (w // 2 + r_fisheye * torch.cos(theta)).long()
    y_fisheye = (h // 2 + r_fisheye * torch.sin(theta)).long()
    
    # Create masks for valid coordinates
    valid_coords = (x_fisheye >= 0) & (x_fisheye < w) & (y_fisheye >= 0) & (y_fisheye < h)
    
    # Initialize output images
    new_image = torch.zeros_like(image)
    if mask is not None:
        new_mask = torch.zeros_like(mask)
    else:
        new_mask = None
    
    # Assign values
    new_image[:, valid_coords] = image[:, y_fisheye[valid_coords], x_fisheye[valid_coords]]
    if mask is not None:
        new_mask[:, valid_coords] = mask[:, y_fisheye[valid_coords], x_fisheye[valid_coords]]
    
    return new_image, new_mask

class FisheyeTransform(ImageOnlyTransform):
    def __init__(self, fov_degree=200, focal_scale=4.5, always_apply=False, p=1.0):
        super(FisheyeTransform, self).__init__(always_apply, p)
        self.fov_degree = fov_degree
        self.focal_scale = focal_scale

    def apply(self, image, **params):
        image_tensor = torch.tensor(image).permute(2, 0, 1).float()
        transformed_image, _ = fisheye_circular_transform_torch(image_tensor, fov_degree=self.fov_degree, focal_scale=self.focal_scale)
        return transformed_image.permute(1, 2, 0).byte().numpy()

    def apply_to_mask(self, mask, **params):
        mask_tensor = torch.tensor(mask).unsqueeze(0).float()
        _, transformed_mask = fisheye_circular_transform_torch(mask_tensor, fov_degree=self.fov_degree, focal_scale=self.focal_scale)
        return transformed_mask.squeeze(0).byte().numpy()

In [4]:
augmentation = A.Compose(
    [
        FisheyeTransform(p=0.2),
        A.Resize(224, 224),
        A.Normalize(mean=[0.485, 0.456, 0.406], # ImageNet 데이터의 통계량으로 정규화
                    std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ]
)


transform = A.Compose(
    [   
        A.Resize(224, 224),
        A.Normalize(mean=[0.485, 0.456, 0.406], # ImageNet 데이터의 통계량으로 정규화
                    std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ]
)

In [5]:


train_dataset = SourceDataset(csv_file='train_source.csv', transform=augmentation, is_training=True)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)

valid_dataset = SourceDataset(csv_file='val_source.csv', transform=transform, is_training=True)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=4)

In [6]:
import math

def compute_iou(pred, target, num_classes):
    iou_list = []
    pred = pred.view(-1)
    target = target.view(-1)

    # For classes excluding the background
    for cls in range(num_classes - 1):  # We subtract 1 to exclude the background class
        pred_inds = pred == cls
        target_inds = target == cls
        intersection = (pred_inds[target_inds]).sum().float()
        union = (pred_inds + target_inds).sum().float()
        if union == 0:
            iou_list.append(float('nan'))  # If there is no ground truth, do not include in evaluation
        else:
            iou_list.append((intersection / union).item())
    return iou_list

def compute_mIoU(preds, labels, num_classes=13):
    iou_list = compute_iou(preds, labels, num_classes)
    valid_iou_list = [iou for iou in iou_list if not math.isnan(iou)]
    mIoU = sum(valid_iou_list) / len(valid_iou_list)
    return mIoU


In [7]:
# DeepLabV3 with MobileNetV3 backbone:

from torchvision.models.segmentation import deeplabv3_mobilenet_v3_large
model = deeplabv3_mobilenet_v3_large(pretrained=True)
model.classifier[4] = nn.Conv2d(256, 13, kernel_size=(1, 1), stride=(1, 1))
model.aux_classifier[4] = nn.Conv2d(256, 13, kernel_size=(1, 1), stride=(1, 1))

# 사전 학습된 모델의 모든 레이어를 고정
for param in model.parameters():
    param.requires_grad = False

# 마지막 레이어만 학습 가능하게 설정
for param in model.classifier[4].parameters():
    param.requires_grad = True

for param in model.aux_classifier[4].parameters():
    param.requires_grad = True

model.to(device)



In [15]:
from torch.optim.lr_scheduler import StepLR


# 1. 모델 불러오기
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)
elif torch.cuda.device_count() == 1:
    print(f"Using only 1 GPU!")
    model.to(device)
else:
    print(f"Using CPU")
    model.to(device)

# 2. 데이터 준비 (여기서는 간략하게 표현합니다)
#train_loader, val_loader = prepare_target_domain_dataloaders()

# 3. 학습 설정
criterion = nn.CrossEntropyLoss() # 예시로 CrossEntropyLoss를 사용합니다

# Optimizer 설정 시, requires_grad=True로 설정된 파라미터만 포함시킵니다.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

# Learning rate scheduler 설정
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)


# 4. 학습
num_epochs = 1000
train_losses = []
train_mIoUs = []
val_mIoUs = []

# Early stopping 관련 설정
patience = 20  # 10번의 epoch 동안 성능 향상이 없을 경우 학습 중단
no_improve_epochs = 0  # 성능 향상이 없는 epoch의 횟수
best_mIoU = 0.0  # 최고의 검증 mIoU 저장


# 보조 출력에 대한 손실 가중치
aux_loss_weight = 0.4


for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    total_iou = 0.0
    num_batches = 0
    num_images = 0
    
    for images, masks in tqdm(train_loader):
        
        images = images.float().to(device)
        masks = masks.long().to(device)
        num_images += images.size(0)
        
        optimizer.zero_grad()
        outputs = model(images)
        
        # 주 출력에 대한 손실 계산
        main_loss = criterion(outputs['out'], masks)
        
        # 보조 출력에 대한 손실 계산
        aux_loss = criterion(outputs['aux'], masks)
        
        # 두 손실을 결합
        loss = main_loss + aux_loss_weight * aux_loss
        
        loss.backward()
        # loss = criterion(outputs, masks)
        # loss.backward()
        optimizer.step()
        
        total_loss += num_images * loss.item()
        _, predicted = outputs['out'].max(1)
        total_iou += compute_mIoU(predicted, masks)
        num_batches += 1
    
    avg_loss = total_loss / num_images
    avg_train_mIoU = total_iou / num_images
    train_losses.append(avg_loss)
    train_mIoUs.append(avg_train_mIoU)
    print(f"Epoch {epoch + 1} - Training Loss: {avg_loss:.4f}, Training mIoU: {avg_train_mIoU:.4f}")

    
    # 5. 검증 (간략하게 표현)
    with torch.no_grad():
        model.eval()
        total_iou = 0
        num_images = 0
        for images, masks in tqdm(valid_loader):
            images = images.float().to(device)
            masks = masks.long().to(device)
        
            outputs = model(images)
            _, predicted = outputs['out'].max(1)
            total_iou += compute_mIoU(predicted, masks)
            num_images += images.size(0)
        avg_mIoU = total_iou / num_images
        print(f"Epoch {epoch + 1}, mIoU: {avg_mIoU:.4f}")


    # 학습률 업데이트
    scheduler.step()

    
    # Early stopping 검사
    if avg_mIoU > best_mIoU:
        best_mIoU = avg_mIoU
        # 최적의 모델 저장
        torch.save(model.state_dict(), 'best_model.pth')
        no_improve_epochs = 0
    else:
        no_improve_epochs += 1
        if no_improve_epochs >= patience:
            print("Early stopping triggered!")
            # 최적의 모델 불러오기
            model.load_state_dict(torch.load('best_model.pth'))
            break

Using 2 GPUs!


  0%|          | 0/69 [00:00<?, ?it/s]

100%|██████████| 69/69 [01:06<00:00,  1.03it/s]


Epoch 1 - Training Loss: 47.9406, Training mIoU: 0.0167


100%|██████████| 30/30 [00:08<00:00,  3.43it/s]


Epoch 1, mIoU: 0.0260


100%|██████████| 69/69 [01:09<00:00,  1.01s/it]


Epoch 2 - Training Loss: 35.5330, Training mIoU: 0.0168


100%|██████████| 30/30 [00:08<00:00,  3.50it/s]


Epoch 2, mIoU: 0.0259


100%|██████████| 69/69 [01:11<00:00,  1.04s/it]


Epoch 3 - Training Loss: 28.6847, Training mIoU: 0.0170


100%|██████████| 30/30 [00:08<00:00,  3.39it/s]


Epoch 3, mIoU: 0.0265


100%|██████████| 69/69 [01:05<00:00,  1.05it/s]


Epoch 4 - Training Loss: 24.1476, Training mIoU: 0.0172


100%|██████████| 30/30 [00:08<00:00,  3.51it/s]


Epoch 4, mIoU: 0.0262


100%|██████████| 69/69 [01:05<00:00,  1.05it/s]


Epoch 5 - Training Loss: 20.8944, Training mIoU: 0.0173


100%|██████████| 30/30 [00:08<00:00,  3.60it/s]


Epoch 5, mIoU: 0.0248


100%|██████████| 69/69 [01:09<00:00,  1.01s/it]


Epoch 6 - Training Loss: 18.9567, Training mIoU: 0.0175


100%|██████████| 30/30 [00:08<00:00,  3.56it/s]


Epoch 6, mIoU: 0.0266


100%|██████████| 69/69 [01:08<00:00,  1.01it/s]


Epoch 7 - Training Loss: 17.7453, Training mIoU: 0.0176


100%|██████████| 30/30 [00:08<00:00,  3.40it/s]


Epoch 7, mIoU: 0.0272


100%|██████████| 69/69 [01:08<00:00,  1.01it/s]


Epoch 8 - Training Loss: 16.8427, Training mIoU: 0.0178


100%|██████████| 30/30 [00:08<00:00,  3.51it/s]


Epoch 8, mIoU: 0.0273


100%|██████████| 69/69 [01:05<00:00,  1.06it/s]


Epoch 9 - Training Loss: 15.7237, Training mIoU: 0.0179


100%|██████████| 30/30 [00:08<00:00,  3.48it/s]


Epoch 9, mIoU: 0.0264


100%|██████████| 69/69 [01:02<00:00,  1.11it/s]


Epoch 10 - Training Loss: 15.2755, Training mIoU: 0.0180


100%|██████████| 30/30 [00:08<00:00,  3.57it/s]


Epoch 10, mIoU: 0.0279


100%|██████████| 69/69 [01:05<00:00,  1.05it/s]


Epoch 11 - Training Loss: 14.6785, Training mIoU: 0.0183


100%|██████████| 30/30 [00:08<00:00,  3.51it/s]


Epoch 11, mIoU: 0.0283


100%|██████████| 69/69 [01:05<00:00,  1.05it/s]


Epoch 12 - Training Loss: 14.6076, Training mIoU: 0.0184


100%|██████████| 30/30 [00:08<00:00,  3.58it/s]


Epoch 12, mIoU: 0.0282


100%|██████████| 69/69 [01:03<00:00,  1.09it/s]


Epoch 13 - Training Loss: 14.5199, Training mIoU: 0.0184


100%|██████████| 30/30 [00:08<00:00,  3.62it/s]


Epoch 13, mIoU: 0.0282


100%|██████████| 69/69 [01:08<00:00,  1.01it/s]


Epoch 14 - Training Loss: 14.5100, Training mIoU: 0.0184


100%|██████████| 30/30 [00:08<00:00,  3.56it/s]


Epoch 14, mIoU: 0.0280


100%|██████████| 69/69 [01:04<00:00,  1.06it/s]


Epoch 15 - Training Loss: 14.3430, Training mIoU: 0.0185


100%|██████████| 30/30 [00:08<00:00,  3.39it/s]


Epoch 15, mIoU: 0.0281


100%|██████████| 69/69 [01:11<00:00,  1.04s/it]


Epoch 16 - Training Loss: 14.0890, Training mIoU: 0.0184


100%|██████████| 30/30 [00:09<00:00,  3.33it/s]


Epoch 16, mIoU: 0.0280


100%|██████████| 69/69 [01:09<00:00,  1.01s/it]


Epoch 17 - Training Loss: 14.1935, Training mIoU: 0.0186


100%|██████████| 30/30 [00:08<00:00,  3.49it/s]


Epoch 17, mIoU: 0.0278


100%|██████████| 69/69 [01:04<00:00,  1.06it/s]


Epoch 18 - Training Loss: 14.0036, Training mIoU: 0.0186


100%|██████████| 30/30 [00:08<00:00,  3.53it/s]


Epoch 18, mIoU: 0.0278


100%|██████████| 69/69 [01:04<00:00,  1.07it/s]


Epoch 19 - Training Loss: 14.0523, Training mIoU: 0.0186


100%|██████████| 30/30 [00:08<00:00,  3.51it/s]


Epoch 19, mIoU: 0.0280


100%|██████████| 69/69 [01:05<00:00,  1.06it/s]


Epoch 20 - Training Loss: 13.8823, Training mIoU: 0.0187


100%|██████████| 30/30 [00:09<00:00,  3.26it/s]


Epoch 20, mIoU: 0.0279


100%|██████████| 69/69 [01:09<00:00,  1.01s/it]


Epoch 21 - Training Loss: 13.9741, Training mIoU: 0.0186


100%|██████████| 30/30 [00:08<00:00,  3.52it/s]


Epoch 21, mIoU: 0.0278


100%|██████████| 69/69 [01:07<00:00,  1.01it/s]


Epoch 22 - Training Loss: 13.9252, Training mIoU: 0.0187


100%|██████████| 30/30 [00:08<00:00,  3.43it/s]


Epoch 22, mIoU: 0.0278


100%|██████████| 69/69 [01:12<00:00,  1.05s/it]


Epoch 23 - Training Loss: 13.9762, Training mIoU: 0.0187


100%|██████████| 30/30 [00:08<00:00,  3.43it/s]


Epoch 23, mIoU: 0.0278


100%|██████████| 69/69 [01:04<00:00,  1.07it/s]


Epoch 24 - Training Loss: 13.7642, Training mIoU: 0.0187


100%|██████████| 30/30 [00:08<00:00,  3.35it/s]


Epoch 24, mIoU: 0.0279


100%|██████████| 69/69 [01:02<00:00,  1.10it/s]


Epoch 25 - Training Loss: 13.9470, Training mIoU: 0.0186


100%|██████████| 30/30 [00:08<00:00,  3.63it/s]


Epoch 25, mIoU: 0.0277


100%|██████████| 69/69 [01:07<00:00,  1.02it/s]


Epoch 26 - Training Loss: 13.8954, Training mIoU: 0.0187


100%|██████████| 30/30 [00:08<00:00,  3.42it/s]


Epoch 26, mIoU: 0.0278


100%|██████████| 69/69 [01:09<00:00,  1.01s/it]


Epoch 27 - Training Loss: 13.8438, Training mIoU: 0.0187


100%|██████████| 30/30 [00:08<00:00,  3.51it/s]


Epoch 27, mIoU: 0.0278


100%|██████████| 69/69 [01:08<00:00,  1.01it/s]


Epoch 28 - Training Loss: 13.8880, Training mIoU: 0.0187


100%|██████████| 30/30 [00:08<00:00,  3.47it/s]


Epoch 28, mIoU: 0.0278


100%|██████████| 69/69 [01:08<00:00,  1.01it/s]


Epoch 29 - Training Loss: 13.8583, Training mIoU: 0.0188


100%|██████████| 30/30 [00:08<00:00,  3.46it/s]


Epoch 29, mIoU: 0.0278


100%|██████████| 69/69 [01:08<00:00,  1.00it/s]


Epoch 30 - Training Loss: 13.8670, Training mIoU: 0.0187


100%|██████████| 30/30 [00:08<00:00,  3.34it/s]


Epoch 30, mIoU: 0.0278


100%|██████████| 69/69 [01:05<00:00,  1.05it/s]


Epoch 31 - Training Loss: 13.8503, Training mIoU: 0.0187


100%|██████████| 30/30 [00:08<00:00,  3.40it/s]


Epoch 31, mIoU: 0.0278
Early stopping triggered!


In [16]:
test_dataset = TargetDataset(csv_file='./test.csv', transform=transform, is_training=False)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)

In [17]:
with torch.no_grad():
    model.eval()
    result = []
    for images in tqdm(test_dataloader):
        images = images.float().to(device)
        outputs = model(images)['out']
        outputs = torch.softmax(outputs, dim=1).cpu()
        outputs = torch.argmax(outputs, dim=1).numpy()
        # batch에 존재하는 각 이미지에 대해서 반복
        for pred in outputs:
            pred = pred.astype(np.int32)
            pred = Image.fromarray(pred) # 이미지로 변환
            pred = pred.resize((960, 540), Image.NEAREST) # 960 x 540 사이즈로 변환
            pred = np.array(pred) # 다시 수치로 변환
            # class 0 ~ 11에 해당하는 경우에 마스크 형성 / 12(배경)는 제외하고 진행
            for class_id in range(12):
                class_mask = (pred == class_id).astype(np.int32)
                if np.sum(class_mask) > 0: # 마스크가 존재하는 경우 encode
                    mask_rle = rle_encode(class_mask)
                    result.append(mask_rle)
                else: # 마스크가 존재하지 않는 경우 -1
                    result.append(-1)

  0%|          | 0/119 [00:00<?, ?it/s]

100%|██████████| 119/119 [00:51<00:00,  2.32it/s]


In [18]:
submit = pd.read_csv('../data/raw/sample_submission.csv')
submit['mask_rle'] = result
submit

Unnamed: 0,id,mask_rle
0,TEST_0000_class_0,229904 4 230864 4 231828 52 232788 52 233748 6...
1,TEST_0000_class_1,-1
2,TEST_0000_class_2,1 206 545 622 1505 622 2465 622 3425 622 4385 ...
3,TEST_0000_class_3,268960 25 269920 25 270871 43 271831 43 272782...
4,TEST_0000_class_4,-1
...,...,...
22771,TEST_1897_class_7,-1
22772,TEST_1897_class_8,155 463 1115 463 2075 463 3035 463 3995 463 49...
22773,TEST_1897_class_9,-1
22774,TEST_1897_class_10,-1


In [14]:
submit.to_csv('./deeplabv3_mobilenet_v3_large_feature_extraction.csv', index=False)