In [1]:
import sys

sys.path.append('..')
from src.tools.print_sysinfo import print_env
print_env()

DATE : 2023-08-31
Pyton Version : 3.10.12
PyTorch Version : 2.0.1
OS : Linux 5.15.0-78-generic
CPU spec : x86_64
RAM spec : 122.84 GB
Device 0:
Name: NVIDIA GeForce RTX 3090
Total Memory: 24576.0 MB
Driver Version: 530.41.03
Device 1:
Name: NVIDIA GeForce RTX 3090
Total Memory: 24576.0 MB
Driver Version: 530.41.03


In [2]:
import os
import cv2
from PIL import Image
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from src.tools.rle_encoder import rle_encode
from src.data.dataset import SourceDataset, TargetDataset
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
import torch
import albumentations as A
from albumentations.core.transforms_interface import ImageOnlyTransform

def fisheye_circular_transform_torch(image, mask=None, fov_degree=200, focal_scale=4.5):
    _, h, w = image.shape
    
    # Convert degrees to radians using torch tensor
    radian_conversion = torch.tensor(np.pi/180, dtype=image.dtype, device=image.device)
    
    
    # Calculate the focal length using the given FOV
    f = w / (2 * torch.tan(0.5 * fov_degree * radian_conversion))
    f_scaled = f * focal_scale
    
    # Meshgrid for coordinates
    x = torch.linspace(-w//2, w//2, w).repeat(h, 1)
    y = torch.linspace(-h//2, h//2, h).unsqueeze(1).repeat(1, w)
    r = torch.sqrt(x*x + y*y)
    theta = torch.atan2(y, x)
    
    # Apply fisheye transformation
    r_fisheye = f_scaled * torch.atan(r / f_scaled)
    x_fisheye = (w // 2 + r_fisheye * torch.cos(theta)).long()
    y_fisheye = (h // 2 + r_fisheye * torch.sin(theta)).long()
    
    # Create masks for valid coordinates
    valid_coords = (x_fisheye >= 0) & (x_fisheye < w) & (y_fisheye >= 0) & (y_fisheye < h)
    
    # Initialize output images
    new_image = torch.zeros_like(image)
    if mask is not None:
        new_mask = torch.zeros_like(mask)
    else:
        new_mask = None
    
    # Assign values
    new_image[:, valid_coords] = image[:, y_fisheye[valid_coords], x_fisheye[valid_coords]]
    if mask is not None:
        new_mask[:, valid_coords] = mask[:, y_fisheye[valid_coords], x_fisheye[valid_coords]]
    
    return new_image, new_mask

class FisheyeTransform(ImageOnlyTransform):
    def __init__(self, fov_degree=200, focal_scale=4.5, always_apply=False, p=1.0):
        super(FisheyeTransform, self).__init__(always_apply, p)
        self.fov_degree = fov_degree
        self.focal_scale = focal_scale

    def apply(self, image, **params):
        image_tensor = torch.tensor(image).permute(2, 0, 1).float()
        transformed_image, _ = fisheye_circular_transform_torch(image_tensor, fov_degree=self.fov_degree, focal_scale=self.focal_scale)
        return transformed_image.permute(1, 2, 0).byte().numpy()

    def apply_to_mask(self, mask, **params):
        mask_tensor = torch.tensor(mask).unsqueeze(0).float()
        _, transformed_mask = fisheye_circular_transform_torch(mask_tensor, fov_degree=self.fov_degree, focal_scale=self.focal_scale)
        return transformed_mask.squeeze(0).byte().numpy()

In [None]:
    
def get_training_augmentation():
    train_transform = [
        A.Resize(224, 224),
        A.Normalize(always_apply=True),
        OneOf([
            GaussNoise(always_apply=True),
        ], p=0.2),
        OneOf([
            MotionBlur(p=0.2),
            MedianBlur(blur_limit=3, p=0.1, always_apply=True),
            Blur(blur_limit=3, p=0.1, always_apply=True),
        ], p=0.2),
        ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=15, p=0.2),
        OneOf([
            OpticalDistortion(p=0.3),
            GridDistortion(p=0.1),
            PiecewiseAffine(p=0.3),
        ], p=0.2),
        OneOf([
            Sharpen(always_apply=True, p=1.0),
            Emboss(always_apply=True, p=1.0),
            RandomBrightnessContrast(always_apply=True, p=1.0),
        ], p=0.3),
        HueSaturationValue(always_apply=True, p=1.0),
        FisheyeAug(k=0.5, p=1.0),
        ToTensorV2()
    ]
    return Compose(train_transform)


In [4]:
augmentation = A.Compose(
    [
        FisheyeTransform(p=0.2),
        A.Resize(224, 224),
        A.Normalize(),
        ToTensorV2()
    ]
)

In [5]:

transform = A.Compose(
    [   
        A.Resize(224, 224),
        A.Normalize(),
        ToTensorV2()
    ]
)

#augmentation = get_training_augmentation()


train_dataset = SourceDataset(csv_file='train_source.csv', transform=augmentation, is_training=True)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)

valid_dataset = SourceDataset(csv_file='val_source.csv', transform=transform, is_training=True)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=4)

In [6]:
def double_conv(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, 3, padding=1),
        nn.ReLU(inplace=True)
    )

class FPN_UNet_Dropout(nn.Module):
    def __init__(self):
        super(FPN_UNet_Dropout, self).__init__()

        # Encoder (Downsampling path)
        self.dconv_down1 = double_conv(3, 64)
        self.dropout1 = nn.Dropout(0.5)  # 추가된 Dropout 레이어
        self.dconv_down2 = double_conv(64, 128)
        self.dropout2 = nn.Dropout(0.5)  # 추가된 Dropout 레이어
        self.dconv_down3 = double_conv(128, 256)
        self.dropout3 = nn.Dropout(0.5)  # 추가된 Dropout 레이어
        self.dconv_down4 = double_conv(256, 512)
        self.dropout4 = nn.Dropout(0.5)  # 추가된 Dropout 레이어

        self.maxpool = nn.MaxPool2d(2)
        
        # Upward path and lateral connections for FPN
        self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.lateral3 = nn.Conv2d(256, 256, kernel_size=1)
        
        self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.lateral2 = nn.Conv2d(128, 128, kernel_size=1)
        
        self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.lateral1 = nn.Conv2d(64, 64, kernel_size=1)
        
        # FPN heads for each pyramid level
        self.fpn_out3 = nn.Conv2d(256, 13, kernel_size=3, padding=1)
        self.fpn_out2 = nn.Conv2d(128, 13, kernel_size=3, padding=1)
        self.fpn_out1 = nn.Conv2d(64, 13, kernel_size=3, padding=1)
        
    def forward(self, x):
        # Encoder
        conv1 = self.dconv_down1(x)
        x = self.maxpool(self.dropout1(conv1))  # Dropout 적용
        
        conv2 = self.dconv_down2(x)
        x = self.maxpool(self.dropout2(conv2))  # Dropout 적용
        
        conv3 = self.dconv_down3(x)
        x = self.maxpool(self.dropout3(conv3))  # Dropout 적용
        
        x = self.dropout4(self.dconv_down4(x))  # Dropout 적용

        
        # Upward path with lateral connections
        x = self.upconv3(x)
        conv3 = self.lateral3(conv3)
        p3 = torch.add(x, conv3)  # Element-wise addition
        out3 = self.fpn_out3(p3)
        
        x = self.upconv2(p3)
        conv2 = self.lateral2(conv2)
        p2 = torch.add(x, conv2)
        out2 = self.fpn_out2(p2)
        
        x = self.upconv1(p2)
        conv1 = self.lateral1(conv1)
        p1 = torch.add(x, conv1)
        out1 = self.fpn_out1(p1)
        
        # Note: You can return combined results or individual FPN layer outputs based on the use case.
        return out1, out2, out3

class FPN_UNet_FC(nn.Module):
    def __init__(self):
        super(FPN_UNet_FC, self).__init__()
        self.fpn_unet = FPN_UNet_Dropout()
        self.upsample = nn.Upsample(size=(224, 224), mode='bilinear', align_corners=True)
        self.conv1x1 = nn.Conv2d(13+13+13, 13, kernel_size=1)  # Assuming we're concatenating

    def forward(self, x):
        out1, out2, out3 = self.fpn_unet(x)

        # Upsample each output to the desired size: 224x224
        out1_upsampled = self.upsample(out1)
        out2_upsampled = self.upsample(out2)
        out3_upsampled = self.upsample(out3)

        # Concatenate the outputs along the channel dimension
        merged_output = torch.cat([out1_upsampled, out2_upsampled, out3_upsampled], dim=1)

        # Map to desired number of channels using 1x1 convolution
        final_output = self.conv1x1(merged_output)

        return final_output


In [7]:
import math

def compute_iou(pred, target, num_classes):
    iou_list = []
    pred = pred.view(-1)
    target = target.view(-1)

    # For classes excluding the background
    for cls in range(num_classes - 1):  # We subtract 1 to exclude the background class
        pred_inds = pred == cls
        target_inds = target == cls
        intersection = (pred_inds[target_inds]).sum().float()
        union = (pred_inds + target_inds).sum().float()
        if union == 0:
            iou_list.append(float('nan'))  # If there is no ground truth, do not include in evaluation
        else:
            iou_list.append((intersection / union).item())
    return iou_list

def compute_mIoU(preds, labels, num_classes=13):
    iou_list = compute_iou(preds, labels, num_classes)
    valid_iou_list = [iou for iou in iou_list if not math.isnan(iou)]
    mIoU = sum(valid_iou_list) / len(valid_iou_list)
    return mIoU


In [10]:
from torch.optim.lr_scheduler import StepLR


# 1. 모델 불러오기
model = FPN_UNet_FC()

if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)
else:
    print(f"Using CPU")
model.to(device)

# 2. 데이터 준비 (여기서는 간략하게 표현합니다)
#train_loader, val_loader = prepare_target_domain_dataloaders()

# 3. 학습 설정
criterion = nn.CrossEntropyLoss() # 예시로 CrossEntropyLoss를 사용합니다
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # 작은 learning rate 사용

# Learning rate scheduler 설정
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)


# 4. 학습
num_epochs = 1000
train_losses = []
train_mIoUs = []
val_mIoUs = []

# Early stopping 관련 설정
patience = 20  # 10번의 epoch 동안 성능 향상이 없을 경우 학습 중단
no_improve_epochs = 0  # 성능 향상이 없는 epoch의 횟수
best_mIoU = 0.0  # 최고의 검증 mIoU 저장

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    total_iou = 0.0
    num_batches = 0
    num_images = 0
    
    for images, masks in tqdm(train_loader):
        
        images = images.float().to(device)
        masks = masks.long().to(device)
        num_images += images.size(0)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        
        total_loss += num_images * loss.item()
        _, predicted = outputs.max(1)
        total_iou += compute_mIoU(predicted, masks)
        num_batches += 1
    
    avg_loss = total_loss / num_images
    avg_train_mIoU = total_iou / num_images
    train_losses.append(avg_loss)
    train_mIoUs.append(avg_train_mIoU)
    print(f"Epoch {epoch + 1} - Training Loss: {avg_loss:.4f}, Training mIoU: {avg_train_mIoU:.4f}")

    
    # 5. 검증 (간략하게 표현)
    with torch.no_grad():
        model.eval()
        total_iou = 0
        num_images = 0
        for images, masks in tqdm(valid_loader):
            images = images.float().to(device)
            masks = masks.long().to(device)
        
            outputs = model(images)
            _, predicted = outputs.max(1)
            total_iou += compute_mIoU(predicted, masks)
            num_images += images.size(0)
        avg_mIoU = total_iou / num_images
        print(f"Epoch {epoch + 1}, mIoU: {avg_mIoU:.4f}")


    # 학습률 업데이트
    scheduler.step()

    
    # Early stopping 검사
    if avg_mIoU > best_mIoU:
        best_mIoU = avg_mIoU
        # 최적의 모델 저장
        torch.save(model.state_dict(), 'best_model.pth')
        no_improve_epochs = 0
    else:
        no_improve_epochs += 1
        if no_improve_epochs >= patience:
            print("Early stopping triggered!")
            # 최적의 모델 불러오기
            model.load_state_dict(torch.load('best_model.pth'))
            break

Using 2 GPUs!


100%|██████████| 69/69 [01:04<00:00,  1.07it/s]


Epoch 1 - Training Loss: 1.7846, Training mIoU: 0.0897


100%|██████████| 30/30 [00:08<00:00,  3.71it/s]


Epoch 1, mIoU: 0.0077


100%|██████████| 69/69 [01:03<00:00,  1.08it/s]


Epoch 2 - Training Loss: 0.8554, Training mIoU: 0.2178


100%|██████████| 30/30 [00:08<00:00,  3.68it/s]


Epoch 2, mIoU: 0.0113


100%|██████████| 69/69 [01:06<00:00,  1.03it/s]


Epoch 3 - Training Loss: 0.6967, Training mIoU: 0.2516


100%|██████████| 30/30 [00:08<00:00,  3.68it/s]


Epoch 3, mIoU: 0.0077


100%|██████████| 69/69 [01:06<00:00,  1.04it/s]


Epoch 4 - Training Loss: 0.5998, Training mIoU: 0.2899


100%|██████████| 30/30 [00:08<00:00,  3.50it/s]


Epoch 4, mIoU: 0.0085


100%|██████████| 69/69 [01:02<00:00,  1.10it/s]


Epoch 5 - Training Loss: 0.5297, Training mIoU: 0.3376


100%|██████████| 30/30 [00:08<00:00,  3.64it/s]


Epoch 5, mIoU: 0.0106


100%|██████████| 69/69 [01:12<00:00,  1.04s/it]


Epoch 6 - Training Loss: 0.4748, Training mIoU: 0.3705


100%|██████████| 30/30 [00:08<00:00,  3.69it/s]


Epoch 6, mIoU: 0.0100


100%|██████████| 69/69 [01:09<00:00,  1.00s/it]


Epoch 7 - Training Loss: 0.4452, Training mIoU: 0.3939


100%|██████████| 30/30 [00:08<00:00,  3.52it/s]


Epoch 7, mIoU: 0.0094


100%|██████████| 69/69 [01:05<00:00,  1.05it/s]


Epoch 8 - Training Loss: 0.4138, Training mIoU: 0.4159


100%|██████████| 30/30 [00:08<00:00,  3.39it/s]


Epoch 8, mIoU: 0.0094


100%|██████████| 69/69 [01:03<00:00,  1.08it/s]


Epoch 9 - Training Loss: 0.4055, Training mIoU: 0.4220


100%|██████████| 30/30 [00:08<00:00,  3.52it/s]


Epoch 9, mIoU: 0.0092


100%|██████████| 69/69 [01:04<00:00,  1.07it/s]


Epoch 10 - Training Loss: 0.3680, Training mIoU: 0.4474


100%|██████████| 30/30 [00:08<00:00,  3.67it/s]


Epoch 10, mIoU: 0.0102


100%|██████████| 69/69 [01:07<00:00,  1.03it/s]


Epoch 11 - Training Loss: 0.3388, Training mIoU: 0.4661


100%|██████████| 30/30 [00:08<00:00,  3.40it/s]


Epoch 11, mIoU: 0.0098


100%|██████████| 69/69 [01:05<00:00,  1.05it/s]


Epoch 12 - Training Loss: 0.3342, Training mIoU: 0.4686


100%|██████████| 30/30 [00:08<00:00,  3.70it/s]


Epoch 12, mIoU: 0.0099


100%|██████████| 69/69 [01:06<00:00,  1.04it/s]


Epoch 13 - Training Loss: 0.3338, Training mIoU: 0.4686


100%|██████████| 30/30 [00:08<00:00,  3.63it/s]


Epoch 13, mIoU: 0.0095


100%|██████████| 69/69 [01:05<00:00,  1.06it/s]


Epoch 14 - Training Loss: 0.3247, Training mIoU: 0.4744


100%|██████████| 30/30 [00:08<00:00,  3.42it/s]


Epoch 14, mIoU: 0.0093


100%|██████████| 69/69 [01:05<00:00,  1.05it/s]


Epoch 15 - Training Loss: 0.3255, Training mIoU: 0.4737


100%|██████████| 30/30 [00:08<00:00,  3.45it/s]


Epoch 15, mIoU: 0.0099


100%|██████████| 69/69 [01:07<00:00,  1.02it/s]


Epoch 16 - Training Loss: 0.3261, Training mIoU: 0.4765


100%|██████████| 30/30 [00:08<00:00,  3.74it/s]


Epoch 16, mIoU: 0.0100


100%|██████████| 69/69 [01:06<00:00,  1.03it/s]


Epoch 17 - Training Loss: 0.3178, Training mIoU: 0.4799


100%|██████████| 30/30 [00:08<00:00,  3.49it/s]


Epoch 17, mIoU: 0.0100


100%|██████████| 69/69 [01:04<00:00,  1.07it/s]


Epoch 18 - Training Loss: 0.3189, Training mIoU: 0.4797


100%|██████████| 30/30 [00:08<00:00,  3.50it/s]


Epoch 18, mIoU: 0.0099


100%|██████████| 69/69 [01:03<00:00,  1.08it/s]


Epoch 19 - Training Loss: 0.3113, Training mIoU: 0.4837


100%|██████████| 30/30 [00:08<00:00,  3.63it/s]


Epoch 19, mIoU: 0.0094


100%|██████████| 69/69 [01:05<00:00,  1.06it/s]


Epoch 20 - Training Loss: 0.3129, Training mIoU: 0.4829


100%|██████████| 30/30 [00:07<00:00,  3.76it/s]


Epoch 20, mIoU: 0.0104


100%|██████████| 69/69 [01:05<00:00,  1.06it/s]


Epoch 21 - Training Loss: 0.3061, Training mIoU: 0.4876


100%|██████████| 30/30 [00:08<00:00,  3.46it/s]


Epoch 21, mIoU: 0.0099


100%|██████████| 69/69 [01:04<00:00,  1.06it/s]


Epoch 22 - Training Loss: 0.3073, Training mIoU: 0.4872


100%|██████████| 30/30 [00:08<00:00,  3.72it/s]

Epoch 22, mIoU: 0.0098
Early stopping triggered!





In [12]:
test_dataset = TargetDataset(csv_file='./test.csv', transform=transform, is_training=False)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)

In [14]:
with torch.no_grad():
    model.eval()
    result = []
    for images in tqdm(test_dataloader):
        images = images.float().to(device)
        outputs = model(images)
        outputs = torch.softmax(outputs, dim=1).cpu()
        outputs = torch.argmax(outputs, dim=1).numpy()
        # batch에 존재하는 각 이미지에 대해서 반복
        for pred in outputs:
            pred = pred.astype(np.int32)
            pred = Image.fromarray(pred) # 이미지로 변환
            pred = pred.resize((960, 540), Image.NEAREST) # 960 x 540 사이즈로 변환
            pred = np.array(pred) # 다시 수치로 변환
            # class 0 ~ 11에 해당하는 경우에 마스크 형성 / 12(배경)는 제외하고 진행
            for class_id in range(12):
                class_mask = (pred == class_id).astype(np.int32)
                if np.sum(class_mask) > 0: # 마스크가 존재하는 경우 encode
                    mask_rle = rle_encode(class_mask)
                    result.append(mask_rle)
                else: # 마스크가 존재하지 않는 경우 -1
                    result.append(-1)

100%|██████████| 119/119 [00:46<00:00,  2.59it/s]


In [16]:
submit = pd.read_csv('../data/raw/sample_submission.csv')
submit['mask_rle'] = result
submit

Unnamed: 0,id,mask_rle
0,TEST_0000_class_0,69871 4 69884 17 70831 4 70844 17 71791 4 7180...
1,TEST_0000_class_1,-1
2,TEST_0000_class_2,1 111 725 17 747 325 1685 17 1707 317 2680 25 ...
3,TEST_0000_class_3,-1
4,TEST_0000_class_4,-1
...,...,...
22771,TEST_1897_class_7,-1
22772,TEST_1897_class_8,104 540 648 150 858 17 1064 540 1608 150 1818 ...
22773,TEST_1897_class_9,-1
22774,TEST_1897_class_10,-1


In [17]:
submit.to_csv('./augmentation_submit.csv', index=False)