#  install

In [19]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

import segmentation_models_pytorch as smp
from sklearn.model_selection import KFold
from copy import deepcopy

In [20]:
!pip install -q -U segmentation-models-pytorch albumentations > /dev/null

In [21]:
from google.colab import drive
drive.mount('/content/gdrive')
%cd "/content/gdrive/MyDrive/sw_contest"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/MyDrive/sw_contest


In [22]:
!pip install --upgrade opencv-python



# Utils

In [5]:
# RLE 디코딩 함수
def rle_decode(mask_rle, shape):
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE 인코딩 함수
def rle_encode(mask):
    pixels = mask.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

# Custom Dataset

In [6]:
class SatelliteDataset(Dataset):
    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data.iloc[idx, 1]
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:

            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

# Data augmentation

* 우승팀 참조: https://github.com/drivendataorg/open-cities-ai-challenge/blob/main/1st%20Place/src/datasets/transforms.py

* 3단계에 걸쳐 new dataset 생성. 이전 단계에 만들어진 new dataset은 원본 데이터와 합쳐짐.
따라서 각 단계 및 모델마다 적용되는 augmentation 다름

* 여기서는 4개의 train augmentation 함수 생성

Augmentation code

In [7]:
def train_transform_1():
  return A.Compose([
    A.RandomCrop(512, 512, p=1.),
    A.Flip(p=0.75),
    A.RandomBrightnessContrast(p=0.5),
    A.Normalize(),
    ToTensorV2(transpose_mask=True)
])


def train_transform_2():
  return A.Compose([

    A.RandomScale(scale_limit=0.3, p=0.5),
    A.PadIfNeeded(768, 768, p=1),
    A.RandomCrop(768, 768, p=1.),
    A.Flip(p=0.75),
    A.Downscale(scale_min=0.5, scale_max=0.75, p=0.05),

    # color transforms
    A.OneOf(
        [
            A.RandomBrightnessContrast(p=1),
            A.RandomGamma(p=1),
            A.ChannelShuffle(p=0.2),
            A.HueSaturationValue(p=1),
            A.RGBShift(p=1),
        ],
        p=0.5,
    ),

    # noise transforms
    A.OneOf(
        [
            A.GaussNoise(p=1),
            A.MultiplicativeNoise(p=1),
            A.IAASharpen(p=1),
            # A.ImageCompression(quality_lower=0.7, p=1),
            A.GaussianBlur(p=1),
        ],
        p=0.2,
    ),
    A.Normalize(),
    ToTensorV2(transpose_mask=True)
])

def train_transform_3():
  return A.Compose([
          A.RandomScale(scale_limit=0.3, p=0.5),
          A.PadIfNeeded(1024, 1024, p=1),
          A.RandomCrop(1024, 1024, p=1.),
          A.Flip(p=0.75),
          A.Downscale(scale_min=0.5, scale_max=0.75, p=0.05),

          # color transforms
          A.OneOf(
              [
                  A.RandomBrightnessContrast(p=1),
                  A.RandomGamma(p=1),
                  A.ChannelShuffle(p=0.2),
                  A.HueSaturationValue(p=1),
                  A.RGBShift(p=1),
              ],
              p=0.5,
          ),

          # noise transforms
          A.OneOf(
              [
                  A.GaussNoise(p=1),
                  A.MultiplicativeNoise(p=1),
                  A.IAASharpen(p=1),
                  # A.ImageCompression(quality_lower=0.7, p=1),
                  A.GaussianBlur(p=1),
              ],
              p=0.2,
          ),
          A.Normalize(),
          ToTensorV2(transpose_mask=True)
      ])

def train_transform_4():
  return A.Compose([
          A.ShiftScaleRotate(scale_limit=0.2, rotate_limit=45, border_mode=0, value=0, p=0.7),
          A.PadIfNeeded(768, 768, border_mode=0, value=0, p=1.),
          A.RandomCrop(768, 768, p=1.),
          A.Flip(p=0.75),
          A.Downscale(scale_min=0.5, scale_max=0.75, p=0.05),
          A.MaskDropout(max_objects=3, image_fill_value=0, mask_fill_value=0, p=0.1),

          # color transforms
          A.OneOf(
              [
                  A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=1),
                  A.RandomGamma(gamma_limit=(70, 130), p=1),
                  A.ChannelShuffle(p=0.2),
                  A.HueSaturationValue(hue_shift_limit=30, sat_shift_limit=40, val_shift_limit=30, p=1),
                  A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=30, p=1),
              ],
              p=0.8,
          ),

          # distortion
          A.OneOf(
              [
                  A.ElasticTransform(p=1),
                  A.OpticalDistortion(p=1),
                  A.GridDistortion(p=1),
                  A.IAAPerspective(p=1),
              ],
              p=0.2,
          ),

          # noise transforms
          A.OneOf(
              [
                  A.GaussNoise(p=1),
                  A.MultiplicativeNoise(p=1),
                  A.IAASharpen(p=1),
                  A.GaussianBlur(p=1),
              ],
              p=0.2,
          ),
          A.Normalize(),
          ToTensorV2(transpose_mask=True)
       ])

def train_transform():
        return A.Compose(
            [
                A.Normalize(),
                ToTensorV2(transpose_mask=True)
            ]
        )

def valid_transform():
        return A.Compose(
            [
                A.Normalize(),
                ToTensorV2(transpose_mask=True)
            ]
        )

def test_transform():
        return A.Compose(
            [
                A.Normalize(),
                ToTensorV2(transpose_mask=True)
            ]
        )
train_transform = train_transform()
train_transform_1 = train_transform_1()
train_transform_2 = train_transform_2()
train_transform_3 = train_transform_3()
train_transform_4 = train_transform_4()

valid_transform = valid_transform()
test_transform = test_transform()




Augmentation Data

In [8]:
## train_data transform
# origin - no augmentation
dataset = SatelliteDataset(csv_file='./train.csv', transform=train_transform)

# transform 1
dataset1 = SatelliteDataset(csv_file='./train.csv', transform=train_transform_1)
#dataloader1 = DataLoader(dataset1, batch_size=8, shuffle=True, num_workers=4)

# transform 2
dataset2 = SatelliteDataset(csv_file='./train.csv', transform=train_transform_2)
#dataloader2 = DataLoader(dataset2, batch_size=8, shuffle=True, num_workers=4)

# transform 3
dataset3 = SatelliteDataset(csv_file='./train.csv', transform=train_transform_3)
#dataloader3 = DataLoader(dataset3, batch_size=8, shuffle=True, num_workers=4)

# transform 4
dataset4 = SatelliteDataset(csv_file='./train.csv', transform=train_transform_4)
#dataloader4 = DataLoader(dataset4, batch_size=8, shuffle=True, num_workers=4)

## test_data transform
test_dataset = SatelliteDataset(csv_file='./test.csv', transform=test_transform, infer=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=1)

# Model Train
* Segmentation mdeol: https://github.com/qubvel/segmentation_models.pytorch/tree/master/segmentation_models_pytorch/encoders

* Winner model: https://github.com/drivendataorg/open-cities-ai-challenge/tree/main/1st%20Place

* Whole Pipeline of 3 stages:
 1. Training 10 Unet++ models on that data. make prediction with ensemble of 10 models.
 2. taka prediction from 1 and prepare as train_data. train 10 unet++ models with train_data and stage 1 data and prediction.
 3. taka prediction from 2 and prepare as train_data. train 10 unet++ models with train_data and stage 2 data and prediction.

## Stage 1

* Step 1 - prepraring train data
* Step 2 - training 10 Unet++ models on that data
* Step 3 - make prediction with ensemble of 10 models

stage1-effb7-f0~4 -> make 5 models

In [None]:
# KFold 인스턴스 생성
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 데이터셋 길이
dataset_size = len(dataset4)
indices = list(range(dataset_size))

# 초기 설정
lr = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 50
num_classes = 1

# 손실 함수와 옵티마이저 정의
# 이진 분류를 위한 BCEWithLogitsLoss (Sigmoid 포함)
criterion = torch.nn.BCEWithLogitsLoss()

# 교차 검증 루프
for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
    print(f'Fold {fold+1}')

    train_subsample = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsample = torch.utils.data.SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset4, batch_size=8, sampler=train_subsample, num_workers=4)
    val_loader = DataLoader(dataset4, batch_size=8, sampler=val_subsample, num_workers=4)

    # 모델 초기화 및 최적화(각 폴드마다)
    model = smp.UnetPlusPlus(encoder_name = "efficientnet-b7", encoder_weights='imagenet', in_channels=3, classes=num_classes)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # 최고 모델 저장 위한 변수 초기화
    best_model_wts = deepcopy(model.state_dict())
    lowest_loss = float('inf')

    for epoch in range(num_epochs):
      model.train() # 모델 훈련 모드로 설정
      train_loss = 0
      for images, masks in tqdm(train_loader):
          images = images.to(device)
          masks = masks.to(device).float()

          optimizer.zero_grad() # gradient 초기화
          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)
          loss.backward() # 역전파
          optimizer.step() # 가중치 업데이트

          train_loss += loss.item() # 에폭별 총 손실 계산
      train_loss /= len(train_loader) # 평균 손실 계산

      model.eval() # 모델을 평가 모드로 설정
      val_loss = 0

      with torch.no_grad(): # 그레디언트 계산 비활성화
        for images, masks in val_loader:
          images = images.to(device)
          masks = masks.to(device).float()

          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)

          val_loss += loss.item()
      val_loss /= len(val_loader)

      if val_loss < lowest_loss:
        lowest_loss = val_loss
        best_model_wts = deepcopy(model.state_dict())

    torch.save(best_model_wts, f'./weights/stage1-effb7-f{fold}-best.pth')

stage1-srx101-f0~4 -> make 5 models

In [None]:
import torch
from tqdm import tqdm
import segmentation_models_pytorch as smp
from sklearn.model_selection import KFold
from copy import deepcopy

# KFold 인스턴스 생성
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 데이터셋 길이
dataset_size = len(dataset2)
indices = list(range(dataset_size))

# 초기 설정
lr = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 50
num_classes = 1

# 손실 함수와 옵티마이저 정의
# 이진 분류를 위한 BCEWithLogitsLoss (Sigmoid 포함)
criterion = torch.nn.BCEWithLogitsLoss()

# 교차 검증 루프
for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
    print(f'Fold {fold+1}')

    train_subsample = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsample = torch.utils.data.SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset2, batch_size=8, sampler=train_subsample, num_workers=4)
    val_loader = DataLoader(dataset2, batch_size=8, sampler=val_subsample, num_workers=4)

    # 모델 초기화 및 최적화(각 폴드마다)
    model = smp.UnetPlusPlus(encoder_name = "se_resnext101_32x4d", encoder_weights='imagenet', in_channels=3, classes=num_classes)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # 최고 모델 저장 위한 변수 초기화
    best_model_wts = deepcopy(model.state_dict())
    lowest_loss = float('inf')

    for epoch in range(num_epochs):
      model.train() # 모델 훈련 모드로 설정
      train_loss = 0
      for images, masks in tqdm(train_loader):
          images = images.to(device)
          masks = masks.to(device).float()

          optimizer.zero_grad() # gradient 초기화
          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)
          loss.backward() # 역전파
          optimizer.step() # 가중치 업데이트

          train_loss += loss.item() # 에폭별 총 손실 계산
      train_loss /= len(train_loader) # 평균 손실 계산

      model.eval() # 모델을 평가 모드로 설정
      val_loss = 0

      with torch.no_grad(): # 그레디언트 계산 비활성화
        for images, masks in val_loader:
          images = images.to(device)
          masks = masks.to(device).float()

          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)

          val_loss += loss.item()
      val_loss /= len(val_loader)

      if val_loss < lowest_loss:
        lowest_loss = val_loss
        best_model_wts = deepcopy(model.state_dict())

    torch.save(best_model_wts, f'./weights/stage1-srx101-f{fold}-best.pth')

Load weights & Ensemble & Prediction  

In [None]:
# 가중치 로드
weight = torch.load('./weights/stage1-effb7-f0-best.pth')
model0 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model0.to(device)
model0.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage1-effb7-f1-best.pth')
model1 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model1.to(device)
model1.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage1-effb7-f2-best.pth')
model2 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model2.to(device)
model2.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage1-effb7-f3-best.pth')
model3 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model3.to(device)
model3.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage1-effb7-f4-best.pth')
model4 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model4.to(device)
model4.load_state_dict(weight, strict=False)



weight = torch.load('./weights/stage1-srx101-f0-best.pth')
model5 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model5.to(device)
model5.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage1-srx101-f1-best.pth')
model6 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model6.to(device)
model6.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage1-srx101-f2-best.pth')
model7 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model7.to(device)
model7.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage1-srx101-f3-best.pth')
model8 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model8.to(device)
model8.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage1-srx101-f4-best.pth')
model9 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model9.to(device)
model9.load_state_dict(weight, strict=False)


In [None]:
# 1단계: 예측 수행
final_predictions = []
with torch.no_grad():
    for images in tqdm(test_dataloader):
        images = images.float().to(device)
        masks = [model(images) for model in [model0, model1, model2, model3, model4, model5, model6, model7, model8, model9]]
        masks = [torch.sigmoid(mask).cpu().numpy() for mask in masks]
        masks = [np.squeeze(mask, axis=1) for mask in masks]
        averaged_mask = np.mean(masks, axis=0)
        final_mask = (averaged_mask > 0.5).astype(np.uint8)
        final_predictions.extend(final_mask)

# 2단계: RLE 인코딩 적용
encoded_masks = [rle_encode(mask) for mask in final_predictions]

# 3단계: 새로운 데이터셋 생성
stage1_dataset = []
for i, image in enumerate(test_dataset):
    # 원본 이미지를 가져옵니다 (변환 적용된 상태)
    original_image = image[0]

    # 해당 이미지의 예측된 마스크(의사 레이블)를 가져옵니다
    predicted_mask = encoded_masks[i]

    # 원본 이미지와 예측된 마스크를 쌍으로 새 데이터셋에 추가합니다
    stage1_dataset.append((original_image, predicted_mask))


## Stage2

* Step 1 - take prediction from stage 1 and prepare as train data
* Step 2 - train 10 Unet++ models with train_data and stage_2 data
* Step 3 - make predictions with new models

stage2-effb7-f0~4 -> make 5 models

In [None]:
import torch
from tqdm import tqdm
import segmentation_models_pytorch as smp
from sklearn.model_selection import KFold
from copy import deepcopy

# KFold 인스턴스 생성
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 데이터셋 길이
dataset_size = len(dataset4 + stage1_dataset)
indices = list(range(dataset_size))

# 초기 설정
lr = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 50
num_classes = 1

# 손실 함수와 옵티마이저 정의
# 이진 분류를 위한 BCEWithLogitsLoss (Sigmoid 포함)
criterion = torch.nn.BCEWithLogitsLoss()

# 교차 검증 루프
for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
    print(f'Fold {fold+1}')

    train_subsample = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsample = torch.utils.data.SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset4 + stage1_dataset, batch_size=8, sampler=train_subsample, num_workers=4)
    val_loader = DataLoader(dataset4 + stage1_dataset, batch_size=8, sampler=val_subsample, num_workers=4)

    # 모델 초기화 및 최적화(각 폴드마다)
    model = smp.UnetPlusPlus(encoder_name = "efficientnet-b7", encoder_weights='imagenet', in_channels=3, classes=num_classes)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # 최고 모델 저장 위한 변수 초기화
    best_model_wts = deepcopy(model.state_dict())
    lowest_loss = float('inf')

    for epoch in range(num_epochs):
      model.train() # 모델 훈련 모드로 설정
      train_loss = 0
      for images, masks in tqdm(train_loader):
          images = images.to(device)
          masks = masks.to(device).float()

          optimizer.zero_grad() # gradient 초기화
          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)
          loss.backward() # 역전파
          optimizer.step() # 가중치 업데이트

          train_loss += loss.item() # 에폭별 총 손실 계산
      train_loss /= len(train_loader) # 평균 손실 계산

      model.eval() # 모델을 평가 모드로 설정
      val_loss = 0

      with torch.no_grad(): # 그레디언트 계산 비활성화
        for images, masks in val_loader:
          images = images.to(device)
          masks = masks.to(device).float()

          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)

          val_loss += loss.item()
      val_loss /= len(val_loader)

      if val_loss < lowest_loss:
        lowest_loss = val_loss
        best_model_wts = deepcopy(model.state_dict())

    torch.save(best_model_wts, f'./weights/stage2-effb7-f{fold}-best.pth')

stage2-srx101-f0~4 -> make 5 models

In [None]:
import torch
from tqdm import tqdm
import segmentation_models_pytorch as smp
from sklearn.model_selection import KFold
from copy import deepcopy

# KFold 인스턴스 생성
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 데이터셋 길이
dataset_size = len(dataset2 + stage1_dataset)
indices = list(range(dataset_size))

# 초기 설정
lr = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 50
num_classes = 1

# 손실 함수와 옵티마이저 정의
# 이진 분류를 위한 BCEWithLogitsLoss (Sigmoid 포함)
criterion = torch.nn.BCEWithLogitsLoss()

# 교차 검증 루프
for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
    print(f'Fold {fold+1}')

    train_subsample = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsample = torch.utils.data.SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset2 + stage1_dataset, batch_size=8, sampler=train_subsample, num_workers=4)
    val_loader = DataLoader(dataset2 + stage1_dataset, batch_size=8, sampler=val_subsample, num_workers=4)

    # 모델 초기화 및 최적화(각 폴드마다)
    model = smp.UnetPlusPlus(encoder_name = "se_resnext101_32x4d", encoder_weights='imagenet', in_channels=3, classes=num_classes)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # 최고 모델 저장 위한 변수 초기화
    best_model_wts = deepcopy(model.state_dict())
    lowest_loss = float('inf')

    for epoch in range(num_epochs):
      model.train() # 모델 훈련 모드로 설정
      train_loss = 0
      for images, masks in tqdm(train_loader):
          images = images.to(device)
          masks = masks.to(device).float()

          optimizer.zero_grad() # gradient 초기화
          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)
          loss.backward() # 역전파
          optimizer.step() # 가중치 업데이트

          train_loss += loss.item() # 에폭별 총 손실 계산
      train_loss /= len(train_loader) # 평균 손실 계산

      model.eval() # 모델을 평가 모드로 설정
      val_loss = 0

      with torch.no_grad(): # 그레디언트 계산 비활성화
        for images, masks in val_loader:
          images = images.to(device)
          masks = masks.to(device).float()

          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)

          val_loss += loss.item()
      val_loss /= len(val_loader)

      if val_loss < lowest_loss:
        lowest_loss = val_loss
        best_model_wts = deepcopy(model.state_dict())

    torch.save(best_model_wts, f'./weights/stage2-srx101-f{fold}-best.pth')

Load weights & Ensemble & Prediction  

In [None]:
# 가중치 로드
weight = torch.load('./weights/stage2-effb7-f0-best.pth')
model0 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model0.to(device)
model0.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage2-effb7-f1-best.pth')
model1 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model1.to(device)
model1.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage2-effb7-f2-best.pth')
model2 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model2.to(device)
model2.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage2-effb7-f3-best.pth')
model3 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model3.to(device)
model3.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage2-effb7-f4-best.pth')
model4 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model4.to(device)
model4.load_state_dict(weight, strict=False)



weight = torch.load('./weights/stage2-srx101-f0-best.pth')
model5 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model5.to(device)
model5.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage2-srx101-f1-best.pth')
model6 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model6.to(device)
model6.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage2-srx101-f2-best.pth')
model7 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model7.to(device)
model7.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage2-srx101-f3-best.pth')
model8 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model8.to(device)
model8.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage2-srx101-f4-best.pth')
model9 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model9.to(device)
model9.load_state_dict(weight, strict=False)


In [None]:
# 1단계: 예측 수행
final_predictions = []
with torch.no_grad():
    for images in tqdm(test_dataloader):
        images = images.float().to(device)
        masks = [model(images) for model in [model0, model1, model2, model3, model4, model5, model6, model7, model8, model9]]
        masks = [torch.sigmoid(mask).cpu().numpy() for mask in masks]
        masks = [np.squeeze(mask, axis=1) for mask in masks]
        averaged_mask = np.mean(masks, axis=0)
        final_mask = (averaged_mask > 0.5).astype(np.uint8)
        final_predictions.extend(final_mask)

# 2단계: RLE 인코딩 적용
encoded_masks = [rle_encode(mask) for mask in final_predictions]

# 3단계: 새로운 데이터셋 생성
stage2_dataset = []
for i, image in enumerate(test_dataset):
    # 원본 이미지를 가져옵니다 (변환 적용된 상태)
    original_image = image[0]

    # 해당 이미지의 예측된 마스크(의사 레이블)를 가져옵니다
    predicted_mask = encoded_masks[i]

    # 원본 이미지와 예측된 마스크를 쌍으로 새 데이터셋에 추가합니다
    stage2_dataset.append((original_image, predicted_mask))


## stage3

* Step 1 - take prediction from stage 2 and prepare as train data
* Step 2 - train 5 Unet++ models with train data and stage_3 data
* Step 3 - make prediction with new models

stage3-effb7-f0~4

In [None]:
import torch
from tqdm import tqdm
import segmentation_models_pytorch as smp
from sklearn.model_selection import KFold
from copy import deepcopy

# KFold 인스턴스 생성
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 데이터셋 길이
dataset_size = len(dataset4 + stage2_dataset)
indices = list(range(dataset_size))

# 초기 설정
lr = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 50
num_classes = 1

# 손실 함수와 옵티마이저 정의
# 이진 분류를 위한 BCEWithLogitsLoss (Sigmoid 포함)
criterion = torch.nn.BCEWithLogitsLoss()

# 교차 검증 루프
for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
    print(f'Fold {fold+1}')

    train_subsample = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsample = torch.utils.data.SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset4 + stage2_dataset, batch_size=8, sampler=train_subsample, num_workers=4)
    val_loader = DataLoader(dataset4 + stage2_dataset, batch_size=8, sampler=val_subsample, num_workers=4)

    # 모델 초기화 및 최적화(각 폴드마다)
    model = smp.UnetPlusPlus(encoder_name = "efficientnet-b7", encoder_weights='imagenet', in_channels=3, classes=num_classes)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # 최고 모델 저장 위한 변수 초기화
    best_model_wts = deepcopy(model.state_dict())
    lowest_loss = float('inf')

    for epoch in range(num_epochs):
      model.train() # 모델 훈련 모드로 설정
      train_loss = 0
      for images, masks in tqdm(train_loader):
          images = images.to(device)
          masks = masks.to(device).float()

          optimizer.zero_grad() # gradient 초기화
          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)
          loss.backward() # 역전파
          optimizer.step() # 가중치 업데이트

          train_loss += loss.item() # 에폭별 총 손실 계산
      train_loss /= len(train_loader) # 평균 손실 계산

      model.eval() # 모델을 평가 모드로 설정
      val_loss = 0

      with torch.no_grad(): # 그레디언트 계산 비활성화
        for images, masks in val_loader:
          images = images.to(device)
          masks = masks.to(device).float()

          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)

          val_loss += loss.item()
      val_loss /= len(val_loader)

      if val_loss < lowest_loss:
        lowest_loss = val_loss
        best_model_wts = deepcopy(model.state_dict())

    torch.save(best_model_wts, f'./weights/stage3-effb7-f{fold}-best.pth')

stage3-srx101-f0~4

In [None]:
import torch
from tqdm import tqdm
import segmentation_models_pytorch as smp
from sklearn.model_selection import KFold
from copy import deepcopy

# KFold 인스턴스 생성
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 데이터셋 길이
dataset_size = len(dataset2 + stage2_dataset)
indices = list(range(dataset_size))

# 초기 설정
lr = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 50
num_classes = 1

# 손실 함수와 옵티마이저 정의
# 이진 분류를 위한 BCEWithLogitsLoss (Sigmoid 포함)
criterion = torch.nn.BCEWithLogitsLoss()

# 교차 검증 루프
for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
    print(f'Fold {fold+1}')

    train_subsample = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsample = torch.utils.data.SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset2 + stage2_dataset, batch_size=8, sampler=train_subsample, num_workers=4)
    val_loader = DataLoader(dataset2 + stage2_dataset, batch_size=8, sampler=val_subsample, num_workers=4)

    # 모델 초기화 및 최적화(각 폴드마다)
    model = smp.UnetPlusPlus(encoder_name = "se_resnext101_32x4d", encoder_weights='imagenet', in_channels=3, classes=num_classes)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # 최고 모델 저장 위한 변수 초기화
    best_model_wts = deepcopy(model.state_dict())
    lowest_loss = float('inf')

    for epoch in range(num_epochs):
      model.train() # 모델 훈련 모드로 설정
      train_loss = 0
      for images, masks in tqdm(train_loader):
          images = images.to(device)
          masks = masks.to(device).float()

          optimizer.zero_grad() # gradient 초기화
          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)
          loss.backward() # 역전파
          optimizer.step() # 가중치 업데이트

          train_loss += loss.item() # 에폭별 총 손실 계산
      train_loss /= len(train_loader) # 평균 손실 계산

      model.eval() # 모델을 평가 모드로 설정
      val_loss = 0

      with torch.no_grad(): # 그레디언트 계산 비활성화
        for images, masks in val_loader:
          images = images.to(device)
          masks = masks.to(device).float()

          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)

          val_loss += loss.item()
      val_loss /= len(val_loader)

      if val_loss < lowest_loss:
        lowest_loss = val_loss
        best_model_wts = deepcopy(model.state_dict())

    torch.save(best_model_wts, f'./weights/stage3-srx101-f{fold}-best.pth')

stage3-inrv2-f0

In [None]:
# SSL 인증서가 유효하지 않을 때 특히 임시로 문제를 해결하기 위한 방법으로 사용됩니다.
# 그러나 보안상의 이유로 인해 일반적으로 권장되지 않습니다.

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
import torch
from tqdm import tqdm
import segmentation_models_pytorch as smp
from sklearn.model_selection import KFold
from copy import deepcopy

# KFold 인스턴스 생성
kf = KFold(n_splits=1, shuffle=True, random_state=42)

# 데이터셋 길이
dataset_size = len(dataset2 + stage2_dataset)
indices = list(range(dataset_size))

# 초기 설정
lr = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 50
num_classes = 1

# 손실 함수와 옵티마이저 정의
# 이진 분류를 위한 BCEWithLogitsLoss (Sigmoid 포함)
criterion = torch.nn.BCEWithLogitsLoss()

# 교차 검증 루프
for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
    print(f'Fold {fold+1}')

    train_subsample = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsample = torch.utils.data.SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset2 + stage2_dataset, batch_size=8, sampler=train_subsample, num_workers=4)
    val_loader = DataLoader(dataset2 + stage2_dataset, batch_size=8, sampler=val_subsample, num_workers=4)

    # 모델 초기화 및 최적화(각 폴드마다)
    model = smp.UnetPlusPlus(encoder_name = "inceptionresnetv2", encoder_weights='imagenet', in_channels=3, classes=1)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # 최고 모델 저장 위한 변수 초기화
    best_model_wts = deepcopy(model.state_dict())
    lowest_loss = float('inf')

    for epoch in range(num_epochs):
      model.train() # 모델 훈련 모드로 설정
      train_loss = 0
      for images, masks in tqdm(train_loader):
          images = images.to(device)
          masks = masks.to(device).float()

          optimizer.zero_grad() # gradient 초기화
          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)
          loss.backward() # 역전파
          optimizer.step() # 가중치 업데이트

          train_loss += loss.item() # 에폭별 총 손실 계산
      train_loss /= len(train_loader) # 평균 손실 계산

      model.eval() # 모델을 평가 모드로 설정
      val_loss = 0

      with torch.no_grad(): # 그레디언트 계산 비활성화
        for images, masks in val_loader:
          images = images.to(device)
          masks = masks.to(device).float()

          outputs = model(images)
          outputs = outputs.squeeze(1)
          loss = criterion(outputs, masks)

          val_loss += loss.item()
      val_loss /= len(val_loader)

      if val_loss < lowest_loss:
        lowest_loss = val_loss
        best_model_wts = deepcopy(model.state_dict())

    torch.save(best_model_wts, f'./weights/stage3-inrv2-f{fold}-best.pth')

Load weights & Ensemble & Prediction  

In [None]:
# 가중치 로드
weight = torch.load('./weights/stage3-effb7-f0-best.pth')
model0 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model0.to(device)
model0.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage3-effb7-f5-best.pth')
model1 = smp.UnetPlusPlus(encoder_name="efficientnet-b7", encoder_weights=None, in_channels=3, classes=num_classes)
model1.to(device)
model1.load_state_dict(weight, strict=False)


weight = torch.load('./weights/stage3-inrv2-f0-best.pth')
model2 = smp.UnetPlusPlus(encoder_name="inceptionresnetv2", encoder_weights=None, in_channels=3, classes=num_classes)
model2.to(device)
model2.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage3-srx101-f0-best.pth')
model3 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model3.to(device)
model3.load_state_dict(weight, strict=False)

weight = torch.load('./weights/stage3-srx101-f5-best.pth')
model4 = smp.UnetPlusPlus(encoder_name="se_resnext101_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model4.to(device)
model4.load_state_dict(weight, strict=False)


In [None]:
test_dataset = SatelliteDataset(csv_file='./test.csv', transform=test_transform, infer=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=2)

model0.eval()
model1.eval()
model2.eval()
model3.eval()
model4.eval()


with torch.no_grad():
    result = []
    for images in tqdm(test_dataloader):
        images = images.float().to(device)
        masks = [model(images) for model in [model0, model1, model2, model3, model4]]
        masks = [torch.sigmoid(mask).cpu().numpy() for mask in masks]
        masks = [np.squeeze(mask, axis=1) for mask in masks]
        averaged_mask = np.mean(masks, axis=0)
        final_predictions = (averaged_mask > 0.5).astype(np.uint8)

        for i in range(len(images)):
          mask_rle = rle_encode(final_predictions[i])
          if mask_rle == '': # 예측된 건물 픽셀이 없는 경우 -1
              result.append(-1)
          else:
              result.append(mask_rle)





In [None]:
submit = pd.read_csv('./sample_submission.csv')
submit['mask_rle'] = result
submit.to_csv("./submission/stage1-3.csv", index=False)


# Pretrained Model load in Github
* last stage 3 weights

## load model

In [23]:
num_classes = 1
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

weight = torch.load('/content/gdrive/MyDrive/sw_contest/pretrained/models/stage3/effb1-f0/checkpoints/best.pth')
model1 = smp.UnetPlusPlus(encoder_name="efficientnet-b1", encoder_weights=None, in_channels=3, classes=num_classes)
model1.to(device)
model1.load_state_dict(weight, strict=False)


_IncompatibleKeys(missing_keys=['encoder._conv_stem.weight', 'encoder._bn0.weight', 'encoder._bn0.bias', 'encoder._bn0.running_mean', 'encoder._bn0.running_var', 'encoder._blocks.0._depthwise_conv.weight', 'encoder._blocks.0._bn1.weight', 'encoder._blocks.0._bn1.bias', 'encoder._blocks.0._bn1.running_mean', 'encoder._blocks.0._bn1.running_var', 'encoder._blocks.0._se_reduce.weight', 'encoder._blocks.0._se_reduce.bias', 'encoder._blocks.0._se_expand.weight', 'encoder._blocks.0._se_expand.bias', 'encoder._blocks.0._project_conv.weight', 'encoder._blocks.0._bn2.weight', 'encoder._blocks.0._bn2.bias', 'encoder._blocks.0._bn2.running_mean', 'encoder._blocks.0._bn2.running_var', 'encoder._blocks.1._depthwise_conv.weight', 'encoder._blocks.1._bn1.weight', 'encoder._blocks.1._bn1.bias', 'encoder._blocks.1._bn1.running_mean', 'encoder._blocks.1._bn1.running_var', 'encoder._blocks.1._se_reduce.weight', 'encoder._blocks.1._se_reduce.bias', 'encoder._blocks.1._se_expand.weight', 'encoder._blocks.1

In [24]:
weight = torch.load('/content/gdrive/MyDrive/sw_contest/pretrained/models/stage3/effb4-f0/checkpoints/best.pth')
model2 = smp.UnetPlusPlus(encoder_name="efficientnet-b4", encoder_weights=None, in_channels=3, classes=num_classes)
model2.to(device)
model2.load_state_dict(weight, strict=False)


_IncompatibleKeys(missing_keys=['encoder._conv_stem.weight', 'encoder._bn0.weight', 'encoder._bn0.bias', 'encoder._bn0.running_mean', 'encoder._bn0.running_var', 'encoder._blocks.0._depthwise_conv.weight', 'encoder._blocks.0._bn1.weight', 'encoder._blocks.0._bn1.bias', 'encoder._blocks.0._bn1.running_mean', 'encoder._blocks.0._bn1.running_var', 'encoder._blocks.0._se_reduce.weight', 'encoder._blocks.0._se_reduce.bias', 'encoder._blocks.0._se_expand.weight', 'encoder._blocks.0._se_expand.bias', 'encoder._blocks.0._project_conv.weight', 'encoder._blocks.0._bn2.weight', 'encoder._blocks.0._bn2.bias', 'encoder._blocks.0._bn2.running_mean', 'encoder._blocks.0._bn2.running_var', 'encoder._blocks.1._depthwise_conv.weight', 'encoder._blocks.1._bn1.weight', 'encoder._blocks.1._bn1.bias', 'encoder._blocks.1._bn1.running_mean', 'encoder._blocks.1._bn1.running_var', 'encoder._blocks.1._se_reduce.weight', 'encoder._blocks.1._se_reduce.bias', 'encoder._blocks.1._se_expand.weight', 'encoder._blocks.1

In [25]:
weight = torch.load('/content/gdrive/MyDrive/sw_contest/pretrained/models/stage3/inrv2-f0/checkpoints/best.pth')
model3 = smp.UnetPlusPlus(encoder_name="inceptionresnetv2", encoder_weights=None, in_channels=3, classes=num_classes)
model3.to(device)
model3.load_state_dict(weight, strict=False)


_IncompatibleKeys(missing_keys=['encoder.conv2d_1a.conv.weight', 'encoder.conv2d_1a.bn.weight', 'encoder.conv2d_1a.bn.bias', 'encoder.conv2d_1a.bn.running_mean', 'encoder.conv2d_1a.bn.running_var', 'encoder.conv2d_2a.conv.weight', 'encoder.conv2d_2a.bn.weight', 'encoder.conv2d_2a.bn.bias', 'encoder.conv2d_2a.bn.running_mean', 'encoder.conv2d_2a.bn.running_var', 'encoder.conv2d_2b.conv.weight', 'encoder.conv2d_2b.bn.weight', 'encoder.conv2d_2b.bn.bias', 'encoder.conv2d_2b.bn.running_mean', 'encoder.conv2d_2b.bn.running_var', 'encoder.conv2d_3b.conv.weight', 'encoder.conv2d_3b.bn.weight', 'encoder.conv2d_3b.bn.bias', 'encoder.conv2d_3b.bn.running_mean', 'encoder.conv2d_3b.bn.running_var', 'encoder.conv2d_4a.conv.weight', 'encoder.conv2d_4a.bn.weight', 'encoder.conv2d_4a.bn.bias', 'encoder.conv2d_4a.bn.running_mean', 'encoder.conv2d_4a.bn.running_var', 'encoder.mixed_5b.branch0.conv.weight', 'encoder.mixed_5b.branch0.bn.weight', 'encoder.mixed_5b.branch0.bn.bias', 'encoder.mixed_5b.branch

In [26]:
weight = torch.load('/content/gdrive/MyDrive/sw_contest/pretrained/models/stage3/srx50-2-f0/checkpoints/best.pth')
model4 = smp.UnetPlusPlus(encoder_name="se_resnext50_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model4.to(device)
model4.load_state_dict(weight, strict=False)



_IncompatibleKeys(missing_keys=['encoder.layer0.conv1.weight', 'encoder.layer0.bn1.weight', 'encoder.layer0.bn1.bias', 'encoder.layer0.bn1.running_mean', 'encoder.layer0.bn1.running_var', 'encoder.layer1.0.conv1.weight', 'encoder.layer1.0.bn1.weight', 'encoder.layer1.0.bn1.bias', 'encoder.layer1.0.bn1.running_mean', 'encoder.layer1.0.bn1.running_var', 'encoder.layer1.0.conv2.weight', 'encoder.layer1.0.bn2.weight', 'encoder.layer1.0.bn2.bias', 'encoder.layer1.0.bn2.running_mean', 'encoder.layer1.0.bn2.running_var', 'encoder.layer1.0.conv3.weight', 'encoder.layer1.0.bn3.weight', 'encoder.layer1.0.bn3.bias', 'encoder.layer1.0.bn3.running_mean', 'encoder.layer1.0.bn3.running_var', 'encoder.layer1.0.se_module.fc1.weight', 'encoder.layer1.0.se_module.fc1.bias', 'encoder.layer1.0.se_module.fc2.weight', 'encoder.layer1.0.se_module.fc2.bias', 'encoder.layer1.0.downsample.0.weight', 'encoder.layer1.0.downsample.1.weight', 'encoder.layer1.0.downsample.1.bias', 'encoder.layer1.0.downsample.1.runni

In [27]:
weight = torch.load('/content/gdrive/MyDrive/sw_contest/pretrained/models/stage3/srx50-f0/checkpoints/best.pth')
model5 = smp.UnetPlusPlus(encoder_name="se_resnext50_32x4d", encoder_weights=None, in_channels=3, classes=num_classes)
model5.to(device)
model5.load_state_dict(weight, strict=False)


_IncompatibleKeys(missing_keys=['encoder.layer0.conv1.weight', 'encoder.layer0.bn1.weight', 'encoder.layer0.bn1.bias', 'encoder.layer0.bn1.running_mean', 'encoder.layer0.bn1.running_var', 'encoder.layer1.0.conv1.weight', 'encoder.layer1.0.bn1.weight', 'encoder.layer1.0.bn1.bias', 'encoder.layer1.0.bn1.running_mean', 'encoder.layer1.0.bn1.running_var', 'encoder.layer1.0.conv2.weight', 'encoder.layer1.0.bn2.weight', 'encoder.layer1.0.bn2.bias', 'encoder.layer1.0.bn2.running_mean', 'encoder.layer1.0.bn2.running_var', 'encoder.layer1.0.conv3.weight', 'encoder.layer1.0.bn3.weight', 'encoder.layer1.0.bn3.bias', 'encoder.layer1.0.bn3.running_mean', 'encoder.layer1.0.bn3.running_var', 'encoder.layer1.0.se_module.fc1.weight', 'encoder.layer1.0.se_module.fc1.bias', 'encoder.layer1.0.se_module.fc2.weight', 'encoder.layer1.0.se_module.fc2.bias', 'encoder.layer1.0.downsample.0.weight', 'encoder.layer1.0.downsample.1.weight', 'encoder.layer1.0.downsample.1.bias', 'encoder.layer1.0.downsample.1.runni

## Ensemble & Inference

In [None]:
test_dataset = SatelliteDataset(csv_file='./test.csv', transform=test_transform, infer=True)
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=4)


model1.eval()
model2.eval()
model3.eval()
model4.eval()
model5.eval()

with torch.no_grad():
    result = []
    for images in tqdm(test_dataloader):  # test_dataloader는 테스트 데이터셋을 위한 DataLoader
        images = images.float().to(device)

        # 두 모델의 예측 수행
        masks1 = torch.sigmoid(model1(images)).cpu().numpy() # torch.sigmoid는 로짓을 확률로 변환
        masks1 = np.squeeze(masks1, axis=1)

        masks2 = torch.sigmoid(model2(images)).cpu().numpy() # torch.sigmoid는 로짓을 확률로 변환
        masks2 = np.squeeze(masks2, axis=1)

        masks3 = torch.sigmoid(model3(images)).cpu().numpy() # torch.sigmoid는 로짓을 확률로 변환
        masks3 = np.squeeze(masks3, axis=1)

        masks4 = torch.sigmoid(model4(images)).cpu().numpy() # torch.sigmoid는 로짓을 확률로 변환
        masks4 = np.squeeze(masks4, axis=1)

        masks5 = torch.sigmoid(model5(images)).cpu().numpy() # torch.sigmoid는 로짓을 확률로 변환
        masks5 = np.squeeze(masks5, axis=1)


        # 예측 평균화
        averaged_outputs = (masks1 + masks2 + masks3 + masks4 + masks5) / 2

        # 임계값 적용하여 최종 이진 마스크 생성
        final_predictions = (averaged_outputs > 0.5).astype(np.uint8) # 0.5 이상이면 1, 그렇지 않으면 0

        for i in range(len(images)):
          mask_rle = rle_encode(final_predictions[i])
          if mask_rle == '': # 예측된 건물 픽셀이 없는 경우 -1
              result.append(-1)
          else:
              result.append(mask_rle)


 74%|███████▍  | 5612/7580 [58:43<20:11,  1.62it/s]

In [None]:
submit = pd.read_csv('./sample_submission.csv')
submit['mask_rle'] = result
submit.to_csv("./submission/stage1-3.csv", index=False)
