In [3]:

import shutil


# 파일을 이동할 경로 설정
source_path = '/content/drive/MyDrive/Colab Notebooks/open.zip'
destination_path = '/content'

# 파일 이동
shutil.move(source_path, destination_path)

'/content/open.zip'

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
!unzip /content/open.zip

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
  inflating: train/XB4SE5IZ.jpg      
  inflating: train/XB50FVB5.jpg      
  inflating: train/XB66TFR3.jpg      
  inflating: train/XB7HYTV9.jpg      
  inflating: train/XB7UZZES.jpg      
  inflating: train/XB983VHW.jpg      
  inflating: train/XB9QFMMB.jpg      
  inflating: train/XB9W8IUN.jpg      
  inflating: train/XBA3URKT.jpg      
  inflating: train/XBAGLGMN.jpg      
  inflating: train/XBAM9JDX.jpg      
  inflating: train/XBAPV3CV.jpg      
  inflating: train/XBB2K4NW.jpg      
  inflating: train/XBB4PTS3.jpg      
  inflating: train/XBBHU04Q.jpg      
  inflating: train/XBC8J55W.jpg      
  inflating: train/XBCU8Y7L.jpg      
  inflating: train/XBDXFSXZ.jpg      
  inflating: train/XBE14U9J.jpg      
  inflating: train/XBEJW8N5.jpg      
  inflating: train/XBG171H4.jpg      
  inflating: train/XBGZ1JIW.jpg      
  inflating: train/XBHLA4KC.jpg      
  inflating: train/XBHVA1OC.jpg      
  inflating: train/XBINC6PY.jpg      


In [5]:
import random
import pandas as pd
import numpy as np
import os
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
import torchvision.models as models


from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore')

In [6]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [22]:
CFG = {
    'IMG_SIZE':224,
    'EPOCHS':20,
    'LEARNING_RATE':3e-5,
    'BATCH_SIZE':64,
    'SEED':41
}

In [23]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [24]:
df = pd.read_csv('./train.csv')
df.head()

Unnamed: 0,img_id,img_path,airplane,airport,bare soil,baseball diamond,basketball court,beach,bridge,buildings,...,tanks,tennis court,terrace,track,trail,transmission tower,trees,water,wetland,wind turbine
0,000L8TYE,./train/000L8TYE.jpg,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,1,1,0,0
1,0035QTK9,./train/0035QTK9.jpg,0,0,1,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
2,00470HEH,./train/00470HEH.jpg,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
3,004CCB2Q,./train/004CCB2Q.jpg,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0
4,005AY4ES,./train/005AY4ES.jpg,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,1,1,0,0


In [25]:
df = df.sample(frac=1)
train_len = int(len(df) * 0.8)
train_df = df[:train_len]
val_df = df[train_len:]

In [26]:
(train_df.shape, val_df.shape)

((52396, 62), (13100, 62))

In [27]:
def get_labels(df):
    return df.iloc[:,2:].values

train_labels = get_labels(train_df)
val_labels = get_labels(val_df)

In [28]:
(train_labels.shape, val_labels.shape)

((52396, 60), (13100, 60))

In [29]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transform=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transform = transform

    def __getitem__(self, index):
        img_path = self.img_path_list[index]

        # PIL 이미지로 불러오기
        image = Image.open(img_path).convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        if self.label_list is not None:
            label = torch.tensor(self.label_list[index], dtype=torch.float32)
            return image, label
        else:
            return image

    def __len__(self):
        return len(self.img_path_list)

In [30]:
train_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'] + 30, CFG['IMG_SIZE'] + 30)),  # 먼저 이미지 크기를 증가
    transforms.RandomCrop(CFG['IMG_SIZE'], padding=4),  # 이후 RandomCrop 적용
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [31]:
train_dataset = CustomDataset(train_df['img_path'].values, train_labels, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_df['img_path'].values, val_labels, test_transform)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [32]:
class BasicConv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes,eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

class ChannelGate(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
            )
        self.pool_types = pool_types
    def forward(self, x):
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type=='avg':
                avg_pool = F.avg_pool2d( x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( avg_pool )
            elif pool_type=='max':
                max_pool = F.max_pool2d( x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( max_pool )
            elif pool_type=='lp':
                lp_pool = F.lp_pool2d( x, 2, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( lp_pool )
            elif pool_type=='lse':
                # LSE pool only
                lse_pool = logsumexp_2d(x)
                channel_att_raw = self.mlp( lse_pool )

            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw

        scale = torch.sigmoid( channel_att_sum ).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scale

def logsumexp_2d(tensor):
    tensor_flatten = tensor.view(tensor.size(0), tensor.size(1), -1)
    s, _ = torch.max(tensor_flatten, dim=2, keepdim=True)
    outputs = s + (tensor_flatten - s).exp().sum(dim=2, keepdim=True).log()
    return outputs

class ChannelPool(nn.Module):
    def forward(self, x):
        return torch.cat( (torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 )

class SpatialGate(nn.Module):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size-1) // 2, relu=False)
    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = torch.sigmoid(x_out) # broadcasting
        return x * scale

class CBAM(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial=no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()
    def forward(self, x):
        x_out = self.ChannelGate(x)
        if not self.no_spatial:
            x_out = self.SpatialGate(x_out)
        return x_out

In [33]:
class ModifiedBaseModel(nn.Module):
    def __init__(self, num_classes=60):
        super(ModifiedBaseModel, self).__init__()

        self.backbone = models.resnet101(pretrained=True)

        # 마지막 분류기 레이어를 교체 (1000개 출력에서 60개 출력으로)
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features, num_classes)

        # CBAM 모듈 추가
        self.cbam = CBAM(gate_channels=in_features)

    def forward(self, x):
        # ResNet101을 통한 특징 추출
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        # CBAM 모듈 적용
        x = self.cbam(x)

        # 평균 풀링 및 분류기
        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.backbone.fc(x)


        return x

In [34]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=8, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = 1
        self.gamma = 8
        self.reduction = reduction

    def forward(self, inputs, targets):
        BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduction == 'mean':
            return torch.mean(F_loss)
        elif self.reduction == 'sum':
            return torch.sum(F_loss)
        else:
            return F_loss


In [35]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            logits = model(imgs)  # 로짓을 입력으로 사용

            loss = criterion(logits, labels)  # FocalLoss 적용

            val_loss.append(loss.item())

        _val_loss = np.mean(val_loss)

    return _val_loss


In [37]:
def train(model, optimizer, scheduler, train_loader, val_loader, device):
    model.to(device)

    criterion = FocalLoss(alpha=1, gamma=8, reduction='mean').to(device)

    best_val_loss = float('inf')
    best_model = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            logits = model(imgs)  # 로짓을 입력으로 사용

            loss = criterion(logits, labels)  # FocalLoss 적용

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        # 스케줄러의 step() 함수 호출
        scheduler.step()

        _val_loss = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}]')

        if best_val_loss > _val_loss:
            best_val_loss = _val_loss
            best_model = model

    return best_model

In [38]:
model = ModifiedBaseModel(num_classes=60)
optimizer = torch.optim.RMSprop(params=model.parameters(), lr=CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
infer_model = train(model, optimizer, scheduler, train_loader, val_loader, device)

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 150MB/s]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.00050] Val Loss : [0.00035]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.00035] Val Loss : [0.00031]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.00030] Val Loss : [0.00029]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.00028] Val Loss : [0.00028]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.00026] Val Loss : [0.00028]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.00025] Val Loss : [0.00027]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.00023] Val Loss : [0.00026]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.00023] Val Loss : [0.00027]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.00021] Val Loss : [0.00028]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.00021] Val Loss : [0.00026]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.00018] Val Loss : [0.00027]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.00017] Val Loss : [0.00026]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.00017] Val Loss : [0.00026]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.00016] Val Loss : [0.00027]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.00016] Val Loss : [0.00029]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.00016] Val Loss : [0.00028]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.00016] Val Loss : [0.00028]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.00016] Val Loss : [0.00026]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.00015] Val Loss : [0.00028]


  0%|          | 0/819 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.00015] Val Loss : [0.00030]


In [39]:
model_save_path = "/content/drive/MyDrive/32try.pth"

# infer_model의 상태(가중치)를 저장합니다.
torch.save(infer_model.state_dict(), model_save_path)

In [40]:
test = pd.read_csv('./test.csv')

In [41]:
test_dataset = CustomDataset(test['img_path'].values, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [42]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for imgs in tqdm(iter(test_loader)):
            imgs = imgs.float().to(device)

            probs = model(imgs)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

In [43]:
preds = inference(infer_model, test_loader, device)

  0%|          | 0/683 [00:00<?, ?it/s]

In [44]:
submit = pd.read_csv('./sample_submission.csv')

In [45]:
submit.iloc[:,1:] = preds
submit.head()

Unnamed: 0,img_id,airplane,airport,bare soil,baseball diamond,basketball court,beach,bridge,buildings,cars,...,tanks,tennis court,terrace,track,trail,transmission tower,trees,water,wetland,wind turbine
0,000TT5XV,-1.607496,-1.695988,0.194765,-2.131657,-1.379079,-1.52266,-1.426679,0.612175,0.19432,...,-2.375839,-1.903047,-2.362963,-1.129805,-1.70757,-2.138465,-1.133962,1.306812,-1.814554,-2.543777
1,0013XXDH,-3.148116,-2.677833,-0.276875,-2.69697,-2.807859,-2.57845,-2.936242,-0.227503,-1.174141,...,-2.816972,-2.026468,-1.806008,-2.057039,-0.506793,-2.539567,1.520144,-1.543502,-2.53915,-3.168192
2,001Z4YNH,-2.528123,-2.445001,-0.092985,-1.748764,-1.471684,-2.959965,-2.0569,0.536124,0.792093,...,-1.207669,1.247195,-1.161896,-0.796445,-0.627531,-1.078264,1.046412,-0.665753,-2.522462,-3.352233
3,00297F36,-1.748261,-1.653668,-0.550411,-1.985776,-1.35969,-2.205999,-1.614707,0.675356,1.130641,...,-0.824287,-0.543431,-0.894628,-0.76688,-0.965133,-0.701926,0.904104,-0.443862,-1.863106,-2.852141
4,002GFJL0,-2.259387,-2.24844,0.207022,-2.324167,-1.576853,-2.254628,-1.957784,0.708883,0.474501,...,-2.556099,-1.877054,-2.33552,-1.469348,-1.611386,-2.268836,-0.891026,0.930595,-2.107972,-2.946039


In [46]:
submit.to_csv('/content/drive/MyDrive/Colab Notebooks/submit32.csv', index=False)