In [73]:
import csv
import os
import random
import sys
from typing import Tuple, Sequence, Callable

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
import torchvision
import torchvision.models as models
import torchvision.transforms as T
from sklearn.model_selection import KFold
from torch import nn, Tensor
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchinfo import summary
from tqdm import tqdm

In [54]:
from efficientnet_pytorch import EfficientNet
# Builds a stand-alone EfficientNet-B3 with pretrained weights.
# NOTE(review): `model` is rebound to MnistModel_efficientb3 in a later cell,
# so this instance only serves as a quick check that the weights load.
model = EfficientNet.from_pretrained('efficientnet-b3')

Loaded pretrained weights for efficientnet-b3


In [55]:
class MnistModel_efficientb3(nn.Module):
    """EfficientNet-B3 backbone followed by a small classification head.

    The backbone's final fully-connected layer is replaced by a 1536 -> 512
    projection; the head then applies dropout, the backbone's swish
    activation and a 512 -> 26 linear layer. The output is the raw result of
    that last linear layer (no sigmoid/softmax is applied here).
    """

    def __init__(self) -> None:
        super().__init__()
        self.backbone = EfficientNet.from_pretrained('efficientnet-b3')

        # Swap the pretrained classifier for a 512-dimensional projection.
        self.backbone._fc = nn.Linear(1536, 512)
        self.dropout = nn.Dropout(p=0.2)
        # Re-uses the backbone's own swish module rather than creating a new one.
        self.activation = self.backbone._swish
        self.classifier = nn.Linear(512, 26)

    def forward(self, x):
        """Run the backbone and head on ``x`` and return the 26 output scores.

        The previous implementation computed the result but never returned
        it, so calling the model always produced ``None``.
        """
        x = self.backbone(x)
        x = self.dropout(x)
        x = self.activation(x)
        return self.classifier(x)

In [56]:
class SAM(torch.optim.Optimizer):
    """Sharpness-Aware Minimization wrapper around a base optimizer.

    One update consists of two gradient evaluations:

    1. ``first_step`` moves every parameter that has a gradient by
       ``e(w) = rho * grad / ||grad||`` and remembers that offset.
    2. After the caller has recomputed gradients at the perturbed point,
       ``second_step`` removes the offset again and lets the base optimizer
       apply its update using the new gradients.

    Args:
        params: parameters or parameter groups, as for any torch optimizer.
        base_optimizer: optimizer *class* (e.g. ``torch.optim.SGD``); it is
            instantiated on this optimizer's parameter groups.
        rho: non-negative size of the perturbation.
        **kwargs: forwarded to ``base_optimizer`` and stored as defaults.
    """

    def __init__(self, params, base_optimizer, rho=0.05, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        defaults = dict(rho=rho, **kwargs)
        super(SAM, self).__init__(params, defaults)

        # Both optimizers share the same param_groups objects, so changes
        # made through either one are visible to the other.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Perturb parameters along the normalised gradient direction.

        Parameters without a gradient are left untouched. The applied offset
        is stored under ``state[p]["e_w"]`` so ``second_step`` can undo it.
        """
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            # 1e-12 guards against division by zero for an all-zero gradient.
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None:
                    continue
                e_w = p.grad * scale.to(p)
                p.add_(e_w)  # climb to the local maximum "w + e(w)"
                self.state[p]["e_w"] = e_w

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Undo the perturbation, then run the base optimizer's update.

        The restore is keyed on the stored offset rather than on ``p.grad``:
        a parameter that was perturbed in ``first_step`` but has no gradient
        now (e.g. gradients were reset to ``None`` and the second backward
        pass did not reach it) would otherwise stay at ``w + e(w)``. The
        offset is removed from the state once used so it can never be
        applied a second time.
        """
        for group in self.param_groups:
            for p in group["params"]:
                # .get() avoids creating empty state entries in the defaultdict.
                param_state = self.state.get(p)
                if not param_state or "e_w" not in param_state:
                    continue
                p.sub_(param_state.pop("e_w"))  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a full SAM update.

        ``closure`` is mandatory and must run a complete forward-backward
        pass; it is invoked once, between the two half-steps. Gradients for
        the first half-step must already be populated by the caller.
        """
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        # step() itself runs under no_grad, so re-enable autograd for the closure.
        closure = torch.enable_grad()(closure)

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        """Return the global L2 norm over all existing parameter gradients."""
        # Put everything on the same device, in case of model parallelism.
        shared_device = self.param_groups[0]["params"][0].device
        per_param_norms = [
            p.grad.norm(p=2).to(shared_device)
            for group in self.param_groups
            for p in group["params"]
            if p.grad is not None
        ]
        return torch.norm(torch.stack(per_param_norms), p=2)

In [None]:
# Build per-fold training/validation loaders for 5-fold cross-validation.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

data_dir = r"C:\dacon\\data\\dirty_mnist_2nd\\"
train_label_dir = pd.read_csv(r"C:\dacon\\data\\dirty_mnist_2nd_answer.csv")

# random_state only has an effect when shuffle=True, and recent scikit-learn
# releases raise a ValueError when it is passed together with shuffle=False.
# The folds stay contiguous and unshuffled; use shuffle=True, random_state=0
# instead if shuffled folds were intended.
kfold = KFold(n_splits=5, shuffle=False)
epoch_size = 5
batch_size = 8

best_model = []
for fold_index, (train_idx, valid_idx) in enumerate(kfold.split(train_label_dir), 1):
    print(f'[fold:] {fold_index}')
    torch.cuda.empty_cache()  # release cached GPU memory before the next fold

    train_label = train_label_dir.iloc[train_idx]
    valid_label = train_label_dir.iloc[valid_idx]
    # NOTE(review): `Generator` is not defined in the visible notebook cells;
    # it presumably wraps an image directory plus a label frame - confirm.
    train_image = Generator(data_dir, train_label)
    valid_image = Generator(data_dir, valid_label)

    # NOTE(review): the training loader is not shuffled - confirm this is intended.
    train_loader = DataLoader(
        train_image,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
    )
    valid_loader = DataLoader(
        valid_image,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
    )

In [74]:
def seed_everything(seed: int = 42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: value used for every random number generator.

    Also exports ``PYTHONHASHSEED`` to the environment (this only influences
    interpreters started afterwards).
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # Benchmark mode lets cuDNN pick algorithms by timing them, which can
    # differ between runs; it must be off for the deterministic flag above
    # to be meaningful.
    torch.backends.cudnn.benchmark = False  # type: ignore


seed_everything(42)

class MnistDataset(Dataset):
    """Dataset pairing PNG images with label vectors read from a CSV file.

    The CSV must have a header row; in every following row the first column
    is an integer image id and the remaining columns are integer labels.
    Images are looked up as ``<dir>/<id zero-padded to 5 digits>.png``.

    Args:
        dir: directory containing the PNG files.
        image_ids: path to the label CSV.
        transforms: callable applied to each loaded image, or ``None`` to
            return the image unchanged.
    """

    def __init__(
        self,
        dir: os.PathLike,
        image_ids: os.PathLike,
        transforms: Callable
    ) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        # newline='' is what the csv module expects for files it parses.
        with open(image_ids, 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader)  # skip the header row
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing
                    # newline); indexing row[0] on those would crash.
                    continue
                self.labels[int(row[0])] = list(map(int, row[1:]))

        self.image_ids = list(self.labels.keys())

    def __len__(self) -> int:
        """Number of labelled images listed in the CSV."""
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor, np.ndarray]:
        """Return ``(image, target)`` for the sample at position ``index``.

        ``image`` is whatever ``transforms`` returns (the loaded RGB image
        itself when ``transforms`` is ``None``); ``target`` is a float32
        NumPy array of that sample's labels.
        """
        image_id = self.image_ids[index]
        image_path = os.path.join(self.dir, f'{str(image_id).zfill(5)}.png')
        # NOTE(review): `Image` is presumably PIL.Image; it is not imported in
        # the visible import cell - confirm it is in scope before training.
        image = Image.open(image_path).convert('RGB')
        target = np.array(self.labels[image_id]).astype(np.float32)

        if self.transforms is not None:
            image = self.transforms(image)

        return image, target

# Per-channel mean/std used for normalisation (the standard ImageNet statistics).
normalize = T.Normalize(
    [0.485, 0.456, 0.406],
    [0.229, 0.224, 0.225]
)

# Training augmentation: random rotation of up to +/-180 degrees.
# Horizontal/vertical flips are currently disabled.
transforms_train = T.Compose([
    T.RandomRotation(180, expand=False),
    T.ToTensor(),
    normalize,
])

# Validation must be deterministic: same tensor conversion and normalisation,
# but no random augmentation.
transforms_valid = T.Compose([
    T.ToTensor(),
    normalize,
])

mnist_image_dir = r"C:\dacon\\data\\dirty_mnist_2nd"
mnist_label_csv = r"C:\dacon\\data\\dirty_mnist_2nd_answer.csv"

trainset = MnistDataset(mnist_image_dir, mnist_label_csv, transforms_train)
# Same files and labels as `trainset` (so the same indices apply), different transforms.
validset = MnistDataset(mnist_image_dir, mnist_label_csv, transforms_valid)

# Hold out a random 10% of the samples for validation.
dataset_size = len(trainset)
validation_split = 0.1
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

# NOTE(review): worker processes with a Dataset class defined inside a notebook
# can fail on Windows; the K-fold cell uses num_workers=0 - confirm these values.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=8, sampler=train_sampler, num_workers=8)
# Previously this loader read from `trainset`, so validation images were
# randomly rotated as well.
validation_loader = torch.utils.data.DataLoader(validset, batch_size=8, sampler=valid_sampler, num_workers=4)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MnistModel_efficientb3().to(device)
print(summary(model, input_size=(1, 3, 256, 256), verbose=0))

Loaded pretrained weights for efficientnet-b3
Layer (type:depth-idx)                                  Output Shape              Param #
├─EfficientNet: 1-1                                     [1, 512]                  --
|    └─Conv2dStaticSamePadding: 2-1                     [1, 40, 128, 128]         --
|    |    └─ZeroPad2d: 3-1                              [1, 3, 258, 258]          --
|    └─BatchNorm2d: 2-2                                 [1, 40, 128, 128]         80
|    └─MemoryEfficientSwish: 2-3                        [1, 40, 128, 128]         --
├─MemoryEfficientSwish: 1-2                             [1, 40, 128, 128]         --
├─EfficientNet: 1                                       []                        --
|    └─ModuleList: 2                                    []                        --
|    |    └─MBConvBlock: 3-2                            [1, 24, 128, 128]         2,298
|    |    └─MBConvBlock: 3-3                            [1, 24, 128, 128]         1,206
|    |  

In [75]:
# AdamW over all model parameters with a learning rate of 1e-3.
optimizer = optim.AdamW(model.parameters(), lr=1e-3)
# Disabled alternative: wrap a base optimizer in SAM.
# NOTE(review): `base_optimizer` is not defined in the visible cells.
#optimizer = SAM(model.parameters(), base_optimizer, lr=1e-3)

# Multi-label loss; per the PyTorch docs it is computed from raw scores
# (the sigmoid is applied inside the loss).
criterion = nn.MultiLabelSoftMarginLoss()


In [None]:
num_epochs = 60

# NOTE(review): these trackers are initialised here but are not updated in the
# visible part of the loop.
best_loss = 1e10
best_acc = 0
no_improvement = 0
for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        model.train()
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        # NOTE(review): mixup_data / mixup_criterion are not defined in the
        # visible notebook cells - confirm they are in scope.
        # The former `Variable(...)` wrapping was dropped: Variable is a
        # deprecated no-op on tensors and was never imported here.
        images, targets_a, targets_b, lam = mixup_data(images, targets)

        outputs = model(images)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

        loss.backward()
        optimizer.step()

        # When SAM is used instead of AdamW, the second forward/backward pass
        # and optimizer.second_step(zero_grad=True) would go here (disabled).

        # The model emits raw scores, so convert to probabilities before
        # thresholding at 0.5 (thresholding the scores directly is wrong).
        outputs = torch.sigmoid(outputs.detach()) > 0.5
        # Accuracy is measured against the original (un-mixed) targets.
        acc = (outputs == targets).float().mean()
        print(f'EPOCH: {epoch}/{num_epochs} | {i} / {len(train_loader)} | LOSS: {loss.item():.5f}, ACCURACY: {acc.item():.5f}')