<a href="https://colab.research.google.com/github/LeeDayday/efficientNet_transfer_learning/blob/v0.1/4dblock.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


In [3]:
CFG = {
    'IMG_SIZE':224,
    'EPOCHS':5,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':128,
    'SEED':41
}

### google drive mount

In [4]:
import os
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
!pwd

/content


In [8]:
%cd /content/drive/MyDrive/dacon_4dblock

/content/drive/MyDrive/dacon_4dblock


In [9]:
!unzip -qq '/content/drive/MyDrive/dacon_4dblock/open.zip'

In [10]:
from glob import glob
train_filepaths = list(glob('./train/*.jpg'))
len(train_filepaths)

32994

In [11]:
test_filepaths = list(glob('./test/*.jpg'))
len(test_filepaths)

1460

In [12]:
csv_filepaths = list(glob('./*.csv'))
len(csv_filepaths)

3

### git

In [57]:
# !git status

### code

In [13]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [15]:
df = pd.read_csv('./train.csv')
print(df.head())

            id                 img_path  A  B  C  D  E  F  G  H  I  J
0  TRAIN_00000  ./train/TRAIN_00000.jpg  1  0  0  0  0  0  0  0  0  0
1  TRAIN_00001  ./train/TRAIN_00001.jpg  1  0  0  0  0  0  0  0  0  0
2  TRAIN_00002  ./train/TRAIN_00002.jpg  1  0  0  0  0  0  0  0  0  0
3  TRAIN_00003  ./train/TRAIN_00003.jpg  1  0  0  0  0  0  0  0  0  0
4  TRAIN_00004  ./train/TRAIN_00004.jpg  1  0  0  0  0  0  0  0  0  0


In [16]:
df = df.sample(frac=1)
train_len = int(len(df) * 0.8)

In [17]:
train = df[:train_len]
val = df[train_len:]

In [18]:
def get_labels(df):
    return df.iloc[:,2:].values

In [19]:
train_labels = get_labels(train)
val_labels = get_labels(val)

In [20]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        
        image = cv2.imread(img_path)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        
        if self.label_list is not None:
            label = torch.FloatTensor(self.label_list[index])
            return image, label
        else:
            return image
        
    def __len__(self):
        return len(self.img_path_list)

In [21]:
train_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

test_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

In [22]:
train_dataset = CustomDataset(train['img_path'].values, train_labels, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val['img_path'].values, val_labels, test_transform)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [23]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=10):
        super(BaseModel, self).__init__()
        self.backbone = models.efficientnet_b0(pretrained=True)
        self.classifier = nn.Linear(1000, num_classes)
        
    def forward(self, x):
        x = self.backbone(x)
        x = F.sigmoid(self.classifier(x))
        return x

In [24]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_acc = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(imgs)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_acc = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val ACC : [{_val_acc:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_acc)
            
        if best_val_acc < _val_acc:
            best_val_acc = _val_acc
            best_model = model
    
    return best_model

In [25]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    val_acc = []
    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            
            probs = model(imgs)
            
            loss = criterion(probs, labels)
            
            probs  = probs.cpu().detach().numpy()
            labels = labels.cpu().detach().numpy()
            preds = probs > 0.5
            batch_acc = (labels == preds).mean()
            
            val_acc.append(batch_acc)
            val_loss.append(loss.item())
        
        _val_loss = np.mean(val_loss)
        _val_acc = np.mean(val_acc)
    
    return _val_loss, _val_acc

In [82]:
!pwd

/content/drive/MyDrive/dacon


In [26]:
model = BaseModel()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.30091] Val Loss : [0.13923] Val ACC : [0.94168]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.10615] Val Loss : [0.08468] Val ACC : [0.96600]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.06116] Val Loss : [0.06068] Val ACC : [0.97607]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.04205] Val Loss : [0.06742] Val ACC : [0.97470]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.03181] Val Loss : [0.05503] Val ACC : [0.98028]


In [27]:
test = pd.read_csv('./test.csv')

In [28]:
test.head()

Unnamed: 0,id,img_path
0,TEST_00000,./test/TEST_00000.jpg
1,TEST_00001,./test/TEST_00001.jpg
2,TEST_00002,./test/TEST_00002.jpg
3,TEST_00003,./test/TEST_00003.jpg
4,TEST_00004,./test/TEST_00004.jpg


In [29]:
test_dataset = CustomDataset(test['img_path'].values, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [30]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for imgs in tqdm(iter(test_loader)):
            imgs = imgs.float().to(device)
            
            probs = model(imgs)

            probs  = probs.cpu().detach().numpy()
            preds = probs > 0.5
            preds = preds.astype(int)
            predictions += preds.tolist()
    return predictions

In [31]:
preds = inference(model, test_loader, device)

  0%|          | 0/12 [00:00<?, ?it/s]

In [32]:
submit = pd.read_csv('./sample_submission.csv')

In [33]:
submit.iloc[:,1:] = preds
submit.head()

Unnamed: 0,id,A,B,C,D,E,F,G,H,I,J
0,TEST_00000,1,1,1,0,0,1,1,0,1,0
1,TEST_00001,0,1,1,0,1,1,1,0,0,0
2,TEST_00002,1,1,0,1,1,1,0,1,0,1
3,TEST_00003,1,1,0,0,1,1,1,0,1,0
4,TEST_00004,1,1,1,0,1,1,1,0,0,0


In [34]:
submit.to_csv('./baseline_submit.csv', index=False)