In [1]:
import os
import copy
import time
import random

import PIL
from PIL import Image


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
import torchvision
from torchvision import datasets, transforms, models

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset

from sklearn.metrics import f1_score
# fix seeds
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and switches cuDNN to deterministic, non-autotuned
    kernel selection.

    Parameters
    ----------
    seed : int
        Seed value shared by all generators.
    """
    random.seed(seed)
    # Only affects child processes started after this point; the hash seed of
    # the running interpreter was fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU rather than just the current one; this call is
    # silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may choose different kernels from run to run, which defeats
    # the deterministic flag above.
    torch.backends.cudnn.benchmark = False

# Single seed for the whole notebook; it is also passed to train_test_split.
SEED = 2019
seed_everything(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Turns cuDNN off for the whole session.
# NOTE(review): with cuDNN disabled, the cudnn.deterministic flag set inside
# seed_everything presumably has nothing left to act on — confirm this is an
# intentional workaround.
torch.backends.cudnn.enabled = False
print(device)

cuda


### Data
- 196종의 차

In [2]:
# Class lookup table; presumably one row per car class (196 rows in the recorded run).
df_class = pd.read_csv('./data/class.csv')
# Not the last expression of the cell, so this preview is never rendered.
df_class.head()
print(df_class.shape)

(196, 2)


In [3]:
# Training annotations: per row an image file name, the bounding-box corners
# (bbox_x1, bbox_y1, bbox_x2, bbox_y2) and the class id.
df_train = pd.read_csv('./data/train.csv')
df_train.head()

Unnamed: 0,img_file,bbox_x1,bbox_y1,bbox_x2,bbox_y2,class
0,train_00001.jpg,1,80,641,461,108
1,train_00002.jpg,57,53,293,236,71
2,train_00003.jpg,35,42,227,160,76
3,train_00004.jpg,10,21,254,165,188
4,train_00005.jpg,1,67,772,573,44


In [4]:
# Test annotations: same columns as the training table but without 'class'.
df_test = pd.read_csv('./data/test.csv')
df_test.head()

Unnamed: 0,img_file,bbox_x1,bbox_y1,bbox_x2,bbox_y2
0,test_00001.jpg,199,184,1116,809
1,test_00002.jpg,55,61,480,257
2,test_00003.jpg,42,123,602,413
3,test_00004.jpg,13,8,619,393
4,test_00005.jpg,8,9,209,93


### Train vs Validation Data split
- 신경망의 validation 을 위해 dataset을 분리합니다.

In [5]:
# Hold out 20% of the labelled rows for validation, stratified on the class id
# so both parts keep the same class proportions.
X_train, X_val, y_train, y_val = train_test_split(
    df_train['img_file'],
    df_train['class'],
    test_size=0.2,
    stratify=df_train['class'],
    random_state=SEED,
)
print(X_train.head(), y_train.head())
print(X_val.head(), y_val.head())

# From here on the splits are plain NumPy arrays (positional indexing only).
X_train, X_val = X_train.values, X_val.values
y_train, y_val = y_train.values, y_val.values

539     train_00540.jpg
9477    train_09478.jpg
6514    train_06515.jpg
3135    train_03136.jpg
8591    train_08592.jpg
Name: img_file, dtype: object 539      88
9477     26
6514     94
3135     79
8591    160
Name: class, dtype: int64
6361    train_06362.jpg
1933    train_01934.jpg
7496    train_07497.jpg
3364    train_03365.jpg
8349    train_08350.jpg
Name: img_file, dtype: object 6361    152
1933     82
7496    156
3364     77
8349    115
Name: class, dtype: int64


In [6]:
# Sanity check of the split sizes produced by test_size=0.2.
print(X_train.shape)
print(X_val.shape)

(7992,)
(1998,)


### Data Preparation

#### Torch Dataset

In [7]:
# Directories the dataset classes read their image files from.
# NOTE(review): the "_crop" suffix suggests the images were already cropped to
# the bbox_* columns in an earlier step — confirm.
TRAIN_DATA_PATH = './data/train_crop/'
TEST_DATA_PATH = './data/test_crop/'

class TrainImages(Dataset):
    """Labelled image dataset backed by files under ``TRAIN_DATA_PATH``.

    Parameters
    ----------
    images : sequence of str
        Image file names, relative to ``TRAIN_DATA_PATH``.
    labels : sequence
        One label per entry of ``images``, in the same order.
    mode : hashable, optional
        Key used to select a pipeline when ``transforms`` is a mapping.
    transforms : mapping, callable or None, optional
        A mapping ``{mode: callable}``, a single callable applied to every
        image, or ``None`` to return the RGB PIL image unchanged.
    """

    def __init__(self, images, labels, mode=None, transforms=None):
        self.images = images
        self.labels = labels
        self.mode = mode
        if transforms is None or callable(transforms):
            # The defaults used to crash on ``None[mode]``; accept "no
            # transform" and a bare callable as well as a mapping.
            self.transforms = transforms
        else:
            self.transforms = transforms[self.mode]

    def __len__(self):
        # len() works for lists as well as NumPy arrays.
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``."""
        path = os.path.join(TRAIN_DATA_PATH, self.images[idx])
        # convert() returns an independent image, so the file handle can be
        # released as soon as the context manager exits.
        with Image.open(path) as handle:
            image = handle.convert("RGB")
        if self.transforms is not None:
            image = self.transforms(image)
        label = self.labels[idx]

        return image, label
    
    
class TestImages(Dataset):
    """Image dataset backed by files under ``TEST_DATA_PATH``.

    Parameters
    ----------
    images : sequence of str
        Image file names, relative to ``TEST_DATA_PATH``.
    labels : sequence or None, optional
        One label per entry of ``images``. Pass ``None`` for unlabelled data;
        items are then returned as a bare image instead of a pair.
    mode : hashable, optional
        Key used to select a pipeline when ``transforms`` is a mapping.
    transforms : mapping, callable or None, optional
        A mapping ``{mode: callable}``, a single callable applied to every
        image, or ``None`` to return the RGB PIL image unchanged.
    """

    def __init__(self, images, labels=None, mode=None, transforms=None):
        self.images = images
        self.labels = labels
        self.mode = mode
        if transforms is None or callable(transforms):
            self.transforms = transforms
        else:
            self.transforms = transforms[self.mode]

    def __len__(self):
        # Needed by DataLoader to size batches and samplers.
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)``, or just ``image`` when there are no labels."""
        path = os.path.join(TEST_DATA_PATH, self.images[idx])
        # convert() returns an independent image, so the file handle can be
        # released as soon as the context manager exits.
        with Image.open(path) as handle:
            image = handle.convert("RGB")
        if self.transforms is not None:
            image = self.transforms(image)
        if self.labels is None:
            return image
        label = self.labels[idx]

        return image, label

#### Image Transforms

In [8]:
# Per-channel statistics passed to Normalize.
# NOTE(review): presumably the ImageNet mean/std expected by torchvision's
# pretrained models — confirm.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]


def _build_pipeline():
    """Resize to 250x250, convert to a tensor, then normalise each channel."""
    steps = [
        transforms.Resize((250, 250)),
        transforms.ToTensor(),
        transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
    ]
    return transforms.Compose(steps)


# All three phases currently share the same preprocessing (no augmentation);
# each gets its own Compose instance.
transform = {phase: _build_pipeline() for phase in ('train', 'val', 'test')}

In [9]:
batch_size = 256

# Both splits are read through TrainImages; only the transform key differs.
train_dataset = TrainImages(X_train, y_train, mode='train', transforms=transform)
val_dataset = TrainImages(X_val, y_val, mode='val', transforms=transform)

# Shuffle only the training batches; validation keeps the split order.
dataloaders = {
    'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
    'val': DataLoader(val_dataset, batch_size=batch_size, shuffle=False),
}
train_dataloader, val_dataloader = dataloaders['train'], dataloaders['val']

# Sample counts per phase, used to average the summed batch losses.
dataset_sizes = {phase: len(loader.dataset) for phase, loader in dataloaders.items()}



### Modeling
- ResNet-18 (torchvision, pretrained weights)

In [10]:
# torchvision ResNet-18 initialised with pretrained weights (no download progress bar).
model_res = models.resnet18(pretrained=True, progress=False)
# Replace the final fully connected layer with a 196-way head, matching the
# 196 rows of class.csv.
num_features = model_res.fc.in_features
model_res.fc = nn.Linear(num_features, 196)

#### Training

In [11]:
# Adam with default hyper-parameters over every parameter of the network
# (nothing is frozen, so the whole backbone is fine-tuned).
optimizer = optim.Adam(model_res.parameters())
# NOTE(review): CrossEntropyLoss takes class-index targets in [0, 195] for a
# 196-way head — confirm that the 'class' column of train.csv is 0-based.
criterion = nn.CrossEntropyLoss()

In [12]:
def _check_class_targets(labels, num_classes):
    """Raise ValueError when integer class targets fall outside [0, num_classes)."""
    if labels.numel() == 0 or labels.is_floating_point():
        return
    lowest, highest = int(labels.min()), int(labels.max())
    if lowest < 0 or highest >= num_classes:
        raise ValueError(
            "labels span [{}, {}] but the model has {} outputs; "
            "targets must lie in [0, {}]".format(
                lowest, highest, num_classes, num_classes - 1))


def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, device, epochs=20):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a 'train' phase (forward, backward, optimizer step) and
    a 'val' phase (forward only). After each validation phase the micro-F1
    over the whole validation loader is computed; the weights of the epoch
    with the highest score are kept and restored before returning.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; expected to already live on ``device``.
    dataloaders : dict
        ``{'train': loader, 'val': loader}``, each yielding
        ``(inputs, labels)`` batches.
    dataset_sizes : dict
        ``{'train': n, 'val': n}`` sample counts used to average the loss.
    criterion : callable
        Loss called as ``criterion(outputs, labels)``.
    optimizer : torch.optim.Optimizer
        Optimiser built over ``model``'s parameters.
    device : torch.device
        Device every batch is moved to.
    epochs : int, optional
        Number of passes over the training loader.

    Returns
    -------
    torch.nn.Module
        The same ``model`` object, carrying the best validation weights.

    Raises
    ------
    ValueError
        If integer labels fall outside ``[0, outputs.size(1))``.
    """
    start = time.time()

    best_model_weights = copy.deepcopy(model.state_dict())
    best_f1 = 0.0

    for epoch in range(epochs):
        print("EPOCH {} / {}: ".format(epoch+1, epochs))
        print("-" * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)

            running_loss = 0.0
            val_targets, val_preds = [], []

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)

                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    # Checked before the loss so a bad label surfaces as a
                    # readable error rather than an opaque device failure.
                    _check_class_targets(labels, outputs.size(1))
                    labels = labels.to(device)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight by batch size so the epoch figure is a per-sample mean.
                running_loss += loss.item() * inputs.size(0)

                if not is_train:
                    val_preds.append(outputs.detach().argmax(dim=1).cpu().numpy())
                    val_targets.append(labels.detach().cpu().numpy())

            epoch_loss = running_loss / dataset_sizes[phase]
            print("{} Loss: {:.4f}".format(phase, epoch_loss))

            if not is_train and val_targets:
                val_score = f1_score(np.concatenate(val_targets),
                                     np.concatenate(val_preds),
                                     average='micro')
                print("{} F1: {:.4f}".format(phase, val_score))

                if val_score > best_f1:
                    best_f1 = val_score
                    best_model_weights = copy.deepcopy(model.state_dict())

    end = time.time()
    elapsed_time = end - start
    print("Training COMPLETED: {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("BEST VALIDATION F1: {:.4f}".format(best_f1))

    model.load_state_dict(best_model_weights)
    return model




In [13]:
# Move the network to the selected device, then fine-tune for up to 100 epochs.
# NOTE(review): the recorded run aborted during epoch 1 with a cublas runtime
# error. One possible cause is class labels outside [0, 195] reaching
# CrossEntropyLoss on the GPU — check the label range in train.csv to confirm.
model_res.to(device)
train_model(model_res, dataloaders, dataset_sizes, criterion, optimizer, device, epochs=100)

EPOCH 1 / 100: 
----------


RuntimeError: cublas runtime error : the GPU program failed to execute at /pytorch/aten/src/THC/THCBlas.cu:259