In [None]:
import os
import copy
import time
import random
from tqdm import tqdm

import PIL
from PIL import Image


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
import torchvision
from torchvision import datasets, transforms, models

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset

from sklearn.metrics import f1_score
# fix seeds
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and configures cuDNN for deterministic kernel selection.

    Parameters
    ----------
    seed : int
        Seed value shared by all generators.
    """
    random.seed(seed)
    # NOTE: changing PYTHONHASHSEED at runtime does not re-seed str hashing in
    # the running interpreter; it is only inherited by child processes.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one. This is documented as a
    # silent no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different algorithms from run to run, which
    # defeats the deterministic flag above; keep it off.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs once, before the data split and any model initialisation.
SEED = 2019
seed_everything(SEED)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# cuDNN is switched off for the whole process.
torch.backends.cudnn.enabled = False
print(device)

df_class = pd.read_csv('../data/class.csv')
# Keep only the file name and label columns, mapping the value 196 to 0.
# NOTE(review): presumably labels in train.csv run 1..196, so this yields
# 0..195 for the 196-way classifier head - confirm against the data.
df_train = pd.read_csv('../data/train.csv')[['img_file', 'class']].replace(196, 0)

# Stratified 80/20 split; each part is unwrapped to a plain NumPy array.
X_train, X_val, y_train, y_val = (
    part.values
    for part in train_test_split(
        df_train['img_file'],
        df_train['class'],
        test_size=0.2,
        stratify=df_train['class'],
        random_state=SEED,
    )
)

# Directory prefixes that the dataset classes prepend to image file names.
TRAIN_DATA_PATH = '../data/train_crop/'
TEST_DATA_PATH = '../data/test_crop/'

class TrainImages(Dataset):
    """Labelled image dataset read from ``TRAIN_DATA_PATH``.

    Parameters
    ----------
    images : sequence of str
        Image file names; each is appended to ``TRAIN_DATA_PATH``.
    labels : sequence
        One label per image, aligned with ``images`` by index.
    mode : hashable
        Key selecting one pipeline out of ``transforms``.
    transforms : mapping
        Maps a mode to a callable applied to the RGB PIL image.

    Notes
    -----
    ``mode`` and ``transforms`` default to ``None`` in the signature but both
    are required in practice: the pipeline is looked up as
    ``transforms[mode]`` in the constructor.
    """

    def __init__(self, images, labels, mode=None, transforms=None):
        self.images = images
        self.labels = labels
        self.mode = mode
        self.transforms = transforms[self.mode]

    def __len__(self):
        # len() rather than .shape[0] so plain lists work as well as arrays.
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``."""
        # convert() returns a new image, so the source file can be closed
        # deterministically as soon as the conversion is done.
        with Image.open(TRAIN_DATA_PATH + self.images[idx]) as raw:
            image = raw.convert("RGB")
        image = self.transforms(image)
        label = self.labels[idx]

        return image, label
    
    
class TestImages(Dataset):
    """Image dataset read from ``TEST_DATA_PATH``.

    Parameters
    ----------
    images : sequence of str
        Image file names; each is appended to ``TEST_DATA_PATH``.
    labels : sequence
        One entry per image, aligned with ``images`` by index and returned
        alongside the image.
    mode : hashable
        Key selecting one pipeline out of ``transforms``.
    transforms : mapping
        Maps a mode to a callable applied to the RGB PIL image.

    Notes
    -----
    ``mode`` and ``transforms`` default to ``None`` in the signature but both
    are required in practice: the pipeline is looked up as
    ``transforms[mode]`` in the constructor.
    """

    def __init__(self, images, labels, mode=None, transforms=None):
        self.images = images
        # Was misspelled ``self.laels``, so __getitem__ could never find it.
        self.labels = labels
        self.mode = mode
        self.transforms = transforms[self.mode]

    def __len__(self):
        # Needed by DataLoader's samplers; the class previously had no length.
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``."""
        # convert() returns a new image, so the source file can be closed
        # deterministically as soon as the conversion is done.
        with Image.open(TEST_DATA_PATH + self.images[idx]) as raw:
            image = raw.convert("RGB")
        image = self.transforms(image)
        # Previously bound to ``labels`` while ``label`` was returned (NameError).
        label = self.labels[idx]

        return image, label

# Per-channel mean / std used to normalise inputs. These match the values
# torchvision documents for its ImageNet-pretrained models.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_INPUT_SIZE = (224, 224)


def _make_pipeline(augmentations=()):
    """Build resize -> (optional augmentations) -> tensor -> normalise."""
    return transforms.Compose([
        transforms.Resize(_INPUT_SIZE),
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])


# One preprocessing pipeline per dataset mode. Only 'train' adds random
# augmentation (horizontal flip, rotation of up to 20 degrees).
transform = {
    'train': _make_pipeline([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
    ]),
    'val': _make_pipeline(),
    'test': _make_pipeline(),
}

batch_size = 64

# Datasets: the training split gets the augmenting pipeline, validation the plain one.
train_dataset = TrainImages(X_train, y_train, mode='train', transforms=transform)
val_dataset = TrainImages(X_val, y_val, mode='val', transforms=transform)

# Only the training loader shuffles; validation keeps dataset order.
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size, shuffle=False)

dataloaders = dict(train=train_dataloader, val=val_dataloader)
dataset_sizes = {
    phase: len(dataset)
    for phase, dataset in (('train', train_dataset), ('val', val_dataset))
}

# Pretrained ResNet-101 whose final fully connected layer is replaced by a
# fresh 196-way linear head.
model_res = models.resnet101(pretrained=True, progress=False)
num_features = model_res.fc.in_features
model_res.fc = nn.Linear(in_features=num_features, out_features=196)

# All parameters (backbone and new head) are optimised with Adam.
optimizer = optim.Adam(params=model_res.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss()

def _collect_val_predictions(model, dataloader, device):
    """Run ``model`` in eval mode over ``dataloader``; return ``(true, predicted)`` label arrays."""
    true_batches, pred_batches = [], []
    model.eval()
    with torch.no_grad():
        for val_batch_inputs, val_batch_labels in dataloader:
            val_outputs = model(val_batch_inputs.to(device))
            _, val_batch_preds = torch.max(val_outputs, 1)
            # Labels are taken from the loader itself so scoring never depends
            # on a global array or on the loader's batch size / ordering.
            true_batches.append(val_batch_labels.cpu())
            pred_batches.append(val_batch_preds.cpu())
    return torch.cat(true_batches).numpy(), torch.cat(pred_batches).numpy()


def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, device, PATH, epochs=20,
                eval_every=10, log_every=5):
    """Train ``model`` and return it loaded with the best-validation weights.

    The full validation set is scored (micro-averaged F1) every ``eval_every``
    training batches. Whenever the score beats the best seen so far, the
    weights are remembered and written to
    ``../<PATH>/best_model_<epoch>_<batch>.pt``.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train. It is expected to already live on ``device``.
    dataloaders : mapping
        Must provide ``'train'`` and ``'val'`` loaders yielding
        ``(inputs, labels)`` tensor batches.
    dataset_sizes : mapping
        ``dataset_sizes['train']`` is the number of training samples, used to
        turn the summed batch losses into a per-sample epoch loss.
    criterion : callable
        Loss taking ``(outputs, labels)``.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to ``model``'s parameters.
    device : torch.device or str
        Device every batch is moved to before the forward pass.
    PATH : str
        Checkpoint directory, relative to the parent of the working directory.
        It is created on first save if missing.
    epochs : int
        Number of passes over the training loader.
    eval_every : int
        Validate every this many training batches (batch 1, 1 + eval_every, ...).
        Each validation runs the whole validation loader, so small values are
        expensive.
    log_every : int
        Print the training batch loss every this many batches.

    Returns
    -------
    torch.nn.Module
        The same ``model`` object, with the best-scoring weights loaded. If no
        validation ever scores above 0, these are the weights it started with.

    Raises
    ------
    ValueError
        If ``eval_every`` or ``log_every`` is smaller than 1.
    """
    if eval_every < 1 or log_every < 1:
        raise ValueError("eval_every and log_every must be positive integers")

    start = time.time()

    best_model_weights = copy.deepcopy(model.state_dict())
    best_f1 = 0.0

    for epoch in tqdm(range(epochs)):
        print("EPOCH {} / {}: ".format(epoch+1, epochs))
        print("-" * 10)

        # Never start an epoch in eval mode, whatever state the caller left the model in.
        model.train()
        epoch_loss = 0.0

        for batch_index, (batch_inputs, batch_labels) in enumerate(dataloaders['train']):
            # Honour the ``device`` argument instead of hard-coding .cuda(),
            # so the loop also runs on CPU-only machines.
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            outputs = model(batch_inputs)
            batch_loss = criterion(outputs, batch_labels)
            batch_loss.backward()
            optimizer.step()
            # criterion returns a batch mean; weight by batch size so a smaller
            # final batch is counted correctly in the epoch average.
            epoch_loss += batch_loss.item() * batch_inputs.size(0)
            if batch_index % log_every == 0:
                print("EPOCH {} BATCH {}: training batch loss: {}".format(epoch+1, batch_index+1, batch_loss.item()))

            if batch_index % eval_every == 0:
                val_true, val_preds = _collect_val_predictions(model, dataloaders['val'], device)
                # Validation switched the model to eval mode; resume training mode.
                model.train()

                val_score = f1_score(val_true, val_preds, average='micro')
                print()
                print(">>>>>>  EPOCH {} BATCH {}: validation score {}".format(epoch+1, batch_index+1, val_score))
                print()
                if val_score > best_f1:
                    best_f1 = val_score
                    best_model_weights = copy.deepcopy(model.state_dict())
                    checkpoint_path = '../{}/best_model_{}_{}.pt'.format(PATH, epoch+1, batch_index+1)
                    # Create the target directory so the first save cannot
                    # abort a run that is already in progress.
                    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
                    torch.save(model.state_dict(), checkpoint_path)

        epoch_loss = epoch_loss / dataset_sizes['train']
        print("EPOCH {}: EPOCH_LOSS: {}".format(epoch+1, epoch_loss))
    end = time.time()
    elapsed_time = end - start
    print("Training COMPLETED: {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    # '{:4f}' set a minimum width, not a precision; '.4f' is what was intended.
    print("BEST VALIDATION F1: {:.4f}".format(best_f1))

    model.load_state_dict(best_model_weights)
    return model

# First training stage: 20 epochs with the optimizer defined above;
# checkpoints are written under ../model/ten_crop/rough/.
model_res.to(device)
model_res = train_model(
    model=model_res,
    dataloaders=dataloaders,
    dataset_sizes=dataset_sizes,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    PATH='model/ten_crop/rough',
    epochs=20,
)

cuda



  0%|          | 0/20 [00:00<?, ?it/s][A

EPOCH 1 / 20: 
----------
EPOCH 1 BATCH 1: training batch loss: 5.365843772888184

>>>>>>  EPOCH 1 BATCH 1: validation score 0.007007007007007007

EPOCH 1 BATCH 6: training batch loss: 5.252135753631592
EPOCH 1 BATCH 11: training batch loss: 5.314694404602051

>>>>>>  EPOCH 1 BATCH 11: validation score 0.011511511511511512

EPOCH 1 BATCH 16: training batch loss: 5.289299488067627
EPOCH 1 BATCH 21: training batch loss: 5.2902913093566895

>>>>>>  EPOCH 1 BATCH 21: validation score 0.011511511511511512

EPOCH 1 BATCH 26: training batch loss: 5.305263996124268
EPOCH 1 BATCH 31: training batch loss: 5.3763651847839355

>>>>>>  EPOCH 1 BATCH 31: validation score 0.010510510510510511

EPOCH 1 BATCH 36: training batch loss: 5.324180603027344
EPOCH 1 BATCH 41: training batch loss: 5.269857406616211

>>>>>>  EPOCH 1 BATCH 41: validation score 0.012012012012012014

EPOCH 1 BATCH 46: training batch loss: 5.292675971984863
EPOCH 1 BATCH 51: training batch loss: 5.272904872894287

>>>>>>  EPOCH 1 B


  5%|▌         | 1/20 [13:51<4:23:11, 831.14s/it][A

EPOCH 1: EPOCH_LOSS: 5.250281436068637
EPOCH 2 / 20: 
----------
EPOCH 2 BATCH 1: training batch loss: 5.094849586486816

>>>>>>  EPOCH 2 BATCH 1: validation score 0.02702702702702703

EPOCH 2 BATCH 6: training batch loss: 5.124922275543213
EPOCH 2 BATCH 11: training batch loss: 5.101089954376221

>>>>>>  EPOCH 2 BATCH 11: validation score 0.032532532532532535

EPOCH 2 BATCH 16: training batch loss: 5.067883014678955
EPOCH 2 BATCH 21: training batch loss: 5.103292942047119

>>>>>>  EPOCH 2 BATCH 21: validation score 0.035035035035035036

EPOCH 2 BATCH 26: training batch loss: 5.0244879722595215
EPOCH 2 BATCH 31: training batch loss: 5.048108100891113

>>>>>>  EPOCH 2 BATCH 31: validation score 0.03853853853853854

EPOCH 2 BATCH 36: training batch loss: 5.034821510314941
EPOCH 2 BATCH 41: training batch loss: 5.042508602142334

>>>>>>  EPOCH 2 BATCH 41: validation score 0.042042042042042045

EPOCH 2 BATCH 46: training batch loss: 5.016838073730469
EPOCH 2 BATCH 51: training batch loss: 


 10%|█         | 2/20 [27:43<4:09:28, 831.60s/it][A

EPOCH 2: EPOCH_LOSS: 4.98726431719653
EPOCH 3 / 20: 
----------
EPOCH 3 BATCH 1: training batch loss: 4.755880355834961

>>>>>>  EPOCH 3 BATCH 1: validation score 0.1086086086086086

EPOCH 3 BATCH 6: training batch loss: 4.793086528778076
EPOCH 3 BATCH 11: training batch loss: 4.82103967666626

>>>>>>  EPOCH 3 BATCH 11: validation score 0.12012012012012012

EPOCH 3 BATCH 16: training batch loss: 4.887209892272949
EPOCH 3 BATCH 21: training batch loss: 4.759101867675781

>>>>>>  EPOCH 3 BATCH 21: validation score 0.12612612612612611

EPOCH 3 BATCH 26: training batch loss: 4.752540588378906
EPOCH 3 BATCH 31: training batch loss: 4.70635461807251

>>>>>>  EPOCH 3 BATCH 31: validation score 0.13713713713713713

EPOCH 3 BATCH 36: training batch loss: 4.755686283111572
EPOCH 3 BATCH 41: training batch loss: 4.659982204437256

>>>>>>  EPOCH 3 BATCH 41: validation score 0.15015015015015015

EPOCH 3 BATCH 46: training batch loss: 4.683686256408691
EPOCH 3 BATCH 51: training batch loss: 4.706743


 15%|█▌        | 3/20 [41:35<3:55:37, 831.61s/it][A

EPOCH 3: EPOCH_LOSS: 4.654855115754946
EPOCH 4 / 20: 
----------
EPOCH 4 BATCH 1: training batch loss: 4.519239902496338

>>>>>>  EPOCH 4 BATCH 1: validation score 0.1866866866866867

EPOCH 4 BATCH 6: training batch loss: 4.541206359863281
EPOCH 4 BATCH 11: training batch loss: 4.467905521392822

>>>>>>  EPOCH 4 BATCH 11: validation score 0.18918918918918917

EPOCH 4 BATCH 16: training batch loss: 4.505221843719482
EPOCH 4 BATCH 21: training batch loss: 4.3791728019714355

>>>>>>  EPOCH 4 BATCH 21: validation score 0.1991991991991992

EPOCH 4 BATCH 26: training batch loss: 4.235015392303467
EPOCH 4 BATCH 31: training batch loss: 4.451373100280762

>>>>>>  EPOCH 4 BATCH 31: validation score 0.2062062062062062

EPOCH 4 BATCH 36: training batch loss: 4.324039936065674
EPOCH 4 BATCH 41: training batch loss: 4.2888994216918945

>>>>>>  EPOCH 4 BATCH 41: validation score 0.21021021021021022

EPOCH 4 BATCH 46: training batch loss: 4.464749336242676
EPOCH 4 BATCH 51: training batch loss: 4.286


 20%|██        | 4/20 [55:26<3:41:45, 831.57s/it][A

EPOCH 4: EPOCH_LOSS: 4.288370823597646
EPOCH 5 / 20: 
----------
EPOCH 5 BATCH 1: training batch loss: 4.088942050933838

>>>>>>  EPOCH 5 BATCH 1: validation score 0.2707707707707708

EPOCH 5 BATCH 6: training batch loss: 4.13288688659668
EPOCH 5 BATCH 11: training batch loss: 3.9394123554229736

>>>>>>  EPOCH 5 BATCH 11: validation score 0.2782782782782783

EPOCH 5 BATCH 16: training batch loss: 4.10756254196167
EPOCH 5 BATCH 21: training batch loss: 3.9245264530181885

>>>>>>  EPOCH 5 BATCH 21: validation score 0.28678678678678676

EPOCH 5 BATCH 26: training batch loss: 4.104048252105713
EPOCH 5 BATCH 31: training batch loss: 3.8105838298797607

>>>>>>  EPOCH 5 BATCH 31: validation score 0.2982982982982983

EPOCH 5 BATCH 36: training batch loss: 3.9696292877197266
EPOCH 5 BATCH 41: training batch loss: 3.917628526687622

>>>>>>  EPOCH 5 BATCH 41: validation score 0.2957957957957958

EPOCH 5 BATCH 46: training batch loss: 3.764296054840088
EPOCH 5 BATCH 51: training batch loss: 4.1150

In [None]:

# Second training stage: continue from the current model_res with a fresh Adam
# optimizer at lr=1e-6 for up to 100 epochs; checkpoints are written under
# ../model/ten_crop/fine_tune/.
model_res.to(device)
optimizer = optim.Adam(params=model_res.parameters(), lr=1e-6)
criterion = nn.CrossEntropyLoss()
model_res = train_model(
    model=model_res,
    dataloaders=dataloaders,
    dataset_sizes=dataset_sizes,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    PATH='model/ten_crop/fine_tune',
    epochs=100,
)

