In [1]:
import os 
import time 
import json 
import torch 
import random 
import warnings
import torchvision
import numpy as np 
import pandas as pd 

from utils import *
from data import HumanDataset
from tqdm import tqdm 
from config import config
from datetime import datetime
from models.model import *
from torch import nn,optim
from collections import OrderedDict
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from sklearn.model_selection import train_test_split
from timeit import default_timer as timer
from sklearn.metrics import f1_score
from torchvision import transforms as T
from imgaug import augmenters as iaa
import math
# 1. set random seed
# Seed every RNG the script touches (Python, NumPy, PyTorch CPU and all GPUs).
random.seed(2050)
np.random.seed(2050)
torch.manual_seed(2050)
torch.cuda.manual_seed_all(2050)
# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# NOTE(review): cudnn.benchmark lets cuDNN auto-select kernels, which may make
# runs non-reproducible despite the fixed seeds above -- confirm this is intended.
torch.backends.cudnn.benchmark = True
warnings.filterwarnings('ignore')

# exist_ok=True replaces the race-prone "check, then mkdir" pair.
os.makedirs("./logs/", exist_ok=True)

# Open the per-model training log in append mode and write the table header
# that the per-epoch rows written by the training loop line up with.
log = Logger()
log.open("logs/%s_log_train.txt"%config.model_name,mode="a")
log.write("\n----------------------------------------------- [START %s] %s\n\n" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 51))
log.write('                           |------------ Train -------------|----------- Valid -------------|----------Best Results---------|------------|\n')
log.write('mode     iter     epoch    |         loss   f1_macro        |         loss   f1_macro       |         loss   f1_macro       | time       |\n')
log.write('-------------------------------------------------------------------------------------------------------------------------------\n')

def train(train_loader,model,criterion,optimizer,epoch,valid_loss,best_results,start):
    """
    Run one training epoch and report its loss and macro F1.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network being trained; switched to train mode here.
        criterion: loss applied to the raw model output and the target.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch number, used only for the progress line.
        valid_loss: ``[loss, f1]`` shown in the "Valid" columns of the progress line.
        best_results: ``[loss, f1]`` shown in the "Best Results" columns.
        start: ``timer()`` value from which elapsed time is displayed.

    Returns:
        ``[average loss, macro F1 over all samples of the epoch]``.
    """
    losses = AverageMeter()
    f1 = AverageMeter()
    model.train()
    # Per-batch arrays are collected in lists and joined once after the loop;
    # concatenating inside the loop re-copies everything seen so far each step.
    epoch_targets, epoch_probs = [], []
    num_batches = len(train_loader)
    for i,(images,target) in enumerate(train_loader):
        images = images.cuda(non_blocking=True)
        target = torch.from_numpy(np.array(target)).float().cuda(non_blocking=True)
        batch_size = images.size(0)
        # compute output
        output = model(images)
        loss = criterion(output,target)
        losses.update(loss.item(),batch_size)

        # Move targets and probabilities to host memory once and reuse them
        # for both the running batch F1 and the epoch-level F1.
        np_target = target.cpu().numpy()
        np_output = output.detach().sigmoid().cpu().numpy()
        epoch_targets.append(np_target)
        epoch_probs.append(np_output)

        f1_batch = f1_score(np_target,np_output > config.f1_thr,average='macro')
        f1.update(f1_batch,batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Redraw the single-line progress display in place.
        print('\r',end='',flush=True)
        message = '%s %5.2f %6.2f         |         %0.3f  %0.3f           |         %0.3f  %0.4f         |         %s  %s    | %s' % (
                "train", i/num_batches + epoch, epoch,
                losses.avg, f1.avg,
                valid_loss[0], valid_loss[1],
                str(best_results[0])[:8],str(best_results[1])[:8],
                time_to_str((timer() - start),'min'))
        print(message , end='',flush=True)
    log.write("\n")
    # The returned F1 is computed over the whole epoch, not averaged per batch.
    y = np.concatenate(epoch_targets)
    preds = np.concatenate(epoch_probs)
    all_f1 = f1_score(y,preds > config.f1_thr,average='macro')
    return [losses.avg,all_f1]

# 2. evaluate function
def evaluate(val_loader,model,criterion,epoch,train_loss,best_results,start):
    """
    Evaluate the model on the validation loader without updating weights.

    Args:
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network to evaluate; moved to GPU and switched to eval mode here.
        criterion: loss applied to the raw model output and the target.
        epoch: current epoch number, used only for the progress line.
        train_loss: ``[loss, f1]`` shown in the "Train" columns of the progress line.
        best_results: ``[loss, f1]`` shown in the "Best Results" columns.
        start: ``timer()`` value from which elapsed time is displayed.

    Returns:
        ``[average loss, macro F1 over all validation samples]``.
    """
    # only meter loss and f1 score
    losses = AverageMeter()
    f1 = AverageMeter()
    # switch mode for evaluation
    model.cuda()
    model.eval()
    # Per-batch arrays are collected in lists and joined once after the loop;
    # concatenating inside the loop re-copies everything seen so far each step.
    epoch_targets, epoch_probs = [], []
    num_batches = len(val_loader)
    with torch.no_grad():
        for i, (images,target) in enumerate(val_loader):
            images_var = images.cuda(non_blocking=True)
            target = torch.from_numpy(np.array(target)).float().cuda(non_blocking=True)
            batch_size = images_var.size(0)
            output = model(images_var)
            loss = criterion(output,target)
            losses.update(loss.item(),batch_size)

            # Move targets and probabilities to host memory once and reuse them
            # for both the running batch F1 and the overall F1.
            np_target = target.cpu().numpy()
            np_output = output.sigmoid().cpu().numpy()
            epoch_targets.append(np_target)
            epoch_probs.append(np_output)

            f1_batch = f1_score(np_target,np_output > config.f1_thr,average='macro')
            f1.update(f1_batch,batch_size)

            # Redraw the single-line progress display in place.
            print('\r',end='',flush=True)
            message = '%s   %5.2f %6.2f         |         %0.3f  %0.3f           |         %0.3f  %0.4f         |         %s  %s    | %s' % (
                    "val", i/num_batches + epoch, epoch,
                    train_loss[0], train_loss[1],
                    losses.avg, f1.avg,
                    str(best_results[0])[:8],str(best_results[1])[:8],
                    time_to_str((timer() - start),'min'))

            print(message, end='',flush=True)
        log.write("\n")
    # The returned F1 is computed over the whole validation set at once.
    y = np.concatenate(epoch_targets)
    preds = np.concatenate(epoch_probs)
    all_f1 = f1_score(y,preds > config.f1_thr,average='macro')
    return [losses.avg,all_f1]

# 3. test model on public dataset and save the probability matrix
def test(test_files,model,folds, res_thr, data_list, flag):
    """
    Run inference over ``test_files`` and write the predictions to CSV.

    Args:
        test_files: DataFrame of samples handed to ``HumanDataset``.
        model: network used for inference; moved to GPU and put in eval mode.
        folds: sub-directory name under ``./submit/`` for the output files.
        res_thr: per-class probability thresholds, used only when
            ``flag == "submit"``.
        data_list: DataFrame with one row per sample; a copy of it, with a
            ``Predicted`` column, is what gets written out.
        flag: ``"submit"`` writes space-separated predicted class indices;
            any other value writes the space-separated probabilities.
            ``"val"`` additionally dumps ``data_list`` unchanged as the
            ``*_label.csv`` file.

    Output files are ``./submit/<folds>/bestloss_submission_<flag>_pred.csv``
    and, for ``"val"``, ``..._label.csv``.
    """
    # NOTE(review): the output directory is hard-coded to './submit/' while the
    # caller creates directories under config.submit -- confirm they agree.
    # Work on a copy so the caller's DataFrame does not silently gain a
    # 'Predicted' column.
    sample_submission_df = data_list.copy()
    if flag == "val":
        sample_submission_df.to_csv('./submit/%s/bestloss_submission_%s_label.csv'%(folds, flag), index=None)

    test_gen = HumanDataset(test_files,config.test_data,augument=False,mode="test")
    test_loader = DataLoader(test_gen,1,shuffle=False,pin_memory=True,num_workers=2)
    #3.1 confirm the model converted to cuda
    model.cuda()
    model.eval()
    thresholds = np.asarray(res_thr)
    labels = []
    with torch.no_grad():
        for images, _ in tqdm(test_loader):
            # The loader batch size is 1; dropping that axis leaves the stack
            # of inputs for one sample (presumably several augmented views --
            # TODO confirm against HumanDataset).
            images = images.squeeze(0)
            probs = model(images.cuda(non_blocking=True)).sigmoid().cpu().numpy()
            # Average over the stack and keep a leading axis of size 1.
            label = np.expand_dims(np.mean(probs, axis = 0), axis = 0)

            if flag == "submit":
                # Remember the most probable class before thresholding so
                # that every sample ends up with at least one label.
                top_class = int(np.argmax(label[0]))
                binary = (label > thresholds[:label.shape[1]]).astype(label.dtype)
                binary[0, top_class] = 1
                labels.append(binary > config.f1_thr)
            else:
                labels.append(label)

    submissions = []
    for row in np.concatenate(labels):
        if flag == "submit":
            subrow = ' '.join(str(idx) for idx in np.nonzero(row)[0])
        else:
            subrow = ' '.join(str(prob) for prob in row)
        submissions.append(subrow)
    sample_submission_df['Predicted'] = submissions
    sample_submission_df.to_csv('./submit/%s/bestloss_submission_%s_pred.csv'%(folds, flag), index=None)

def getClassWeight(all_files, mu = 0.5, num_classes = None):
    """
    Return a per-class loss weight computed from label frequencies.

    Each row's ``Target`` is a space-separated string of class indices. With
    ``count[c]`` the number of occurrences of class ``c`` and ``total`` the sum
    of all counts, the weight is ``log(mu * total / count[c])``, so rarer
    classes receive larger weights.

    Args:
        all_files: DataFrame (or mapping) with a ``'Target'`` column.
        mu: scale applied to ``total / count`` before taking the logarithm.
        num_classes: number of classes; defaults to ``config.num_classes``.

    Returns:
        1-D float tensor of length ``num_classes``.

    Raises:
        ValueError: if some class never occurs in ``all_files``; its weight
            would otherwise be infinite and poison the loss.
    """
    if num_classes is None:
        num_classes = config.num_classes

    counts = [0] * num_classes
    for target in all_files['Target']:
        for item in target.split(' '):
            counts[int(item)] += 1

    missing = [idx for idx, count in enumerate(counts) if count == 0]
    if missing:
        raise ValueError(
            "cannot compute class weights: no samples for class(es) {0}".format(missing))

    total = sum(counts)
    # NOTE(review): the weight goes negative for any class that makes up more
    # than a `mu` fraction of all labels -- confirm that is acceptable.
    res = torch.tensor([math.log(mu * total / count) for count in counts],
                       dtype=torch.float32)
    print("class weight = {0}".format(res))
    return res

def getMultiCsv(files_info):
    """
    Read csv info from official and external
    """
    res = []
    for file_info in files_info:
        files = pd.read_csv(file_info["file_name"])
        for i in range(len(files)):
            files["Id"][i] = file_info["prefix"] + files["Id"][i] + file_info["suffix"]
        res.append(files)
    res = pd.concat(res, ignore_index=True)
    return res

def oversample(file_list, rare_classes=("8","9","10","15","16","17","20","24","26","27")):
    """
    Duplicate the rows whose ``Target`` is a single rare class.

    Args:
        file_list: DataFrame with a ``Target`` column of label strings.
        rare_classes: label strings regarded as rare; a row is duplicated when
            its whole ``Target`` value equals one of them.

    Returns:
        A new DataFrame holding every original row followed by one extra copy
        of each matching row, with a fresh 0..n-1 index. ``file_list`` itself
        is not modified.
    """
    # NOTE(review): only exact matches count, so a multi-label row such as
    # "8 25" is not duplicated -- confirm that is intended.
    # A boolean mask works for any index; the earlier label-driven `.iloc`
    # lookup was only correct for a default 0..n-1 index.
    is_rare = file_list["Target"].isin(list(rare_classes))
    return pd.concat([file_list, file_list[is_rare]], ignore_index=True)

# CSV sources for training: each entry names a CSV file plus the prefix and
# suffix that getMultiCsv wraps around every value of its Id column.
train_file_info = [
    dict(file_name="../input/train.csv", prefix="train/", suffix=".png0"),
    dict(file_name="../input/HPAv18/HPAv18RBGY_wodpl.csv", prefix="HPAv18/png_gray/", suffix=".png0"),
]
all_files = getMultiCsv(train_file_info)

test_files = getMultiCsv([
    dict(file_name="../input/test.csv", prefix="test/", suffix=".png0"),
])

# criterion
# BCEWithLogitsLoss + class weight (weights come from label frequencies in all_files)
class_w = getClassWeight(all_files)
criterion = nn.BCEWithLogitsLoss(weight=class_w)
criterion = criterion.cuda()
# Alternative kept for reference: Focal Loss with uniform class weights
#class_w_one = Variable(torch.ones(config.num_classes))
#criterion = MyFocalLoss(weight = class_w_one, gamma = 2).cuda()
        
# 4. k-fold training: for every fold, train a fresh model, track the best
# validation loss / F1, then reload the best-loss checkpoint and write the
# prediction files for the test and validation sets.
for fold_k in range(0, config.kfoldN):
    fold = "model_1_8_%s"%str(fold_k)
    # 4.1 mkdirs
    # exist_ok=True replaces the race-prone "check, then mkdir" pairs and also
    # creates any missing parent directories.
    os.makedirs(config.submit, exist_ok=True)
    os.makedirs(config.weights + config.model_name + os.sep +str(fold), exist_ok=True)
    os.makedirs(config.best_models, exist_ok=True)
    os.makedirs(config.submit + os.sep + fold, exist_ok=True)
    os.makedirs("./logs/", exist_ok=True)
    
    # 4.2 get model
    model = get_net_channel3()
    model.cuda()

    optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)

    start_epoch = 0
    # NOTE(review): best_loss, best_f1 and resume are not read anywhere in the
    # visible code; they are kept because this is module-level state.
    best_loss = 999
    best_f1 = 0
    # [best validation loss, best validation F1] seen so far in this fold.
    best_results = [np.inf,0]
    # Placeholder shown in the progress line until the first validation pass.
    val_metrics = [np.inf,0]
    resume = False

    train_data_list = pd.read_csv('./Train_Val_MultiLabel_KFold/Train_KFold_%s_%s.csv'%(config.kfoldN, str(fold_k)))
    train_data_list = oversample(train_data_list)
    val_data_list = pd.read_csv('./Train_Val_MultiLabel_KFold/Val_KFold_%s_%s.csv'%(config.kfoldN, str(fold_k)))
    test_data_list = pd.read_csv("../input/sample_submission.csv")
    print("finish prepare data")

    # load dataset
    train_gen = HumanDataset(train_data_list,config.train_data,mode="train")
    train_loader = DataLoader(train_gen,batch_size=config.batch_size,shuffle=True,pin_memory=True,num_workers=2)

    val_gen = HumanDataset(val_data_list,config.train_data,augument=False,mode="train")
    val_loader = DataLoader(val_gen,batch_size=config.batch_size,shuffle=False,pin_memory=True,num_workers=2)

    # NOTE(review): test() builds its own dataset and loader, so these two
    # objects are not used below -- confirm before removing.
    test_gen = HumanDataset(test_files,config.test_data,augument=False,mode="test")
    test_loader = DataLoader(test_gen,1,shuffle=False,pin_memory=True,num_workers=2)

    # Multiply the learning rate by 0.1 every 2 epochs.
    scheduler = lr_scheduler.StepLR(optimizer,step_size=2,gamma=0.1)
    start = timer()
    
    #train
    for epoch in range(start_epoch, config.epochs):
        # NOTE(review): epoch 2 is skipped entirely (no training, validation or
        # checkpoint); the reason is not visible here -- confirm intentional.
        if epoch in [2]:
            continue
        # NOTE(review): the scheduler is stepped with an explicit epoch before
        # the optimizer runs; newer PyTorch versions deprecate this call form.
        scheduler.step(epoch)
        # train
        lr = get_learning_rate(optimizer)
        train_metrics = train(train_loader,model,criterion,optimizer,epoch,val_metrics,best_results,start)
        # val
        val_metrics = evaluate(val_loader,model,criterion,epoch,train_metrics,best_results,start)
        # check results 
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0],best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1],best_results[1])   
        # save model
        save_checkpoint({
                "epoch":epoch + 1,
                "model_name":config.model_name,
                "state_dict":model.state_dict(),
                "best_loss":best_results[0],
                "optimizer":optimizer.state_dict(),
                "fold":fold,
                "best_f1":best_results[1],
        },is_best_loss,is_best_f1,fold, epoch)
        # print logs
        print('\r',end='',flush=True)
        log.write('%s  %5.1f %6.1f         |         %0.3f  %0.3f           |         %0.3f  %0.4f         |         %s  %s    | %s' % (\
                "best", epoch, epoch,                    
                train_metrics[0], train_metrics[1], 
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8],str(best_results[1])[:8],
                time_to_str((timer() - start),'min'))
            )
        log.write("\n")
        time.sleep(0.01)

    # Reload the weights of the best-validation-loss checkpoint of this fold
    # (presumably written by save_checkpoint -- TODO confirm the file name).
    best_model = torch.load("%s/%s_fold_%s_model_best_loss.pth.tar"%(config.best_models,config.model_name,str(fold)))
    model.load_state_dict(best_model["state_dict"])
    
    # get classification result on test data and save to file
    test(test_files,model,fold, [0.2] * 28, test_data_list, "submit")
    # get prediction value in test data and save to file
    test(test_files,model,fold, [0.2] * 28, test_data_list, "test")
    # get label and prediction value in val data and save to file
    test(val_data_list,model,fold, [0.2] * 28, val_data_list, "val")


----------------------------------------------- [START 2019-01-14 16:16:26] ---------------------------------------------------

                           |------------ Train -------------|----------- Valid -------------|----------Best Results---------|------------|
mode     iter     epoch    |         loss   f1_macro        |         loss   f1_macro       |         loss   f1_macro       | time       |
-------------------------------------------------------------------------------------------------------------------------------
class weight = tensor([0.6997, 3.2899, 2.0262, 3.2096, 2.7772, 2.6309, 3.0972, 2.1710, 5.9401,
        6.0368, 6.1160, 3.6265, 3.6089, 4.0352, 3.4220, 7.1769, 4.1576, 5.2197,
        3.7741, 3.1115, 5.2378, 1.7870, 3.4083, 2.0758, 5.2609, 0.7915, 4.7604,
        6.4758])
finish prepare data
train  0.05   0.00         |         0.356  0.159           |         inf  0.0000         |         inf  0    |  0 hr 08 min

KeyboardInterrupt: 