# Preparation

In [127]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torchvision import models
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision import datasets
from torch.utils.data.dataset import Subset
import matplotlib.pyplot as plt
import random
import os
import enum
import numpy as np
import pandas as pd
import collections

# Create dataset

In [2]:
class QuestionType(enum.Enum):
    WHICH_ONE = 'which_one'
    IS_IN = 'is_in'
class DataSet(enum.Enum):
    MNIST = 'mnist'
    FASHION = 'fashion'
    KMNIST = 'kmnist'
    CIFAR10 = 'cifar-10'

get_question_type = {QuestionType.WHICH_ONE: 'which_one',
                       QuestionType.IS_IN: 'is_in'}
get_dataset_name = {DataSet.MNIST: 'mnist',
                    DataSet.FASHION: 'fashion', 
                    DataSet.KMNIST: 'kmnist',
                    DataSet.CIFAR10: 'cifar-10'}

In [3]:
def prepare_mnist_data(batch_size):
    ordinary_train_dataset = datasets.MNIST(root='./data/mnist', train=True, transform=T.ToTensor(), download=True)
    test_dataset = datasets.MNIST(root='./data/mnist', train=False, transform=T.ToTensor())
    train_loader = torch.utils.data.DataLoader(dataset=ordinary_train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    full_train_loader = torch.utils.data.DataLoader(dataset=ordinary_train_dataset, batch_size=len(ordinary_train_dataset.data), shuffle=True, num_workers=0)
    num_classes = len(ordinary_train_dataset.classes)
    return full_train_loader, train_loader, test_loader

def prepare_kmnist_data(batch_size):
    ordinary_train_dataset = datasets.KMNIST(root='./data/KMNIST', train=True, transform=T.ToTensor(), download=True)
    test_dataset = datasets.KMNIST(root='./data/KMNIST', train=False, transform=T.ToTensor())
    train_loader = torch.utils.data.DataLoader(dataset=ordinary_train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    full_train_loader = torch.utils.data.DataLoader(dataset=ordinary_train_dataset, batch_size=len(ordinary_train_dataset.data), shuffle=True, num_workers=0)
    num_classes = len(ordinary_train_dataset.classes)
    return full_train_loader, train_loader, test_loader

def prepare_fashion_data(batch_size):
    ordinary_train_dataset = datasets.FashionMNIST(root='./data/FashionMnist', train=True, transform=T.ToTensor(), download=True)
    test_dataset = datasets.FashionMNIST(root='./data/FashionMnist', train=False, transform=T.ToTensor())
    train_loader = torch.utils.data.DataLoader(dataset=ordinary_train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    full_train_loader = torch.utils.data.DataLoader(dataset=ordinary_train_dataset, batch_size=len(ordinary_train_dataset.data), shuffle=True, num_workers=0)
    num_classes = len(ordinary_train_dataset.classes)
    return full_train_loader, train_loader, test_loader

#torchvision.datasets.CIFAR10.url="http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
def prepare_cifar10_data(batch_size: int) -> [DataLoader, DataLoader, DataLoader, DataLoader]:
    train_transform = T.Compose(
        [T.ToTensor(), # transforms.RandomHorizontalFlip(), transforms.RandomCrop(32,4),
         T.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
    test_transform = T.Compose(
        [T.ToTensor(),
         T.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
    train_dataset = datasets.CIFAR10(root='./data', train=True, transform=train_transform, download=True)
    test_dataset = datasets.CIFAR10(root='./data', train=False, transform=test_transform)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    full_train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=len(train_dataset.data), shuffle=False, num_workers=0)
    return full_train_loader, train_loader, test_loader

full_train_loader_fashion, train_loader_fashion, test_loader_fashion = prepare_fashion_data(256)
datas_fashion, labels_fashion = next(iter(full_train_loader_fashion))
full_train_loader_kmnist, train_loader_kmnist, test_loader_kmnist = prepare_kmnist_data(256)
datas_kmnist, labels_kmnist = next(iter(full_train_loader_kmnist))
full_train_loader_mnist, train_loader_mnist, test_loader_mnist = prepare_mnist_data(256)
datas_mnist, labels_mnist = next(iter(full_train_loader_mnist))
full_train_loader_cifar, train_loader_cifar, test_loader_cifar = prepare_cifar10_data(256)
datas_cifar, labels_cifar = next(iter(full_train_loader_cifar))

Files already downloaded and verified


In [4]:
def get_random_dataloader(full_train_loader: DataLoader, batch_size: int) -> [DataLoader, DataLoader]:
    datas, labels = next(iter(full_train_loader))
    count_list, delete_index_list = [0 for i in range(10)], []
    index_list = list(range(len(datas)))
    random.shuffle(index_list)
    for index in index_list:
        target = labels[index].tolist()
        if count_list[target] >= 1000:
            delete_index_list.append(index)
        else:
            count_list[target] += 1
    datas = np.delete(datas, delete_index_list, 0)
    labels = np.delete(labels, delete_index_list, 0)
    
    random_train_matrix_dataset = torch.utils.data.TensorDataset(datas, labels.float())
    full_random_train_loader = torch.utils.data.DataLoader(dataset=random_train_matrix_dataset, batch_size=labels.shape[0], shuffle=False, num_workers=0)
    random_train_loader = torch.utils.data.DataLoader(dataset=random_train_matrix_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    return full_random_train_loader, random_train_loader
full_random_train_loader_cifar, random_train_loader_mnist = get_random_dataloader(full_train_loader_mnist, 64)

In [29]:
def generate_Q_A_label(single_class_assigned: int, 
                         question_type: int, 
                         all_class_size: int, 
                         question_class_size: int) -> list:
    
    all_class_set = range(all_class_size)
    question_class_set = random.sample(all_class_set, question_class_size)
    temporary_label_list = [0 for i in range(all_class_size)]
    
    if question_type == QuestionType.WHICH_ONE:
        if single_class_assigned in question_class_set:
            for i in [single_class_assigned]:
                temporary_label_list[i] = 1
        
        else:
            comp_question_class_set = [i for i in all_class_set if i not in question_class_set]
            for i in comp_question_class_set:
                temporary_label_list[i] = 1
        
    elif question_type == QuestionType.IS_IN:
        if single_class_assigned in question_class_set:
            for i in question_class_set:
                temporary_label_list[i] = 1
        
        else:
            comp_question_class_set = [i for i in all_class_set if i not in question_class_set]
            for i in comp_question_class_set:
                temporary_label_list[i] = 1
    
    return temporary_label_list

temporary_label_list = generate_Q_A_label(single_class_assigned=0, 
                                            question_type=QuestionType.WHICH_ONE, 
                                            all_class_size=10, 
                                            question_class_size=9)

temporary_label_list

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [30]:
def generate_Q_A_labels(labels_tensor: torch.tensor, 
                          question_type: QuestionType,
                          all_class_size: int,
                          question_class_size: int) -> torch.tensor:
    
    labels_list = [int(i) for i in labels_tensor.tolist()]
    Q_A_labels_list = []
    
    for label in labels_list:
        Q_A_label = generate_Q_A_label(single_class_assigned=label, 
                                          question_type=question_type,  # change out of this function
                                          all_class_size=all_class_size, # change out of this function
                                          question_class_size=question_class_size) # change out of this function
        Q_A_labels_list.append(Q_A_label)
        
    Q_A_labels_tensor = torch.tensor(Q_A_labels_list)
    
    return Q_A_labels_tensor

In [31]:
def generate_dataloader_with_Q_A_label(full_random_train_loader: DataLoader, 
                                       question_type: QuestionType,
                                       question_size: int,
                                       batch_size: int) ->[DataLoader, int, int]:
    
    for i, (datas, labels) in enumerate(full_random_train_loader):
        all_class_size = torch.max(labels) + 1 # K is number of classes, full_train_loader is full batch
        all_class_size = int(all_class_size.tolist())
        
    Q_A_labels_tensor = generate_Q_A_labels(labels, question_type, all_class_size, question_size)
    Q_A_labels_matrix_dataset = torch.utils.data.TensorDataset(datas, Q_A_labels_tensor.float())
    
    Q_A_labels_matrix_train_loader = torch.utils.data.DataLoader(dataset=Q_A_labels_matrix_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    full_Q_A_labels_matrix_train_loader = torch.utils.data.DataLoader(dataset=Q_A_labels_matrix_dataset, batch_size=datas.shape[0], shuffle=False, num_workers=0)
    dimension = int(datas.reshape(-1).shape[0]/datas.shape[0])
    return full_Q_A_labels_matrix_train_loader, Q_A_labels_matrix_train_loader, dimension, all_class_size

In [32]:
from traitlets.traitlets import Integer
# Unclear!!!
DataSet_2_DataLoader_generator = {
    DataSet.MNIST: prepare_mnist_data,
    DataSet.FASHION: prepare_fashion_data,
    DataSet.KMNIST: prepare_kmnist_data,
    DataSet.CIFAR10: prepare_cifar10_data
}

def preparence(dataset: DataSet, batch_size: int):
    dataloader_generator = DataSet_2_DataLoader_generator[dataset]
    full_train_loader, train_loader, test_loader = dataloader_generator(batch_size)
    full_random_train_loader, random_train_loader = get_random_dataloader(full_train_loader, batch_size)
    
    return full_random_train_loader, random_train_loader, test_loader

full_random_train_loader, random_train_loader, test_loader = preparence(DataSet.CIFAR10, 256)

Files already downloaded and verified


In [34]:
def transform_scalar_to_array(labels: torch.tensor, all_class_size: int) -> torch.Tensor:
    outputs = []
    for label in labels:
        label = int(label.tolist())
        label_full_size = [0] * all_class_size
        label_full_size[label] = 1
        outputs.append(label_full_size)
    
    outputs = torch.tensor(outputs)
    
    return outputs

# Model excution

In [64]:
import torch.nn as nn
import torch
import math
import torch.nn.functional as F

def mae_loss(outputs, labels):
    sm_outputs = F.softmax(outputs, dim=1)
    loss_fn = nn.L1Loss(reduction='none')
    loss_matrix = loss_fn(sm_outputs, labels.float())
    sample_loss = loss_matrix.sum(dim=-1)
    return sample_loss
    
def mse_loss(outputs, labels):
    sm_outputs = F.softmax(outputs, dim=1)
    loss_fn = nn.MSELoss(reduction='none')
    loss_matrix = loss_fn(sm_outputs, labels.float())
    sample_loss = loss_matrix.sum(dim=-1)
    return sample_loss

def gce_loss(outputs, labels):
    q = 0.7
    sm_outputs = F.softmax(outputs, dim=1)
    pow_outputs = torch.pow(sm_outputs, q)
    sample_loss = (1-(pow_outputs*labels).sum(dim=1))/q # n
    return sample_loss

def phuber_ce_loss(outputs, labels):
    trunc_point = 0.1
    n = labels.shape[0]
    soft_max = nn.Softmax(dim=1)
    sm_outputs = soft_max(outputs)
    final_outputs = sm_outputs * labels
    final_confidence = final_outputs.sum(dim=1)
    ce_index = (final_confidence > trunc_point)
    sample_loss = torch.zeros(n).to(device)

    if ce_index.sum() > 0:
        ce_outputs = outputs[ce_index,:]
        logsm = nn.LogSoftmax(dim=-1)
        logsm_outputs = logsm(ce_outputs)
        final_ce_outputs = logsm_outputs * labels[ce_index,:]
        sample_loss[ce_index] = - final_ce_outputs.sum(dim=-1)

    linear_index = (final_confidence <= trunc_point)

    if linear_index.sum() > 0:
        sample_loss[linear_index] = -math.log(trunc_point) + (-1/trunc_point)*final_confidence[linear_index] + 1

    return sample_loss

def ce_loss(outputs, labels):
    logsm = nn.LogSoftmax(dim=1)
    logsm_outputs = logsm(outputs)
    final_outputs = logsm_outputs * labels
    sample_loss = - final_outputs.sum(dim=1)
    return sample_loss

def W_O_loss(loss_fn_, outputs, labels, device, question_class_size, all_class_size):
    n, k = labels.shape[0], labels.shape[1]
    temp_loss = torch.zeros(n, k).to(device)
    for i in range(k):
        tempY = torch.zeros(n, k).to(device)
        tempY[:, i] = 1.0
        temp_loss[:, i] = loss_fn_(outputs, tempY)
        
    candidate_loss = (temp_loss * labels).sum(dim=1)
    noncandidate_loss = (temp_loss * (1-labels)).sum(dim=1)
    total_loss = candidate_loss - ((all_class_size - question_class_size) * (all_class_size - question_class_size - 1))/(question_class_size * (2*all_class_size - question_class_size - 1.0)) * noncandidate_loss
    average_loss = total_loss.mean()
    return average_loss

def I_I_loss(loss_fn_, outputs, labels, device, question_class_size, all_class_size):
    n, k = labels.shape[0], labels.shape[1]
    temp_loss = torch.zeros(n, k).to(device)
    for i in range(k):
        tempY = torch.zeros(n, k).to(device)
        tempY[:, i] = 1.0
        temp_loss[:, i] = loss_fn_(outputs, tempY)
        
    candidate_loss = (temp_loss * labels).sum(dim=1)
    noncandidate_loss = (temp_loss * (1-labels)).sum(dim=1)
    total_loss = candidate_loss - (2*question_class_size**2 + all_class_size**2 - all_class_size*(2*question_class_size + 1))/(2*question_class_size * (all_class_size - question_class_size)) * noncandidate_loss
    average_loss = total_loss.mean()
    return average_loss


In [65]:
class mlp_model(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(mlp_model, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = x.view(-1, self.num_flat_features(x))
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return out

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [66]:
def accuracy_check(loader, model, device):
    with torch.no_grad():
        total, num_samples = 0, 0
        for images, labels in loader:
            labels, images = labels.to(device), images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += (predicted == labels).sum().item()
            num_samples += labels.size(0) 
    return total / num_samples

In [67]:
def loss_check(loader, model, criterion, device, all_class_size):
    with torch.no_grad():
        total, num_samples = 0, 0
        for images, labels in loader:
            labels, images = labels.to(device), images.to(device)
            outputs = model(images)
            labels = transform_scalar_to_array(labels, all_class_size)
            labels = labels.to(device)
            loss = criterion(outputs,labels)
            total += loss.sum().item()
            num_samples += labels.size(0) 
    return total / num_samples

In [68]:
def show_loss(epoch,max_epoch, loss):
    print('TRAIN EPOCH[{:03}/{:03}] LOSS:{:03f}%'.format(epoch, max_epoch, loss))    
def show_eval_loss(epoch,max_epoch, loss, is_val):
    if is_val:
        print('EVAL TEST EPOCH[{:03}/{:03}] LOSS:{:03f}%'.format(epoch, max_epoch, loss))
    else:
        print('EVAL TRAIN EPOCH[{:03}/{:03}] LOSS:{:03f}%'.format(epoch, max_epoch, loss))
def show_acc(epoch,max_epoch, acc, is_val):
    if is_val:
        print('TEST EPOCH[{:03}/{:03}] ACC:{:03f}%'.format(epoch, max_epoch, acc*100))
    else:
        print('TRAIN EPOCH[{:03}/{:03}] ACC:{:03f}%'.format(epoch, max_epoch, acc*100))

In [155]:
def Train(model, 
          question_type, 
          question_class_size, 
          all_class_size, 
          criterion, 
          optimizer, 
          data_loader, 
          device, 
          epoch, 
          max_epoch):
        
    total_loss_train = 0.0
    model.train()

    for n, (data, label) in enumerate(data_loader):
        optimizer.zero_grad()
        data = data.to(device)
        label = label.to(device)
        output = model(data)
        if question_type == QuestionType.WHICH_ONE:
            loss = W_O_loss(criterion, output, label.float(), device, question_class_size, all_class_size)
        elif question_type == QuestionType.IS_IN:
            loss = I_I_loss(criterion, output, label.float(), device, question_class_size, all_class_size)
        else:
            loss = criterion(output,label)

        loss.backward()
        optimizer.step()
        total_loss_train += loss.item()

    show_loss(epoch+1, max_epoch, total_loss_train/(n+1))
    print()

    return total_loss_train/(n+1), model

In [156]:
def Eval(index,
        model, 
        evaluation_data_loader_train, 
        evaluation_data_loader_test,
        all_class_size,
        criterion,
        device, 
        epoch, 
        max_epoch):
    
    model.eval()
    if index == 'acc':
        total_acc_train = accuracy_check(evaluation_data_loader_train, model, device)
        total_acc_test = accuracy_check(evaluation_data_loader_test, model, device)

        show_acc(epoch+1, max_epoch, total_acc_train, is_val=False)
        print()
        show_acc(epoch+1, max_epoch, total_acc_test, is_val=True)
        print()

        return total_acc_train , total_acc_test, model
    elif index == 'loss':
        total_loss_train = loss_check(evaluation_data_loader_train, model, criterion, device, all_class_size)
        total_loss_test = loss_check(evaluation_data_loader_test, model, criterion, device, all_class_size)
        show_eval_loss(epoch+1, max_epoch, total_loss_train, is_val=False)
        print()
        show_eval_loss(epoch+1, max_epoch, total_loss_test, is_val=True)
        print()

        return total_loss_train , total_loss_test, model
        

In [157]:
get_model = {mlp_model: 'MLP'}
get_loss_function = {ce_loss: 'CE_LOSS', 
                     mae_loss: 'MAE_LOSS'}

In [158]:
from tqdm.notebook import tqdm
import collections

In [159]:
def Q_A_label_Train_Eval(index,
                          dataset, 
                          question_type, 
                          question_class_size, 
                          model_name, 
                          batch_size, 
                          loss_fn, 
                          EPOCHS):
    print(f'START: {question_type}, {question_class_size}')
    
    full_Q_A_labels_matrix_train_loader, Q_A_labels_matrix_train_loader, dimension, all_class_size = generate_dataloader_with_Q_A_label(full_random_train_loader,
                                                                                                                                       question_type,
                                                                                                                                       question_class_size,
                                                                                                                                       batch_size)
    full_Q_A_datas, full_Q_A_labels = next(iter(full_Q_A_labels_matrix_train_loader))
    counter = collections.Counter(torch.sum(full_Q_A_labels, 1).int().tolist())
    print(counter)
    
    
    #DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    DEVICE = torch.device('mps')
    index=index
    #model = model_name(input_dim=dimension, hidden_dim=500, output_dim=all_class_size).to(DEVICE)
    model = model_name
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

    train_index_list = []
    eval_train_index_list = []
    eval_test_index_list = []
    #results_df = pd.DataFrame(columns=["index", "dataset", "epoch", "question_type", "question_class_size", "model", "loss_function", "train_index", "train_index", "test_index"])
    results_df = []
    for epoch in tqdm(range(EPOCHS)):
        train_index, model = Train(model=model, 
                                  question_type=question_type,
                                  question_class_size=question_class_size,
                                  all_class_size=all_class_size,
                                  criterion=loss_fn,
                                  optimizer=optimizer, 
                                  data_loader=Q_A_labels_matrix_train_loader,
                                  device=DEVICE, 
                                  epoch=epoch, 
                                  max_epoch=EPOCHS)
        
        eval_train_index, eval_test_index, model = Eval(index=index,
                                                        model=model, 
                                                        evaluation_data_loader_train=random_train_loader, 
                                                        evaluation_data_loader_test=test_loader,
                                                        all_class_size=all_class_size,
                                                        criterion=loss_fn,
                                                        device=DEVICE, 
                                                        epoch=epoch, 
                                                        max_epoch=EPOCHS)

        train_index_list.append(train_index)
        eval_train_index_list.append(eval_train_index)
        eval_test_index_list.append(eval_test_index)

        result_df = pd.DataFrame({'index': [index],
                                      'dataset': [get_dataset_name[dataset]],
                                      'epoch': [epoch+1],
                                      'question_type': [get_question_type[question_type]], 
                                      'question_class_size': [question_class_size], 
                                      'model': [get_model[model_name]],
                                      'loss_function': [get_loss_function[loss_fn]],
                                      'train_index': [train_index],
                                      'eval_train_index': [eval_train_index],
                                      'eval_test_index': [eval_test_index]})
        results_df.append(result_df)
    
    results_df = pd.concat(results_df, axis=0)
    
    if question_type == QuestionType.WHICH_ONE:
        if question_class_size != 9: 
            results_df['label_size_cand'] = counter[1]
            results_df['label_size_comp'] = counter[all_class_size - question_class_size]
        else:
            results_df['label_size_cand'] = counter[1]
            results_df['label_size_comp'] = np.nan
    elif question_type == QuestionType.IS_IN:
        if question_class_size != 5: 
            results_df['label_size_cand'] = counter[question_class_size]
            results_df['label_size_comp'] = counter[all_class_size - question_class_size]
        else:
            results_df['label_size_cand'] = counter[question_class_size]
            results_df['label_size_comp'] = np.nan
    return results_df

In [160]:
model_name = ResNet18()

In [161]:
full_random_train_loader, random_train_loader, test_loader = preparence(DataSet.CIFAR10, 512)

Files already downloaded and verified


In [162]:
result_df = Q_A_label_Train_Eval(index='loss',
                                dataset=DataSet.CIFAR10, 
                                question_type=QuestionType.WHICH_ONE, 
                                question_class_size=3, 
                                model_name=model_name, 
                                batch_size=500, 
                                loss_fn=mae_loss, 
                                EPOCHS=800)

START: QuestionType.WHICH_ONE, 3
Counter({7: 7009, 1: 2991})


  0%|          | 0/800 [00:00<?, ?it/s]

RuntimeError: Input type (MPSFloatType) and weight type (torch.FloatTensor) should be the same

In [125]:
index = 'loss'
for dataset_name in tqdm([DataSet.MNIST, DataSet.KMNIST, DataSet.FASHION]):
    results = []
    full_random_train_loader, random_train_loader, test_loader = preparence(dataset_name, 512)
    model_name = mlp_model
    for itr in tqdm(range(1,6)):
        for question_type in tqdm([QuestionType.WHICH_ONE, QuestionType.IS_IN]):
            for question_size in tqdm(range(1, 10)):
                    result_df = Q_A_label_Train_Eval(index=index,
                                                     dataset=dataset_name, 
                                                     question_type=question_type, 
                                                     question_class_size=question_size, 
                                                     model_name=model_name, 
                                                     batch_size=500, 
                                                     loss_fn=mae_loss, 
                                                     EPOCHS=800)
                    results.append(result_df)
    output = pd.concat(results, axis=0)
    output.to_csv(f'all_result_{get_dataset_name[dataset_name]}.csv', index=None)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

START: QuestionType.WHICH_ONE, 1
Counter({9: 8984, 1: 1016})


  0%|          | 0/800 [00:00<?, ?it/s]

TRAIN EPOCH[001/800] LOSS:1.438538%

EVAL TRAIN EPOCH[001/800] LOSS:1.139098%

EVAL TEST EPOCH[001/800] LOSS:1.119246%

TRAIN EPOCH[002/800] LOSS:0.667375%

EVAL TRAIN EPOCH[002/800] LOSS:0.575509%

EVAL TEST EPOCH[002/800] LOSS:0.541956%

TRAIN EPOCH[003/800] LOSS:0.324756%

EVAL TRAIN EPOCH[003/800] LOSS:0.410514%

EVAL TEST EPOCH[003/800] LOSS:0.382617%

TRAIN EPOCH[004/800] LOSS:0.200409%

EVAL TRAIN EPOCH[004/800] LOSS:0.347897%

EVAL TEST EPOCH[004/800] LOSS:0.326304%

TRAIN EPOCH[005/800] LOSS:0.138972%

EVAL TRAIN EPOCH[005/800] LOSS:0.311073%

EVAL TEST EPOCH[005/800] LOSS:0.293919%

TRAIN EPOCH[006/800] LOSS:0.097857%

EVAL TRAIN EPOCH[006/800] LOSS:0.288278%

EVAL TEST EPOCH[006/800] LOSS:0.270293%

TRAIN EPOCH[007/800] LOSS:0.067674%

EVAL TRAIN EPOCH[007/800] LOSS:0.277312%

EVAL TEST EPOCH[007/800] LOSS:0.262006%

TRAIN EPOCH[008/800] LOSS:0.042067%

EVAL TRAIN EPOCH[008/800] LOSS:0.259741%

EVAL TEST EPOCH[008/800] LOSS:0.249509%

TRAIN EPOCH[009/800] LOSS:0.022887%



KeyboardInterrupt: 