# Preparation

In [1]:
!pip install torch
!pip install torchvision



In [7]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision import datasets
from torch.utils.data.dataset import Subset
import matplotlib.pyplot as plt
import random
import os
import enum
import numpy as np

# Create dataset

In [8]:
class QuestionType(enum.Enum):
    WHICH_ONE = 'which_one'
    IS_IN = 'is_in'
class DataSet(enum.Enum):
    MNIST = 'mnist'
    CIFAR10 = 'cifar10'
    CIFAR100 = 'cifar100'


In [9]:
def generate_cifar10_dataloader(batch_size: int) -> [DataLoader, DataLoader, DataLoader, DataLoader]:
    train_transform = T.Compose(
        [T.ToTensor(), # transforms.RandomHorizontalFlip(), transforms.RandomCrop(32,4),
         T.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
    test_transform = T.Compose(
        [T.ToTensor(),
         T.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
    train_dataset = datasets.CIFAR10(root='./data', train=True, transform=train_transform, download=True)
    test_dataset = datasets.CIFAR10(root='./data', train=False, transform=test_transform)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    full_train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=len(train_dataset.data), shuffle=False, num_workers=0)
    return full_train_loader, train_loader, test_loader
full_train_loader, train_loader, test_loader = generate_cifar10_dataloader(256)
datas, labels = next(iter(full_train_loader))

Files already downloaded and verified


In [10]:
def generate_Q_A_label(single_class_assigned: int, 
                         question_type: int, 
                         all_class_size: int, 
                         question_class_size: int) -> list:
    
    all_class_set = range(all_class_size)
    question_class_set = random.sample(all_class_set, question_class_size)
    temporary_label_list = [0 for i in range(all_class_size)]
    
    if question_type == QuestionType.WHICH_ONE:
        if single_class_assigned in question_class_set:
            for i in [single_class_assigned]:
                temporary_label_list[i] = 1
        
        else:
            comp_question_class_set = [i for i in all_class_set if i not in question_class_set]
            for i in comp_question_class_set:
                temporary_label_list[i] = 1
        
    elif question_type == QuestionType.IS_IN:
        if single_class_assigned in question_class_set:
            for i in question_class_set:
                temporary_label_list[i] = 1
        
        else:
            comp_question_class_set = [i for i in all_class_set if i not in question_class_set]
            for i in comp_question_class_set:
                temporary_label_list[i] = 1
    
    return temporary_label_list

temporary_label_list = generate_Q_A_label(single_class_assigned=6, 
                                            question_type=QuestionType.WHICH_ONE, 
                                            all_class_size=10, 
                                            question_class_size=9)

temporary_label_list

[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]

In [13]:
def generate_Q_A_labels(labels_tensor: torch.tensor, 
                          question_type: QuestionType,
                          all_class_size: int,
                          question_class_size: int) -> torch.tensor:
    
    labels_list = labels_tensor.tolist()
    Q_A_labels_list = []
    
    for label in labels_list:
        Q_A_label = generate_Q_A_label(single_class_assigned=label, 
                                          question_type=question_type,  # change out of this function
                                          all_class_size=all_class_size, # change out of this function
                                          question_class_size=question_class_size) # change out of this function
        Q_A_labels_list.append(Q_A_label)
        
    Q_A_labels_tensor = torch.tensor(Q_A_labels_list)
    
    return Q_A_labels_tensor

In [28]:
def generate_cifar10_dataloader_with_Q_A_label(full_train_loader: DataLoader, 
                                                  question_type: QuestionType,
                                                  question_size: int,
                                                  batch_size: int) ->[DataLoader, int, int]:
    
    for i, (datas, labels) in enumerate(full_train_loader):
        all_class_size = torch.max(labels) + 1 # K is number of classes, full_train_loader is full batch
        
    Q_A_labels_tensor = generate_Q_A_labels(labels, question_type, all_class_size, question_size)
    Q_A_labels_matrix_dataset = torch.utils.data.TensorDataset(datas, Q_A_labels_tensor.float())
    Q_A_labels_matrix_train_loader = torch.utils.data.DataLoader(dataset=Q_A_labels_matrix_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    dimension = int(datas.reshape(-1).shape[0]/datas.shape[0])
    return Q_A_labels_matrix_train_loader, dimension, all_class_size.tolist()

Q_A_labels_matrix_train_loader, dimension, all_class_size = generate_cifar10_dataloader_with_Q_A_label(full_train_loader, 
                                                                                                           QuestionType.WHICH_ONE,
                                                                                                           question_size=9,
                                                                                                           batch_size=64)
Q_A_datas, Q_A_labels = next(iter(Q_A_labels_matrix_train_loader))
print(Q_A_datas)
print(Q_A_labels)
print(dimension)
print(all_class_size)

tensor([[[[ 1.1700,  1.0112,  1.0430,  ...,  1.0589,  1.0747,  1.0747],
          [ 0.9160,  0.7731,  0.8048,  ...,  0.7096,  0.7255,  0.7890],
          [ 1.0112,  0.8525,  0.9160,  ...,  0.7890,  0.8207,  0.8683],
          ...,
          [ 1.3764,  1.2176,  1.3129,  ...,  1.2970,  1.3447,  1.3129],
          [ 1.3447,  1.1700,  1.1383,  ...,  1.1224,  1.3129,  1.2176],
          [ 1.4240,  1.2970,  1.1700,  ...,  1.2018,  1.3129,  1.2970]],

         [[ 1.3724,  1.2755,  1.2917,  ...,  1.2755,  1.2594,  1.2917],
          [ 1.3562,  1.2433,  1.2433,  ...,  1.2917,  1.2755,  1.2755],
          [ 1.3724,  1.2594,  1.2755,  ...,  1.3240,  1.3240,  1.3240],
          ...,
          [ 1.2271,  1.1626,  1.1949,  ...,  1.2433,  1.2271,  1.2917],
          [ 1.2110,  1.1142,  1.0335,  ...,  1.0657,  1.1787,  1.1949],
          [ 1.1949,  1.1464,  0.9851,  ...,  1.0657,  1.1142,  1.1787]],

         [[ 1.5197,  1.4145,  1.3544,  ...,  1.3694,  1.3845,  1.4896],
          [ 1.6399,  1.5648,  

In [24]:
from traitlets.traitlets import Integer
# Unclear!!!
DataSet_2_DataLoader_generator = {
    #DataSet.MNIST: generate_mnist_dataloader,
    DataSet.CIFAR10: generate_cifar10_dataloader,
    #DataSet.CIFAR100: generate_cifar100_dataloader,
}
DataSet_2_DataLoader_generator_with_Q_A_label = {
    #DataSet.MNIST: generate_mnist_dataloader,
    DataSet.CIFAR10: generate_cifar10_dataloader_with_Q_A_label,
    #DataSet.CIFAR100: generate_cifar100_dataloader,
}

def DataSet_2_DataLoader_with_Q_A_label(dataset: DataSet,
                                             question_type: QuestionType,
                                             question_size: int,
                                             batch_size: int
                                             ) -> [DataLoader, DataLoader, DataLoader, int, int]:
    
    dataloader_generator = DataSet_2_DataLoader_generator[dataset]
    full_train_loader, train_loader, test_loader = dataloader_generator(batch_size)
    dataloader_generator_with_Q_A_label = DataSet_2_DataLoader_generator_with_Q_A_label[dataset]
    Q_A_labels_matrix_train_loader, dimension, all_class_size = dataloader_generator_with_Q_A_label(full_train_loader, question_type, question_size, batch_size)
    
    return Q_A_labels_matrix_train_loader, train_loader, test_loader, dimension, all_class_size

DataSet_2_DataLoader_with_Q_A_label(DataSet.CIFAR10, QuestionType.WHICH_ONE, 9, 256)

Files already downloaded and verified




(<torch.utils.data.dataloader.DataLoader at 0x229028c0340>,
 <torch.utils.data.dataloader.DataLoader at 0x2294f0ceee0>,
 <torch.utils.data.dataloader.DataLoader at 0x2294f0cee50>,
 3072,
 10)

# Model excution

In [17]:
import torch.nn as nn
import torch
import math
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def mae_loss(outputs, labels):
    sm_outputs = F.softmax(outputs, dim=1)
    loss_fn = nn.L1Loss(reduction='none')
    loss_matrix = loss_fn(sm_outputs, labels.float())
    sample_loss = loss_matrix.sum(dim=-1)
    return sample_loss
    
def mse_loss(outputs, labels):
    sm_outputs = F.softmax(outputs, dim=1)
    loss_fn = nn.MSELoss(reduction='none')
    loss_matrix = loss_fn(sm_outputs, labels.float())
    sample_loss = loss_matrix.sum(dim=-1)
    return sample_loss

def gce_loss(outputs, labels):
    q = 0.7
    sm_outputs = F.softmax(outputs, dim=1)
    pow_outputs = torch.pow(sm_outputs, q)
    sample_loss = (1-(pow_outputs*labels).sum(dim=1))/q # n
    return sample_loss

def phuber_ce_loss(outputs, labels):
    trunc_point = 0.1
    n = labels.shape[0]
    soft_max = nn.Softmax(dim=1)
    sm_outputs = soft_max(outputs)
    final_outputs = sm_outputs * labels
    final_confidence = final_outputs.sum(dim=1)
    ce_index = (final_confidence > trunc_point)
    sample_loss = torch.zeros(n).to(device)

    if ce_index.sum() > 0:
        ce_outputs = outputs[ce_index,:]
        logsm = nn.LogSoftmax(dim=-1)
        logsm_outputs = logsm(ce_outputs)
        final_ce_outputs = logsm_outputs * labels[ce_index,:]
        sample_loss[ce_index] = - final_ce_outputs.sum(dim=-1)

    linear_index = (final_confidence <= trunc_point)

    if linear_index.sum() > 0:
        sample_loss[linear_index] = -math.log(trunc_point) + (-1/trunc_point)*final_confidence[linear_index] + 1

    return sample_loss

def ce_loss(outputs, labels):
    logsm = nn.LogSoftmax(dim=1)
    logsm_outputs = logsm(outputs)
    final_outputs = logsm_outputs * labels
    sample_loss = - final_outputs.sum(dim=1)
    return sample_loss

def W_O_loss(loss_fn, outputs, labels, device, question_class_size, all_class_size):
    n, k = labels.shape[0], labels.shape[1]
    temp_loss = torch.zeros(n, k).to(device)
    for i in range(k):
        tempY = torch.zeros(n, k).to(device)
        tempY[:, i] = 1.0
        temp_loss[:, i] = loss_fn(outputs, tempY)
        
    candidate_loss = (temp_loss * labels).sum(dim=1)
    noncandidate_loss = (temp_loss * (1-labels)).sum(dim=1)
    total_loss = candidate_loss - ((all_class_size - question_class_size) * (all_class_size - question_class_size - 1))/(question_class_size * (2*all_class_size - question_class_size - 1.0)) * noncandidate_loss
    average_loss = total_loss.mean()
    return average_loss

def I_I_loss(loss_fn, outputs, labels, device, question_class_size, all_class_size):
    n, k = labels.shape[0], labels.shape[1]
    temp_loss = torch.zeros(n, k).to(device)
    for i in range(k):
        tempY = torch.zeros(n, k).to(device)
        tempY[:, i] = 1.0
        temp_loss[:, i] = ce_loss(outputs, tempY)
        
    candidate_loss = (temp_loss * labels).sum(dim=1)
    noncandidate_loss = (temp_loss * (1-labels)).sum(dim=1)
    total_loss = candidate_loss - (2*question_class_size**2 + all_class_size**2 - all_class_size*(2*question_class_size + 1))/(2*question_class_size * (all_class_size - question_class_size)) * noncandidate_loss
    average_loss = total_loss.mean()
    return average_loss


In [18]:
class mlp_model(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(mlp_model, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = x.view(-1, self.num_flat_features(x))
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return out

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [19]:
def accuracy_check(loader, model, device):
    with torch.no_grad():
        total, num_samples = 0, 0
        for images, labels in loader:
            labels, images = labels.to(device), images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += (predicted == labels).sum().item()
            num_samples += labels.size(0) 
    return total / num_samples

In [20]:
def show_score(epoch,max_epoch, itr, max_itr, loss, acc, is_val=False):
    if is_val:
        print('\r{} EPOCH[{:03}/{:03}] ITR [{:04}/{:04}] ACC:{:03f}'.format("TEST  " if is_val else "TRAIN", epoch, max_epoch, itr, max_itr, acc*100),end = '')
    else:
        print('\r{} EPOCH[{:03}/{:03}] ITR [{:04}/{:04}] LOSS:{:.05f} ACC:{:03f}'.format("VAL  " if is_val else "TRAIN", epoch, max_epoch, itr, max_itr, loss, acc*100), end = '')

In [21]:
def Train_Eval(model, 
               question_type, 
               question_class_size, 
               all_class_size, 
               criterion, 
               optimizer, 
               data_loader, 
               evaluation_data_loader_train, 
               evaluation_data_loader_test, 
               device, 
               epoch, 
               max_epoch):
        
    total_loss_train = 0.0
    total_acc_train = 0.0
    total_acc_test = 0.0
    counter = 0

    for n, (data, label) in enumerate(data_loader):
        model.train()
        counter += data.shape[0]
        optimizer.zero_grad()
        data = data.to(device)
        label = label.to(device)
        output = model(data)
        if question_type == QuestionType.WHICH_ONE:
            loss = W_O_loss(criterion, output, label.float(), device, question_class_size, all_class_size)
        elif question_type == QuestionType.IS_IN:
            loss = I_I_loss(criterion, output, label.float(), device, question_class_size, all_class_size)
        else:
            loss = criterion(output,label)

        loss.backward()
        optimizer.step()
        total_loss_train += loss.item()

        model.eval()
        total_acc_train += accuracy_check(evaluation_data_loader_train, model, device)
        total_acc_test += accuracy_check(evaluation_data_loader_test, model, device)


        show_score(epoch+1, max_epoch, n+1, len(data_loader), total_loss_train/(n+1), total_acc_train/counter, is_val=False)
        print()
        show_score(epoch+1, max_epoch, n+1, len(data_loader), '', total_acc_test/counter, is_val=True)
        print()

    return total_loss , total_acc_train, total_acc_test 
        

In [22]:
def Q_A_label_Train_Eval(dataset, 
                          question_type, 
                          question_class_size, 
                          model, 
                          batch_size, 
                          loss_fn, 
                          EPOCHS):
    Q_A_labels_matrix_train_loader, train_loader, test_loader, dimension, all_class_size = DataSet_2_DataLoader_with_Q_A_label(dataset, 
                                                                                                                           question_type, 
                                                                                                                           question_class_size,
                                                                                                                           batch_size)
    DEVICE= torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    model = model(input_dim=dimension, hidden_dim=500, output_dim=all_class_size).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
    
    train_loss_list = []

    train_acc_list = []
    test_acc_list = []
    #criterion=nn.CrossEntropyLoss()

    for epoch in range(EPOCHS):
        train_loss, train_acc, test_acc = Train_Eval(model=model, 
                                                    question_type=question_type,
                                                    question_class_size=question_class_size,
                                                    all_class_size=all_class_size,
                                                    criterion=loss_fn,
                                                    optimizer=optimizer, 
                                                    data_loader=Q_A_labels_matrix_train_loader,
                                                    evaluation_data_loader_train=train_loader,
                                                    evaluation_data_loader_test=test_loader,
                                                    device=DEVICE, 
                                                    epoch=epoch, 
                                                    max_epoch=EPOCHS)

        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print(f'TRAIN EPOCH[{epoch+1}/{EPOCHS}] LOSS: {train_loss_list.mean()} ACC:{train_acc_list.mean()}')
        print(f'TEST EPOCH[{epoch+1}/{EPOCHS}] ACC:{test_acc_list.mean()}')


In [27]:
Q_A_label_Train_Eval(dataset=DataSet.CIFAR10, 
                      question_type=QuestionType.WHICH_ONE, 
                      question_class_size=9, 
                      model=mlp_model, 
                      batch_size=32, 
                      loss_fn=ce_loss, 
                      EPOCHS=300)

Files already downloaded and verified
TRAIN EPOCH[001/300] ITR [0001/1563] LOSS:2.28674 ACC:0.601562
TEST   EPOCH[001/300] ITR [0001/1563] ACC:0.598750


KeyboardInterrupt: 

In [56]:
def Ord_label_Train_Eval(dataset, 
                          model, 
                          batch_size, 
                          loss_fn, 
                          EPOCHS):
    Q_A_labels_matrix_train_loader, train_loader, test_loader, dimension, all_class_size = DataSet_2_DataLoader_with_Q_A_label(dataset, 
                                                                                                                                   QuestionType.WHICH_ONE, 
                                                                                                                                   5,
                                                                                                                                   batch_size)
    DEVICE= torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    model = model(input_dim=dimension, hidden_dim=500, output_dim=all_class_size).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
    
    train_loss_list = []

    train_acc_list = []
    test_acc_list = []
    #criterion=nn.CrossEntropyLoss()

    for epoch in range(EPOCHS):
        train_loss,train_acc = Train_Eval(model=model, 
                                          question_type=None,
                                          question_class_size=None,
                                          all_class_size=None,
                                          criterion=loss_fn,
                                          optimizer=optimizer, 
                                          data_loader=train_loader,
                                          evaluation_data_loader_train=train_loader,
                                          evaluation_data_loader_test=test_loader,
                                          device=DEVICE, 
                                          epoch=epoch, 
                                          max_epoch=EPOCHS)
        

        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print(f'TRAIN EPOCH[{epoch+1}/{EPOCHS}] LOSS: {train_loss_list.mean()} ACC:{train_acc_list.mean()}')
        print(f'TEST EPOCH[{epoch+1}/{EPOCHS}] ACC:{test_acc_list.mean()}')


In [58]:
Ord_label_Train_Eval(dataset=DataSet.CIFAR10, 
                      model=mlp_model, 
                      batch_size=64, 
                      loss_fn=nn.CrossEntropyLoss(), 
                      EPOCHS=300)

Files already downloaded and verified
TRAIN EPOCH[001/300] ITR [0001/0782] LOSS:2.32732 ACC:0.312250
TEST   EPOCH[001/300] ITR [0001/0782] ACC:0.317812


KeyboardInterrupt: ignored