In [10]:
import argparse
import torch
from tqdm import tqdm
import data_loader.data_loaders as module_data
import loss as module_loss
import model.metric as module_metric
import model.model as module_arch
from parse_config import ConfigParser


import torch.nn as nn
import torch.nn.functional as F
import sys
import os
import json
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 

print(os.getcwd())

import data_loader.data_loaders as module_data
import model.model as module_arch

# Experiment configuration: names the architecture, data loader and loss settings
# used by the cells below.
config_file = './hyperparams/multistep/config_cifar10_gce.json'
with open(config_file, 'r') as f:
    config = json.load(f)

# Instantiate the architecture named in the config and restore its trained weights.
resume_path = './asym_40_gce.pth'
base_model = getattr(module_arch, config["arch"]['type'])()
# map_location='cpu' lets the checkpoint load even when the GPU it was saved from
# does not exist on this host; the layers are moved to the target device later.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
checkpoint = torch.load(resume_path, map_location='cpu')
state_dict = checkpoint['state_dict']
base_model.load_state_dict(state_dict)


/home/taehyeon/jupyter/6_ELR/ELR_CL/ELR


<All keys matched successfully>

In [11]:
# set seed (every RNG the pipeline may touch, so a re-run reproduces the same batches)
seed = config['seed']
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
# Autotuned kernel selection can differ between runs; pin it off alongside `deterministic`.
torch.backends.cudnn.benchmark = False

# Build the training-set loader with no validation split so every sample is visited.
loader_args = config['data_loader']['args']
data_loader = getattr(module_data, config['data_loader']['type'])(
    loader_args['data_dir'],
    batch_size=100,
    shuffle=loader_args['shuffle'],
    validation_split=0.0,
    num_batches=loader_args['num_batches'],
    training=True,
    num_workers=loader_args['num_workers'],
    pin_memory=loader_args['pin_memory'],
    config=config
)

loss_args = config['train_loss']['args']
criterion = getattr(module_loss, 'GCELoss')(q=loss_args['q'],
                                            k=loss_args['k'],
                                            truncated=False)

# Prefer the second GPU (as originally hard-coded) but fall back gracefully so the
# notebook still runs on single-GPU or CPU-only hosts.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

Files already downloaded and verified
Train: 50000 Val: 0


In [12]:
class Represent(nn.Module):
    """Feature extractor that reuses the stem and residual stages of ``base_model``.

    The stem (``conv1`` + ``bn1``) and ``layer1`` .. ``layer4`` are taken from
    ``base_model`` by reference; ``forward`` returns the average-pooled,
    flattened activation of ``layer4``.
    """

    _STAGE_NAMES = ('layer1', 'layer2', 'layer3', 'layer4')

    def __init__(self, base_model):
        super().__init__()
        # Register under the same attribute names (and in the same order) as the source model.
        for name in ('conv1', 'bn1') + self._STAGE_NAMES:
            setattr(self, name, getattr(base_model, name))

    def forward(self, x):
        """Return one flattened feature row per input sample."""
        features = F.relu(self.bn1(self.conv1(x)))
        for name in self._STAGE_NAMES:
            features = getattr(self, name)(features)
        pooled = F.avg_pool2d(features, 4)
        batch_size = pooled.size(0)
        return pooled.view(batch_size, -1)

In [13]:
# Collect, for every sample yielded by data_loader:
#   out_list      - feature row produced by `representer` (float array, one row per sample)
#   label_list    - the label the loader yields as `label`
#   gt_list       - the label the loader yields as `label_gt`
#   isNoisy_list  - 1.0 where label != label_gt, else 0.0
isFalse_list = np.empty((0,))  # defined for parity with the original cell; not filled here

representer = Represent(base_model)
representer.eval()
representer.to(device)

# Accumulate per-batch chunks and concatenate once at the end; concatenating inside
# the loop re-copies everything collected so far on every batch.
gt_chunks, label_chunks, noisy_chunks, feature_chunks = [], [], [], []

# no_grad: this is inference only, so do not build an autograd graph per batch.
with torch.no_grad(), tqdm(data_loader) as progress:
    for data, label, index, label_gt in progress:
        data = data.to(device)
        label, label_gt = label.long(), label_gt.long()
        output = representer(data)
        isNoisy = label != label_gt

        gt_chunks.append(label_gt.cpu().numpy())
        label_chunks.append(label.cpu().numpy())
        noisy_chunks.append(isNoisy.cpu().numpy())
        feature_chunks.append(output.cpu().numpy())

# Seeding each concatenation with an empty float64 array keeps the float64 dtype the
# original incremental concatenation produced (and yields an empty array for an empty loader).
gt_list = np.concatenate([np.empty((0,))] + gt_chunks)
label_list = np.concatenate([np.empty((0,))] + label_chunks)
isNoisy_list = np.concatenate([np.empty((0,))] + noisy_chunks)
# Always a NumPy array, even when the loader yields a single batch.
out_list = np.concatenate(feature_chunks, axis=0)

100%|██████████| 500/500 [00:22<00:00, 22.36it/s]


In [15]:
def get_singular_value_vector(label_list, out_list):
    """Run an SVD on the feature rows of each label and summarise the result.

    Args:
        label_list: 1-D array with one label per row of ``out_list``.
        out_list: 2-D array of feature rows (samples x features).

    Returns:
        (singular_dict, v_ortho_dict), both keyed by the unique values of
        ``label_list``:
          * ``singular_dict[label]`` is the ratio of the largest to the
            second-largest singular value of that label's feature matrix.
          * ``v_ortho_dict[label]`` is a torch tensor holding the first two
            right singular vectors (rows of ``Vh``).

    Raises:
        IndexError: if a label's feature matrix has fewer than two singular
            values (e.g. a single sample or a single feature).
    """
    singular_dict = {}
    v_ortho_dict = {}

    for index in np.unique(label_list):
        class_features = out_list[label_list == index]
        # Reduced SVD: the singular values and the leading rows of Vh are the same
        # as for the full decomposition, but the unused (n_samples x n_samples) U
        # matrix is never materialised.
        _, s, vh = np.linalg.svd(class_features, full_matrices=False)
        singular_dict[index] = s[0] / s[1]
        v_ortho_dict[index] = torch.from_numpy(vh[:2])

    return singular_dict, v_ortho_dict

In [None]:
def singular_pseudo_label(v_ortho_dict, model_represents, label, ratio=10):
    """Flag samples whose feature is dominated by their label's first singular vector.

    A sample is kept (True) unless the absolute projection of its feature onto
    the first singular vector of its label is smaller than ``ratio`` times the
    absolute projection onto the second one.

    Args:
        v_ortho_dict: mapping label -> tensor whose rows 0 and 1 are the first
            and second singular vectors for that label.
        model_represents: 2-D NumPy array of feature rows.
        label: array-like with one label per feature row (same length as
            ``model_represents``); every label must be a key of ``v_ortho_dict``.
        ratio: threshold multiplier applied to the second projection.

    Returns:
        CPU boolean tensor with one entry per sample.

    Raises:
        KeyError: if a label has no entry in ``v_ortho_dict``.
    """
    label = np.asarray(label)
    # Work on whatever device the singular vectors already live on instead of
    # assuming a particular GPU index.
    device = next((vecs.device for vecs in v_ortho_dict.values()), torch.device('cpu'))
    features = torch.from_numpy(model_represents).to(device)
    sing_lbl = torch.ones(features.shape[0], dtype=torch.bool)

    # One vectorised pass per label rather than a Python loop over every sample.
    for cls in np.unique(label):
        first, second = v_ortho_dict[cls][0], v_ortho_dict[cls][1]
        rows = torch.from_numpy(np.flatnonzero(label == cls))
        members = features[rows.to(device)]
        proj_first = (members * first).sum(dim=1).abs()
        proj_second = (members * second).sum(dim=1).abs()
        # Negating the strict "<" keeps the original rule (only a true "<" rejects).
        sing_lbl[rows] = ~(proj_first < ratio * proj_second).cpu()

    return sing_lbl

In [55]:
def singular_label(v_ortho_dict, model_represents, label, ratio=5):
    """Flag samples whose feature is dominated by their label's first singular vector.

    A sample is kept (True) unless the absolute projection of its feature onto
    the first singular vector of its label is smaller than ``ratio`` times the
    absolute projection onto the second one.

    Args:
        v_ortho_dict: mapping label -> tensor whose rows 0 and 1 are the first
            and second singular vectors for that label.
        model_represents: 2-D NumPy array of feature rows.
        label: array-like with one label per feature row (same length as
            ``model_represents``); every label must be a key of ``v_ortho_dict``.
        ratio: threshold multiplier applied to the second projection.

    Returns:
        CPU boolean tensor with one entry per sample.

    Raises:
        KeyError: if a label has no entry in ``v_ortho_dict``.
    """
    label = np.asarray(label)
    # Work on whatever device the singular vectors already live on instead of
    # assuming a particular GPU index.
    device = next((vecs.device for vecs in v_ortho_dict.values()), torch.device('cpu'))
    features = torch.from_numpy(model_represents).to(device)
    sing_lbl = torch.ones(features.shape[0], dtype=torch.bool)

    # One vectorised pass per label rather than a Python loop over every sample.
    for cls in np.unique(label):
        first, second = v_ortho_dict[cls][0], v_ortho_dict[cls][1]
        rows = torch.from_numpy(np.flatnonzero(label == cls))
        members = features[rows.to(device)]
        proj_first = (members * first).sum(dim=1).abs()
        proj_second = (members * second).sum(dim=1).abs()
        # Negating the strict "<" keeps the original rule (only a true "<" rejects).
        sing_lbl[rows] = ~(proj_first < ratio * proj_second).cpu()

    return sing_lbl

In [56]:
# Per-label singular-value ratio and leading singular vectors of the extracted features.
singular_dict, v_ortho_dict = get_singular_value_vector(label_list, out_list)

# Move the singular vectors to the notebook's compute device (the `device` chosen
# when the data loader was set up) rather than a hard-coded GPU index.
for key in v_ortho_dict.keys():
    v_ortho_dict[key] = v_ortho_dict[key].to(device)

# Boolean keep-mask over all samples.
sing_lbl = singular_label(v_ortho_dict, out_list, label_list)

# Samples that are kept by the mask even though their label differs from the ground truth.
kept_noisy_mask = (isNoisy_list == 1) & sing_lbl.numpy()
noise = label_list[kept_noisy_mask]
gt = gt_list[kept_noisy_mask]

# Counts per class 0-9: `tmp` by assigned label, `gtmp` by ground-truth label.
tmp = [(noise == i).sum() for i in range(10)]
gtmp = [(gt == i).sum() for i in range(10)]

In [57]:
# Class-wise counts (classes 0-9) of the kept-but-noisy samples:
# first by assigned label (tmp), then by ground-truth label (gtmp).
tmp, gtmp

([123, 106, 0, 854, 0, 47, 0, 144, 0, 0],
 [0, 0, 123, 47, 144, 854, 0, 0, 0, 106])

In [61]:
# Fraction of samples with label != ground truth among the samples kept by sing_lbl.
# Dividing by the 1-tuple `.shape` is why the result is a length-1 array, not a scalar.
isNoisy_list[sing_lbl.numpy()!=False].sum()/ isNoisy_list[sing_lbl.numpy()!=False].shape

array([0.03468554])

In [62]:
# Number of samples kept by sing_lbl for each assigned label, classes 0-9.
new_label = [(label_list[sing_lbl.numpy()]==i).sum() for i in range(10)]
new_label

[5010, 4995, 2413, 4371, 2428, 410, 4734, 5041, 4792, 2536]