In [None]:
import os, sys
os.chdir('../')

In [2]:
import argparse
import torch
from tqdm import tqdm
import data_loader.data_loaders as module_data
import loss as module_loss
import model.metric as module_metric
import model.model as module_arch

import easydict
import torch.nn as nn
import torch.nn.functional as F
import sys
import os
import json
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 

import data_loader.data_loaders as module_data
import model.model as module_arch

from selection.svd_classifier import *
from selection.gmm import *
from selection.util import *

from utils.parse_config import ConfigParser
from utils.util import *
from utils.args import *

In [3]:
config_file = './hyperparams/multistep/config_cifar10_cce_rn34.json'
with open(config_file, 'r') as f:
    config = json.load(f)

# resume_path = './rn34/multistep_asym_40_elr.pth'

In [4]:
def decode(path):
    items = path.split('_')
    noisetype = True if items[1]=='asym' else False
    noiserate = float(items[2]) * 0.01
    
    return noisetype, noiserate, items[3].split('.')[0]

In [5]:
def make_parse(resume_path, config, noise_rate, noisetype):
    parse = easydict.EasyDict({
    "load_name" : resume_path,
    "reinit": False,
    "distill_mode": 'loss'
    })
    
    config['trainer']['percent'] = noise_rate
    config['trainer']['asym'] = noisetype
    
    return parse, config

In [6]:
def extract_cleanidx(teacher, data_loader, parse, print_statistics = True):
    teacher.load_state_dict(torch.load('./checkpoint/' + parse.load_name)['state_dict'])
    teacher = teacher.cuda()

    if not parse.reinit: teacher.load_state_dict(torch.load('./checkpoint/' + parse.load_name)['state_dict'])
    for params in teacher.parameters(): params.requires_grad = False
    
    if 'fine' in parse.distill_mode:
        features, labels = get_features(teacher, data_loader)
        clean_labels = fine(current_features=features, current_labels=labels, fit = parse.distill_mode)
    elif 'loss' in parse.distill_mode:
        clean_labels, labels = cleansing_loss(teacher, data_loader)
    else:
        raise NotImplemented 
    if print_statistics: 
        selected, precision, recall, specificity, accuracy = return_statistics(data_loader, clean_labels, datanum=len(labels))
    
    return selected, precision, recall, specificity, accuracy

In [7]:
def make_pd_list(root, config, log_filename):
    random.seed(config['seed'])
    torch.manual_seed(config['seed'])
    torch.cuda.manual_seed_all(config['seed'])
    torch.backends.cudnn.deterministic = True
    np.random.seed(config['seed'])
    
    # load checkpoint path
    pathlist = os.listdir(root)
    pathlist = [path for path in pathlist if ('.pth' in path) and ('eigen' not in path) and ('kmeans' not in path) and ('c100') not in path]
    
    # initialize model
    model = module_arch.resnet34(num_classes=10)
    
    # make pandas file
    logcolumns = ['noisetype', 'noiserate', 'lossfunction', 'selected', 'precision', 'recall', 'specificity', 'accuracy']
    log_pd = pd.DataFrame(np.zeros([len(pathlist), len(logcolumns)]), columns = logcolumns)
    
    # write pandas file
    noisetypelst = ['']
    for i in range(len(pathlist)):
        noisetype, noiserate, lossfunction = decode(pathlist[i])
        parse, config = make_parse('./rn34/' + pathlist[i], config, noiserate, noisetype)
        
        # load original dataloader
        data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size= 100,
        shuffle=False,
        validation_split=0.0,
        num_batches=config['data_loader']['args']['num_batches'],
        training=True,
        num_workers=config['data_loader']['args']['num_workers'],
        pin_memory=config['data_loader']['args']['pin_memory'],
        config=config)
        
        selected, precision, recall, specificity, accuracy = extract_cleanidx(model, data_loader, parse)
        log_pd.loc[i] = [str(noisetype), str(noiserate), lossfunction, selected, precision, recall, specificity, accuracy]
        log_pd.to_csv(log_filename)
        
    return log_pd

In [8]:
make_pd_list(root = './checkpoint/rn34/', config=config, log_filename = 'c10_loss_pretrained_statistics.csv')

Files already downloaded and verified
##############
[3 2 1 1 3 0 0 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:12<00:00, 40.79it/s]
100%|██████████| 500/500 [00:03<00:00, 162.64it/s]


Noisy: 9226, Clean: 40774
Selected samples: 39673 
Precision: 0.9515 
Recall: 0.9258 
Specificity: 0.7915
Accuracy: 0.901 
Fraction of clean samples/selected samples: 0.9515
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

##############
[8 9 1 9 4 8 3 6 3 6]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:12<00:00, 42.46it/s]
100%|██████████| 500/500 [00:03<00:00, 161.51it/s]


Noisy: 36036, Clean: 13964
Selected samples: 21349 
Precision: 0.5865 
Recall: 0.8967 
Specificity: 0.7551
Accuracy: 0.7946 
Fraction of clean samples/selected samples: 0.5865
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

##############
[3 2 1 1 3 2 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:12<00:00, 41.14it/s]
100%|██████████| 500/500 [00:02<00:00, 169.90it/s]


Noisy: 2458, Clean: 47542
Selected samples: 46524 
Precision: 0.9956 
Recall: 0.9743 
Specificity: 0.9166
Accuracy: 0.9714 
Fraction of clean samples/selected samples: 0.9956
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

##############
[3 9 1 1 3 8 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:12<00:00, 39.71it/s]
100%|██████████| 500/500 [00:03<00:00, 150.91it/s]


Noisy: 9006, Clean: 40994
Selected samples: 39801 
Precision: 0.9949 
Recall: 0.9659 
Specificity: 0.9773
Accuracy: 0.968 
Fraction of clean samples/selected samples: 0.9949
Files already downloaded and verified
##############
[8 9 1 9 4 8 3 6 3 6]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:20<00:00, 41.36it/s]
100%|██████████| 500/500 [00:03<00:00, 157.85it/s]


Noisy: 36036, Clean: 13964
Selected samples: 13484 
Precision: 0.873 
Recall: 0.843 
Specificity: 0.9525
Accuracy: 0.9219 
Fraction of clean samples/selected samples: 0.873
Files already downloaded and verified
##############
[8 9 1 9 3 8 2 7 3 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.45it/s]
100%|██████████| 500/500 [00:03<00:00, 151.86it/s]


Noisy: 17975, Clean: 32025
Selected samples: 31321 
Precision: 0.9807 
Recall: 0.9592 
Specificity: 0.9664
Accuracy: 0.9618 
Fraction of clean samples/selected samples: 0.9807
Files already downloaded and verified
##############
[8 9 1 9 3 8 3 7 3 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:20<00:00, 41.46it/s]
100%|██████████| 500/500 [00:03<00:00, 152.35it/s]


Noisy: 27041, Clean: 22959
Selected samples: 17818 
Precision: 0.9778 
Recall: 0.7589 
Specificity: 0.9854
Accuracy: 0.8814 
Fraction of clean samples/selected samples: 0.9778
Files already downloaded and verified
##############
[3 9 1 1 3 8 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 22.29it/s]
100%|██████████| 500/500 [00:03<00:00, 159.06it/s]


Noisy: 9006, Clean: 40994
Selected samples: 28369 
Precision: 0.9898 
Recall: 0.685 
Specificity: 0.9678
Accuracy: 0.7359 
Fraction of clean samples/selected samples: 0.9898
Files already downloaded and verified
##############
[3 2 1 1 3 0 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.22it/s]
100%|██████████| 500/500 [00:03<00:00, 155.17it/s]


Noisy: 7020, Clean: 42980
Selected samples: 24739 
Precision: 0.934 
Recall: 0.5376 
Specificity: 0.7675
Accuracy: 0.5699 
Fraction of clean samples/selected samples: 0.934
Files already downloaded and verified
##############
[8 9 1 9 3 8 3 7 3 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:24<00:00, 20.65it/s]
100%|██████████| 500/500 [00:03<00:00, 141.51it/s]


Noisy: 27041, Clean: 22959
Selected samples: 15661 
Precision: 0.9812 
Recall: 0.6693 
Specificity: 0.9891
Accuracy: 0.8423 
Fraction of clean samples/selected samples: 0.9812
Files already downloaded and verified
##############
[8 9 1 9 4 8 3 6 3 6]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:24<00:00, 20.34it/s]
100%|██████████| 500/500 [00:03<00:00, 135.60it/s]


Noisy: 36036, Clean: 13964
Selected samples: 14694 
Precision: 0.614 
Recall: 0.6461 
Specificity: 0.8426
Accuracy: 0.7877 
Fraction of clean samples/selected samples: 0.614
Files already downloaded and verified
##############
[8 9 1 9 4 8 3 6 3 6]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.02it/s]
100%|██████████| 500/500 [00:03<00:00, 157.89it/s]


Noisy: 36036, Clean: 13964
Selected samples: 14921 
Precision: 0.5786 
Recall: 0.6182 
Specificity: 0.8255
Accuracy: 0.7676 
Fraction of clean samples/selected samples: 0.5786
Files already downloaded and verified
##############
[3 2 1 1 3 0 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.69it/s]
100%|██████████| 500/500 [00:03<00:00, 158.96it/s]


Noisy: 7020, Clean: 42980
Selected samples: 28998 
Precision: 0.9378 
Recall: 0.6327 
Specificity: 0.7432
Accuracy: 0.6482 
Fraction of clean samples/selected samples: 0.9378
Files already downloaded and verified
##############
[3 9 1 1 3 8 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.18it/s]
100%|██████████| 500/500 [00:03<00:00, 151.69it/s]


Noisy: 9006, Clean: 40994
Selected samples: 37294 
Precision: 0.9867 
Recall: 0.8977 
Specificity: 0.945
Accuracy: 0.9062 
Fraction of clean samples/selected samples: 0.9867
Files already downloaded and verified
##############
[3 2 1 1 3 0 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.49it/s]
100%|██████████| 500/500 [00:03<00:00, 156.43it/s]


Noisy: 4786, Clean: 45214
Selected samples: 35236 
Precision: 0.9968 
Recall: 0.7768 
Specificity: 0.9766
Accuracy: 0.796 
Fraction of clean samples/selected samples: 0.9968
Files already downloaded and verified
##############
[8 9 1 9 3 8 3 7 3 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 22.15it/s]
100%|██████████| 500/500 [00:03<00:00, 158.62it/s]


Noisy: 27041, Clean: 22959
Selected samples: 17842 
Precision: 0.9921 
Recall: 0.771 
Specificity: 0.9948
Accuracy: 0.892 
Fraction of clean samples/selected samples: 0.9921
Files already downloaded and verified
##############
[8 9 1 9 3 8 2 7 3 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 20.98it/s]
100%|██████████| 500/500 [00:03<00:00, 146.73it/s]


Noisy: 17975, Clean: 32025
Selected samples: 25938 
Precision: 0.9894 
Recall: 0.8013 
Specificity: 0.9846
Accuracy: 0.8672 
Fraction of clean samples/selected samples: 0.9894
Files already downloaded and verified
##############
[3 2 1 1 3 2 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 40.86it/s]
100%|██████████| 500/500 [00:03<00:00, 160.78it/s]


Noisy: 2458, Clean: 47542
Selected samples: 32084 
Precision: 0.9934 
Recall: 0.6704 
Specificity: 0.9133
Accuracy: 0.6823 
Fraction of clean samples/selected samples: 0.9934
Files already downloaded and verified
##############
[3 2 1 1 3 0 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 20.97it/s]
100%|██████████| 500/500 [00:03<00:00, 152.84it/s]


Noisy: 7020, Clean: 42980
Selected samples: 27833 
Precision: 0.9237 
Recall: 0.5982 
Specificity: 0.6974
Accuracy: 0.6121 
Fraction of clean samples/selected samples: 0.9237
Files already downloaded and verified
##############
[3 2 1 1 3 0 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.02it/s]
100%|██████████| 500/500 [00:03<00:00, 149.75it/s]


Noisy: 4786, Clean: 45214
Selected samples: 27819 
Precision: 0.9835 
Recall: 0.6051 
Specificity: 0.9039
Accuracy: 0.6337 
Fraction of clean samples/selected samples: 0.9835
Files already downloaded and verified
##############
[8 9 1 9 3 8 3 7 3 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 41.00it/s]
100%|██████████| 500/500 [00:03<00:00, 157.25it/s]


Noisy: 27041, Clean: 22959
Selected samples: 21397 
Precision: 0.9534 
Recall: 0.8885 
Specificity: 0.9631
Accuracy: 0.9288 
Fraction of clean samples/selected samples: 0.9534
Files already downloaded and verified
##############
[3 2 1 1 3 2 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.16it/s]
100%|██████████| 500/500 [00:03<00:00, 157.30it/s]


Noisy: 2458, Clean: 47542
Selected samples: 29794 
Precision: 0.9886 
Recall: 0.6196 
Specificity: 0.8621
Accuracy: 0.6315 
Fraction of clean samples/selected samples: 0.9886
Files already downloaded and verified
##############
[3 9 1 1 3 8 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:24<00:00, 20.75it/s]
100%|██████████| 500/500 [00:03<00:00, 145.92it/s]


Noisy: 9006, Clean: 40994
Selected samples: 29630 
Precision: 0.9937 
Recall: 0.7182 
Specificity: 0.9792
Accuracy: 0.7652 
Fraction of clean samples/selected samples: 0.9937
Files already downloaded and verified
##############
[8 9 1 9 3 8 2 7 3 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 39.83it/s]
100%|██████████| 500/500 [00:03<00:00, 151.72it/s]


Noisy: 17975, Clean: 32025
Selected samples: 27880 
Precision: 0.9956 
Recall: 0.8668 
Specificity: 0.9932
Accuracy: 0.9122 
Fraction of clean samples/selected samples: 0.9956
Files already downloaded and verified
##############
[3 2 1 1 3 0 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 21.80it/s]
100%|██████████| 500/500 [00:03<00:00, 158.24it/s]


Noisy: 4786, Clean: 45214
Selected samples: 32413 
Precision: 0.9647 
Recall: 0.6916 
Specificity: 0.761
Accuracy: 0.6982 
Fraction of clean samples/selected samples: 0.9647
Files already downloaded and verified
##############
[8 9 1 9 3 8 2 7 3 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.24it/s]
100%|██████████| 500/500 [00:03<00:00, 153.98it/s]


Noisy: 17975, Clean: 32025
Selected samples: 24642 
Precision: 0.99 
Recall: 0.7618 
Specificity: 0.9863
Accuracy: 0.8425 
Fraction of clean samples/selected samples: 0.99
Files already downloaded and verified
##############
[3 2 1 1 3 0 0 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 40.78it/s]
100%|██████████| 500/500 [00:03<00:00, 152.21it/s]


Noisy: 9226, Clean: 40774
Selected samples: 28778 
Precision: 0.9153 
Recall: 0.646 
Specificity: 0.7357
Accuracy: 0.6626 
Fraction of clean samples/selected samples: 0.9153
Files already downloaded and verified
##############
[3 2 1 1 3 0 0 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.64it/s]
100%|██████████| 500/500 [00:03<00:00, 154.64it/s]


Noisy: 9226, Clean: 40774
Selected samples: 25229 
Precision: 0.8532 
Recall: 0.5279 
Specificity: 0.5985
Accuracy: 0.5409 
Fraction of clean samples/selected samples: 0.8532
Files already downloaded and verified
##############
[3 2 1 1 3 0 0 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 40.86it/s]
100%|██████████| 500/500 [00:03<00:00, 157.75it/s]


Noisy: 9226, Clean: 40774
Selected samples: 25198 
Precision: 0.841 
Recall: 0.5197 
Specificity: 0.5658
Accuracy: 0.5282 
Fraction of clean samples/selected samples: 0.841
Files already downloaded and verified
##############
[3 2 1 1 3 0 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 22.37it/s]
100%|██████████| 500/500 [00:03<00:00, 154.89it/s]


Noisy: 4786, Clean: 45214
Selected samples: 33597 
Precision: 0.9674 
Recall: 0.7188 
Specificity: 0.771
Accuracy: 0.7238 
Fraction of clean samples/selected samples: 0.9674
Files already downloaded and verified
##############
[3 2 1 1 3 0 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 22.27it/s]
100%|██████████| 500/500 [00:03<00:00, 159.72it/s]


Noisy: 7020, Clean: 42980
Selected samples: 29682 
Precision: 0.9801 
Recall: 0.6768 
Specificity: 0.9157
Accuracy: 0.7104 
Fraction of clean samples/selected samples: 0.9801
Files already downloaded and verified
##############
[3 2 1 1 3 2 2 7 6 5]
[3 2 1 1 3 2 2 7 6 5]
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 40.10it/s]
100%|██████████| 500/500 [00:03<00:00, 152.16it/s]

Noisy: 2458, Clean: 47542
Selected samples: 31404 
Precision: 0.9839 
Recall: 0.6499 
Specificity: 0.7941
Accuracy: 0.657 
Fraction of clean samples/selected samples: 0.9839





Unnamed: 0,noisetype,noiserate,lossfunction,selected,precision,recall,specificity,accuracy
0,True,0.4,sce,39673.0,0.9515,0.9258,0.7915,0.901
1,False,0.8,elr,21349.0,0.5865,0.8967,0.7551,0.7946
2,True,0.1,gce,46524.0,0.9956,0.9743,0.9166,0.9714
3,False,0.2,elr,39801.0,0.9949,0.9659,0.9773,0.968
4,False,0.8,gce,13484.0,0.873,0.843,0.9525,0.9219
5,False,0.4,gce,31321.0,0.9807,0.9592,0.9664,0.9618
6,False,0.6,sce,17818.0,0.9778,0.7589,0.9854,0.8814
7,False,0.2,sce,28369.0,0.9898,0.685,0.9678,0.7359
8,True,0.3,sce,24739.0,0.934,0.5376,0.7675,0.5699
9,False,0.6,cce,15661.0,0.9812,0.6693,0.9891,0.8423
