In [1]:
import os, sys
os.chdir('../')

In [2]:
import argparse
import torch
from tqdm import tqdm
import data_loader.data_loaders as module_data
import loss as module_loss
import model.metric as module_metric
import model.model as module_arch

import easydict
import torch.nn as nn
import torch.nn.functional as F
import sys
import os
import json
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 

import data_loader.data_loaders as module_data
import model.model as module_arch

from selection.svd_classifier import *
from selection.gmm import *
from selection.util import *

from utils.parse_config import ConfigParser
from utils.util import *
from utils.args import *

In [3]:
config_file = './hyperparams/multistep/config_cifar100_cce_rn34.json'
with open(config_file, 'r') as f:
    config = json.load(f)

# resume_path = './rn34/multistep_asym_40_elr.pth'

In [4]:
def decode(path):
    items = path.split('_')
    noisetype = True if items[2]=='asym' else False
    noiserate = float(items[3]) * 0.01
    
    return noisetype, noiserate, items[4].split('.')[0]

In [5]:
def make_parse(resume_path, config, noise_rate, noisetype):
    parse = easydict.EasyDict({
    "load_name" : resume_path,
    "reinit": False,
    "distill_mode": 'loss'
    })
    
    config['trainer']['percent'] = noise_rate
    config['trainer']['asym'] = noisetype
    
    return parse, config

In [6]:
def extract_cleanidx(teacher, data_loader, parse, print_statistics = True):
    teacher.load_state_dict(torch.load('./checkpoint/' + parse.load_name)['state_dict'])
    teacher = teacher.cuda()

    if not parse.reinit: teacher.load_state_dict(torch.load('./checkpoint/' + parse.load_name)['state_dict'])
    for params in teacher.parameters(): params.requires_grad = False
    
    if 'fine' in parse.distill_mode:
        features, labels = get_features(teacher, data_loader)
        clean_labels = fine(current_features=features, current_labels=labels, fit = parse.distill_mode)
    elif 'loss' in parse.distill_mode:
        clean_labels, labels = cleansing_loss(teacher, data_loader)
    else:
        raise NotImplemented 
    if print_statistics: 
        selected, precision, recall, specificity, accuracy = return_statistics(data_loader, clean_labels, datanum=len(labels))
    
    return selected, precision, recall, specificity, accuracy

In [7]:
def make_pd_list(root, config, log_filename):
    random.seed(config['seed'])
    torch.manual_seed(config['seed'])
    torch.cuda.manual_seed_all(config['seed'])
    torch.backends.cudnn.deterministic = True
    np.random.seed(config['seed'])
    
    # load checkpoint path
    pathlist = os.listdir(root)
    pathlist = [path for path in pathlist if 'c100' in path]
#     pathlist = [path for path in pathlist if ('.pth' in path) and ('eigen' not in path) and ('kmeans' not in path) and ('c100') not in path]
    
    # initialize model
    model = module_arch.resnet34(num_classes=100)
    
    # make pandas file
    logcolumns = ['noisetype', 'noiserate', 'lossfunction', 'selected', 'precision', 'recall', 'specificity', 'accuracy']
    log_pd = pd.DataFrame(np.zeros([len(pathlist), len(logcolumns)]), columns = logcolumns)
    
    # write pandas file
    noisetypelst = ['']
    for i in range(len(pathlist)):
        noisetype, noiserate, lossfunction = decode(pathlist[i])
        parse, config = make_parse('./rn34/' + pathlist[i], config, noiserate, noisetype)
        
        # load original dataloader
        data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size= 100,
        shuffle=False,
        validation_split=0.0,
        num_batches=config['data_loader']['args']['num_batches'],
        training=True,
        num_workers=config['data_loader']['args']['num_workers'],
        pin_memory=config['data_loader']['args']['pin_memory'],
        config=config)
        
        selected, precision, recall, specificity, accuracy = extract_cleanidx(model, data_loader, parse)
        log_pd.loc[i] = [str(noisetype), str(noiserate), lossfunction, selected, precision, recall, specificity, accuracy]
        log_pd.to_csv(log_filename)
        
    return log_pd

In [8]:
make_pd_list(root = './checkpoint/rn34/', config=config, log_filename = 'c100_loss_pretrained_statistics.csv')

Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:20<00:00, 23.86it/s]
100%|██████████| 500/500 [00:03<00:00, 147.33it/s]


Noisy: 9993, Clean: 40007
Selected samples: 39490 
Precision: 0.9473 
Recall: 0.935 
Specificity: 0.7917
Accuracy: 0.9064 
Fraction of clean samples/selected samples: 0.9473
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 19.41it/s]
100%|██████████| 500/500 [00:03<00:00, 146.66it/s]


Noisy: 9993, Clean: 40007
Selected samples: 40097 
Precision: 0.9176 
Recall: 0.9197 
Specificity: 0.6696
Accuracy: 0.8697 
Fraction of clean samples/selected samples: 0.9176
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:21<00:00, 23.73it/s]
100%|██████████| 500/500 [00:03<00:00, 149.10it/s]


Noisy: 9905, Clean: 40095
Selected samples: 48418 
Precision: 0.8261 
Recall: 0.9976 
Specificity: 0.1498
Accuracy: 0.8296 
Fraction of clean samples/selected samples: 0.8261
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 19.39it/s]
100%|██████████| 500/500 [00:03<00:00, 148.68it/s]


Noisy: 14857, Clean: 35143
Selected samples: 37514 
Precision: 0.8889 
Recall: 0.9489 
Specificity: 0.7196
Accuracy: 0.8808 
Fraction of clean samples/selected samples: 0.8889
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.20it/s]
100%|██████████| 500/500 [00:03<00:00, 148.04it/s]


Noisy: 19856, Clean: 30144
Selected samples: 37394 
Precision: 0.6437 
Recall: 0.7985 
Specificity: 0.329
Accuracy: 0.612 
Fraction of clean samples/selected samples: 0.6437
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:24<00:00, 19.39it/s]
100%|██████████| 500/500 [00:03<00:00, 137.35it/s]


Noisy: 14857, Clean: 35143
Selected samples: 37479 
Precision: 0.8188 
Recall: 0.8732 
Specificity: 0.5428
Accuracy: 0.775 
Fraction of clean samples/selected samples: 0.8188
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:24<00:00, 19.38it/s]
100%|██████████| 500/500 [00:03<00:00, 136.56it/s]


Noisy: 19856, Clean: 30144
Selected samples: 37831 
Precision: 0.721 
Recall: 0.9049 
Specificity: 0.4685
Accuracy: 0.7316 
Fraction of clean samples/selected samples: 0.721
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.04it/s]
100%|██████████| 500/500 [00:03<00:00, 154.51it/s]


Noisy: 29703, Clean: 20297
Selected samples: 23634 
Precision: 0.7678 
Recall: 0.894 
Specificity: 0.8152
Accuracy: 0.8472 
Fraction of clean samples/selected samples: 0.7678
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 19.37it/s]
100%|██████████| 500/500 [00:03<00:00, 150.95it/s]


Noisy: 39597, Clean: 10403
Selected samples: 18675 
Precision: 0.4848 
Recall: 0.8702 
Specificity: 0.757
Accuracy: 0.7806 
Fraction of clean samples/selected samples: 0.4848
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.17it/s]
100%|██████████| 500/500 [00:03<00:00, 147.55it/s]


Noisy: 4969, Clean: 45031
Selected samples: 41410 
Precision: 0.9974 
Recall: 0.9172 
Specificity: 0.9787
Accuracy: 0.9233 
Fraction of clean samples/selected samples: 0.9974
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.54it/s]
100%|██████████| 500/500 [00:03<00:00, 140.85it/s]


Noisy: 29703, Clean: 20297
Selected samples: 15902 
Precision: 0.9496 
Recall: 0.744 
Specificity: 0.973
Accuracy: 0.88 
Fraction of clean samples/selected samples: 0.9496
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 22.07it/s]
100%|██████████| 500/500 [00:03<00:00, 150.19it/s]


Noisy: 19797, Clean: 30203
Selected samples: 30073 
Precision: 0.9518 
Recall: 0.9477 
Specificity: 0.9268
Accuracy: 0.9394 
Fraction of clean samples/selected samples: 0.9518
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 19.38it/s]
100%|██████████| 500/500 [00:03<00:00, 145.48it/s]


Noisy: 4969, Clean: 45031
Selected samples: 44258 
Precision: 0.9892 
Recall: 0.9723 
Specificity: 0.9042
Accuracy: 0.9655 
Fraction of clean samples/selected samples: 0.9892
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.61it/s]
100%|██████████| 500/500 [00:03<00:00, 149.59it/s]


Noisy: 19797, Clean: 30203
Selected samples: 32053 
Precision: 0.9144 
Recall: 0.9704 
Specificity: 0.8614
Accuracy: 0.9273 
Fraction of clean samples/selected samples: 0.9144
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 19.37it/s]
100%|██████████| 500/500 [00:03<00:00, 138.91it/s]


Noisy: 14857, Clean: 35143
Selected samples: 39001 
Precision: 0.8183 
Recall: 0.9081 
Specificity: 0.523
Accuracy: 0.7937 
Fraction of clean samples/selected samples: 0.8183
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 19.38it/s]
100%|██████████| 500/500 [00:03<00:00, 146.22it/s]


Noisy: 4969, Clean: 45031
Selected samples: 44195 
Precision: 0.9935 
Recall: 0.9751 
Specificity: 0.9424
Accuracy: 0.9718 
Fraction of clean samples/selected samples: 0.9935
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 21.75it/s]
100%|██████████| 500/500 [00:03<00:00, 149.85it/s]


Noisy: 9905, Clean: 40095
Selected samples: 40315 
Precision: 0.9714 
Recall: 0.9767 
Specificity: 0.8834
Accuracy: 0.9582 
Fraction of clean samples/selected samples: 0.9714
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.15it/s]
100%|██████████| 500/500 [00:03<00:00, 154.36it/s]


Noisy: 9993, Clean: 40007
Selected samples: 39963 
Precision: 0.9694 
Recall: 0.9683 
Specificity: 0.8776
Accuracy: 0.9502 
Fraction of clean samples/selected samples: 0.9694
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:24<00:00, 20.75it/s]
100%|██████████| 500/500 [00:03<00:00, 142.26it/s]


Noisy: 19856, Clean: 30144
Selected samples: 42434 
Precision: 0.6397 
Recall: 0.9004 
Specificity: 0.2299
Accuracy: 0.6342 
Fraction of clean samples/selected samples: 0.6397
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 19.37it/s]
100%|██████████| 500/500 [00:03<00:00, 150.68it/s]


Noisy: 19797, Clean: 30203
Selected samples: 26120 
Precision: 0.9654 
Recall: 0.8349 
Specificity: 0.9544
Accuracy: 0.8822 
Fraction of clean samples/selected samples: 0.9654
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.69it/s]
100%|██████████| 500/500 [00:03<00:00, 150.64it/s]


Noisy: 39597, Clean: 10403
Selected samples: 10041 
Precision: 0.6502 
Recall: 0.6276 
Specificity: 0.9113
Accuracy: 0.8523 
Fraction of clean samples/selected samples: 0.6502
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.24it/s]
100%|██████████| 500/500 [00:03<00:00, 150.07it/s]


Noisy: 14857, Clean: 35143
Selected samples: 32346 
Precision: 0.9798 
Recall: 0.9019 
Specificity: 0.9561
Accuracy: 0.918 
Fraction of clean samples/selected samples: 0.9798
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 20.99it/s]
100%|██████████| 500/500 [00:03<00:00, 150.50it/s]


Noisy: 19856, Clean: 30144
Selected samples: 28077 
Precision: 0.9499 
Recall: 0.8848 
Specificity: 0.9291
Accuracy: 0.9024 
Fraction of clean samples/selected samples: 0.9499
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.54it/s]
100%|██████████| 500/500 [00:03<00:00, 152.38it/s]


Noisy: 29703, Clean: 20297
Selected samples: 16364 
Precision: 0.9282 
Recall: 0.7483 
Specificity: 0.9604
Accuracy: 0.8743 
Fraction of clean samples/selected samples: 0.9282
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 22.11it/s]
100%|██████████| 500/500 [00:03<00:00, 153.04it/s]


Noisy: 19797, Clean: 30203
Selected samples: 26860 
Precision: 0.9783 
Recall: 0.87 
Specificity: 0.9706
Accuracy: 0.9098 
Fraction of clean samples/selected samples: 0.9783
Files already downloaded and verified
Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 22.23it/s]
100%|██████████| 500/500 [00:03<00:00, 149.75it/s]


Noisy: 39597, Clean: 10403
Selected samples: 8278 
Precision: 0.6827 
Recall: 0.5432 
Specificity: 0.9337
Accuracy: 0.8524 
Fraction of clean samples/selected samples: 0.6827
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:22<00:00, 22.16it/s]
100%|██████████| 500/500 [00:03<00:00, 149.52it/s]


Noisy: 29703, Clean: 20297
Selected samples: 24337 
Precision: 0.7904 
Recall: 0.9477 
Specificity: 0.8283
Accuracy: 0.8768 
Fraction of clean samples/selected samples: 0.7904
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:23<00:00, 21.09it/s]
100%|██████████| 500/500 [00:03<00:00, 151.39it/s]


Noisy: 4969, Clean: 45031
Selected samples: 44391 
Precision: 0.9799 
Recall: 0.966 
Specificity: 0.8205
Accuracy: 0.9515 
Fraction of clean samples/selected samples: 0.9799
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:12<00:00, 41.60it/s]
100%|██████████| 500/500 [00:03<00:00, 163.23it/s]


Noisy: 9905, Clean: 40095
Selected samples: 36048 
Precision: 0.9954 
Recall: 0.8949 
Specificity: 0.9833
Accuracy: 0.9125 
Fraction of clean samples/selected samples: 0.9954
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:12<00:00, 40.56it/s]
100%|██████████| 500/500 [00:02<00:00, 167.33it/s]


Noisy: 39597, Clean: 10403
Selected samples: 9677 
Precision: 0.6027 
Recall: 0.5606 
Specificity: 0.9029
Accuracy: 0.8317 
Fraction of clean samples/selected samples: 0.6027
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:12<00:00, 40.60it/s]
100%|██████████| 500/500 [00:03<00:00, 165.12it/s]


Noisy: 9993, Clean: 40007
Selected samples: 36696 
Precision: 0.9914 
Recall: 0.9093 
Specificity: 0.9684
Accuracy: 0.9211 
Fraction of clean samples/selected samples: 0.9914
Files already downloaded and verified


  0%|          | 0/500 [00:00<?, ?it/s]

Train: 50000 Val: 0


100%|██████████| 500/500 [00:12<00:00, 40.58it/s]
100%|██████████| 500/500 [00:03<00:00, 165.89it/s]

Noisy: 9905, Clean: 40095
Selected samples: 39616 
Precision: 0.9883 
Recall: 0.9765 
Specificity: 0.9532
Accuracy: 0.9719 
Fraction of clean samples/selected samples: 0.9883





Unnamed: 0,noisetype,noiserate,lossfunction,selected,precision,recall,specificity,accuracy
0,True,0.2,cce,39490.0,0.9473,0.935,0.7917,0.9064
1,True,0.2,sce,40097.0,0.9176,0.9197,0.6696,0.8697
2,False,0.2,sce,48418.0,0.8261,0.9976,0.1498,0.8296
3,True,0.3,gce,37514.0,0.8889,0.9489,0.7196,0.8808
4,True,0.4,sce,37394.0,0.6437,0.7985,0.329,0.612
5,True,0.3,sce,37479.0,0.8188,0.8732,0.5428,0.775
6,True,0.4,gce,37831.0,0.721,0.9049,0.4685,0.7316
7,False,0.6,elr,23634.0,0.7678,0.894,0.8152,0.8472
8,False,0.8,gce,18675.0,0.4848,0.8702,0.757,0.7806
9,True,0.1,elr,41410.0,0.9974,0.9172,0.9787,0.9233
