In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up on the next cell run without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
from config import config
import datasets
import feature_extractors
import torch
from esvit_swin import extract_features
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer
from models import ocsvm, isolation_forest, lof
from evaluate import avg_auc, avg_cs, custom_scoring

In [3]:
# Build the experiment configuration: start from the project defaults, overlay
# the experiment-specific YAML file on top, then freeze the result.
# NOTE(review): freeze() presumably makes cfg read-only (yacs-style API) — confirm.
cfg = config.get_cfg_defaults()
cfg.merge_from_file('experiments/cifar100_esvit_base.yaml')
cfg.freeze()

In [4]:
# Instantiate the feature extractor named in the config and move it to the GPU.
model_name = cfg.FEATURE_EXTRACTOR.NAME.lower()

# Config name -> name of the matching class inside `feature_extractors`.
# Class names are kept as strings so only the selected class is looked up.
extractor_class_names = {
    'esvit_swin_base': 'EsVitBase',
    'esvit_swin_tiny': 'EsVitTiny',
}
if model_name not in extractor_class_names:
    raise NameError('Feature extractor name invalid.')

extractor_cls = getattr(feature_extractors, extractor_class_names[model_name])
model = extractor_cls(num_blocks=cfg.FEATURE_EXTRACTOR.NUM_BLOCKS)
# The trailing ';' keeps the notebook from echoing the module repr.
model.cuda();

=> merge config from esvit/experiments/imagenet/swin/swin_base_patch4_window14_224.yaml


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Take key teacher in provided checkpoint dict
Pretrained weights found at checkpoints/swin_base_w14/checkpoint_best.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head_dense.mlp.0.weight', 'head_dense.mlp.0.bias', 'head_dense.mlp.2.weight', 'head_dense.mlp.2.bias', 'head_dense.mlp.4.weight', 'head_dense.mlp.4.bias', 'head_dense.last_layer.weight_g', 'head_dense.last_layer.weight_v', 'head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v'])


In [5]:
# Load the train/test splits of the configured dataset and wrap each one in a
# DataLoader used for feature extraction.
dataset_name = cfg.DATASET.NAME.lower()

# Config name -> (class name inside `datasets`, root directory on disk).
# Class names are kept as strings so only the selected class is looked up.
dataset_specs = {
    'cifar10': ('CIFAR10', 'data/'),
    'cifar100': ('CIFAR100', 'data/'),
    'fmnist': ('FMNIST', 'data/'),
    'catsvsdogs': ('CatsVsDogs', 'data/cats_vs_dogs'),
    'bloodcells': ('BloodCells', 'data/blood_cells'),
    'viewprediction': ('ViewPrediction', 'data/view_prediction'),
    'covid19': ('COVID19', 'data/covid19'),
    'weatherprediction': ('WeatherPrediction', 'data/weather'),
    'concretecrack': ('ConcreteCrack', 'data/crack'),
    'dior': ('DIOR', 'data/dior'),
}
if dataset_name not in dataset_specs:
    raise NameError('Dataset name invalid')

dataset_cls_name, data_root = dataset_specs[dataset_name]
dataset_cls = getattr(datasets, dataset_cls_name)
train_data = dataset_cls(root=data_root, train=True)
test_data = dataset_cls(root=data_root, train=False)

batch_size = cfg.EXTRACT.BATCH_SIZE
num_workers = cfg.SYSTEM.NUM_WORKERS
pin_memory = cfg.SYSTEM.PIN_MEMORY
num_classes = len(train_data.class_to_idx)

# Both loaders share the same options. `pin_memory` now honours the config
# value (it was previously read from cfg but hard-coded to True below).
# shuffle=False and drop_last=False keep every sample, in dataset order.
loader_kwargs = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=pin_memory,
    drop_last=False,
    shuffle=False,
)
data_loader_train = torch.utils.data.DataLoader(train_data, **loader_kwargs)
data_loader_test = torch.utils.data.DataLoader(test_data, **loader_kwargs)



Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Report how many samples each split contains (train first, then test).
split_sizes = (len(train_data), len(test_data))
print(*split_sizes)

50000 10000


In [7]:
# Show the class-name -> index mapping exposed by the training dataset.
class_to_idx = train_data.class_to_idx
print(class_to_idx)

{'class_0': 0, 'class_1': 1, 'class_2': 2, 'class_3': 3, 'class_4': 4, 'class_5': 5, 'class_6': 6, 'class_7': 7, 'class_8': 8, 'class_9': 9, 'class_10': 10, 'class_11': 11, 'class_12': 12, 'class_13': 13, 'class_14': 14, 'class_15': 15, 'class_16': 16, 'class_17': 17, 'class_18': 18, 'class_19': 19}


In [10]:
# Load cached features if present, otherwise extract them with the model and
# cache them on disk. Cache layout:
#   <FEATURES_PATH>/<model>_last_<N>_blocks/<dataset>_{train,test}.pkl
features_path = cfg.SYSTEM.FEATURES_PATH
model_path = model_name + '_last_' + str(cfg.FEATURE_EXTRACTOR.NUM_BLOCKS) + '_blocks'
features_path = os.path.join(features_path, model_path)
prefix_path = os.path.join(features_path, dataset_name)
train_feat_path = prefix_path + '_train.pkl'
test_feat_path = prefix_path + '_test.pkl'

# Use the cache only when BOTH files exist. Checking the train file alone
# crashes on load if the test file is missing (e.g. an interrupted save).
if os.path.exists(train_feat_path) and os.path.exists(test_feat_path):
    print('Loading features...')
    # NOTE(review): torch.load unpickles its input — only load cache files
    # produced by this notebook or another trusted source.
    features, labels = torch.load(train_feat_path)
    features_test, labels_test = torch.load(test_feat_path)
else:
    print('Extracting features...')
    features, labels = extract_features(model, data_loader_train)
    features_test, labels_test = extract_features(model, data_loader_test)
    os.makedirs(features_path, exist_ok=True)
    print('Saving features...')
    torch.save([features, labels], train_feat_path)
    torch.save([features_test, labels_test], test_feat_path)

Extracting features...
Saving features...


In [11]:
# Hold out 33% of the training features as a validation subset for
# hyper-parameter tuning; the fixed random_state keeps the split reproducible.
features_np, labels_np = features.numpy(), labels.numpy()
split = train_test_split(features_np, labels_np, test_size=0.33, random_state=42)
features_train, features_val, labels_train, labels_val = split

# Class indices 0 .. num_classes - 1.
classes = [*range(num_classes)]

In [12]:
# Tune, train and evaluate every enabled anomaly detector, then write a text
# report to <LOG_PATH>/<model_path>/<dataset_name>.txt.

# Pick the scoring used during hyper-parameter tuning.
if cfg.TRAIN.METRIC == 'roc_auc':
    scoring = 'roc_auc'
elif cfg.TRAIN.METRIC == 'custom':
    cost_model = cfg.TRAIN.COST_MODEL[0]
    # NOTE(review): `needs_threshold` is deprecated in recent scikit-learn
    # releases in favour of `response_method` — confirm against the installed version.
    scoring = make_scorer(custom_scoring, greater_is_better=False, needs_threshold=True, cost_model=cost_model, sample_weight='balanced')
else:
    raise NameError('Scoring name invalid.')


def _score_on_test(fitted_classifiers):
    """Evaluate fitted classifiers on the held-out test features.

    Calls ``avg_auc`` when the configured metric is ``'roc_auc'`` and
    ``avg_cs`` otherwise, and returns that function's result unchanged
    (callers unpack it as ``(score, scorings)``).
    """
    evaluate = avg_auc if cfg.TRAIN.METRIC == 'roc_auc' else avg_cs
    return evaluate(fitted_classifiers, [features_test, labels_test])


def _log_entry(title, tuned_params, per_class_scorings, avg_score):
    """Format one detector's section of the results log."""
    return (
        f'{title}\nHyperparameters:\n{tuned_params!s}'
        f'\nMetric: {cfg.TRAIN.METRIC!s}'
        f'\nScorings:\n{per_class_scorings!s}'
        f'\nAVG: {avg_score!s}\n\n'
    )


verbose = cfg.TRAIN.VERBOSE
log_entries = []

# NOTE(review): only SGD OCSVM honours cfg.TRAIN.VERBOSE and a configured
# hyper-parameter grid; the other detectors are tuned with verbose=0, as
# before — confirm this asymmetry is intended.
if cfg.TRAIN.SGDOCSVM:
    params = cfg.TRAIN.SGDOCSVM.HYPERPARAMS[0]
    best_params = ocsvm.tune_sgd_ocsvm(classes, [features_train, labels_train], [features_val, labels_val], params=params, verbose=verbose, scoring=scoring)
    # Final models are refit on the full training set with the tuned parameters.
    classifiers = ocsvm.train_sgd_ocsvm(classes, [features, labels], best_params)
    score, scorings = _score_on_test(classifiers)
    log_entries.append(_log_entry('SGD OCSVM', best_params, scorings, score))
if cfg.TRAIN.OCSVM:
    best_params = ocsvm.tune_ocsvm(classes, [features_train, labels_train], [features_val, labels_val], verbose=0, scoring=scoring)
    classifiers = ocsvm.train_ocsvm(classes, [features, labels], best_params)
    score, scorings = _score_on_test(classifiers)
    log_entries.append(_log_entry('OCSVM', best_params, scorings, score))
if cfg.TRAIN.ISOLATION_FOREST:
    best_params = isolation_forest.tune_isolation_forest(classes, [features_train, labels_train], [features_val, labels_val], verbose=0, scoring=scoring)
    classifiers = isolation_forest.train_isolation_forest(classes, [features, labels], best_params)
    score, scorings = _score_on_test(classifiers)
    log_entries.append(_log_entry('Isolation Forest', best_params, scorings, score))
if cfg.TRAIN.LOF:
    best_params = lof.tune_lof(classes, [features_train, labels_train], [features_val, labels_val], verbose=0, scoring=scoring)
    classifiers = lof.train_lof(classes, [features, labels], best_params)
    score, scorings = _score_on_test(classifiers)
    log_entries.append(_log_entry('Local Outliers Factor', best_params, scorings, score))

log = ''.join(log_entries)

log_path = os.path.join(cfg.SYSTEM.LOG_PATH, model_path)
os.makedirs(log_path, exist_ok=True)
log_path = os.path.join(log_path, dataset_name)
# The `with` block flushes and closes the file on exit; no manual flush()/close().
with open(log_path + '.txt', 'w') as f:
    f.write(log)