In [1]:
# Turn off Jedi-based tab completion for this IPython session.
%config Completer.use_jedi = False

Evaluate an ensemble of models from their saved test logits files.

## Imports and arguments

In [2]:
import os
import numpy as np
import seaborn as sns
import pandas as pd
import torch
import matplotlib.pyplot as plt

In [3]:
import sys
sys.path.append('../')

from models import model_selector
from utils.data_augmentation import data_augmentation_selector
from utils.datasets import dataset_selector
from utils.neural import *
from utils.metrics import compute_accuracy

In [4]:
class Arguments:
    """Hard-coded replacement for the project's run arguments.

    Each option is exposed as a plain instance attribute. The notebook creates
    a single instance (``args``) and hands it to the project helpers instead of
    parsing a command line.
    """

    def __init__(self):
        # The mapping is rebuilt on every call, so list-valued options
        # (``metrics``, ``scheduler_steps``) are never shared between instances.
        settings = {
            "gpu": 0,
            "seed": 301220201,
            "output_dir": "",
            "problem_type": "classification",
            "epochs": -1,
            "dataset": "CIFAR10",
            "defrost_epoch": -1,
            "batch_size": 128,
            "data_augmentation": "cifar10",
            "img_size": 32,
            "crop_size": 32,
            "normalization": "statistics",
            "add_depth": False,
            "model_name": "kuangliu_resnet18",
            # NOTE(review): the loaded logits are 10-wide while this is 1;
            # confirm whether this value is actually read for CIFAR10.
            "num_classes": 1,
            "metrics": ['accuracy'],
            "generated_overlays": -1,
            "optimizer": "sgd",
            "scheduler": "steps",
            "plateau_metric": None,
            "learning_rate": 0.1,
            "min_lr": 0.0001,
            "max_lr": 0.01,
            "scheduler_steps": [150, 250],
            "criterion": "ce",
            "weights_criterion": 1.0,
            "model_checkpoint": None,
            "swa_checkpoint": False,
            "swa_freq": 1,
            "swa_start": 999,
            "swa_lr": 0.256,
            "notify": False,
        }
        # Attributes are set in mapping order, which matches the original
        # assignment order.
        for option, value in settings.items():
            setattr(self, option, value)


args = Arguments()

## Load Test Data

Create data augmentations

In [5]:
# The selector returns a pair; only the second element (val_aug) is used later,
# so the first one is discarded.
_, val_aug = data_augmentation_selector(args.data_augmentation, args.img_size, args.crop_size)

Using CIFAR10 Data Augmentation Combinations


Create test dataloader

In [6]:
# Build the loader for the test split (is_test=True), with data looked up
# relative to the parent directory.
# NOTE(review): the first two positional arguments look like the train and
# validation transforms -- confirm against dataset_selector's signature.
test_loader = dataset_selector(
    None,
    val_aug,
    args,
    is_test=True,
    data_prefix="..",
)

In [7]:
# Labels of the test dataset as a CPU tensor, built in one step so the name
# `test_labels` is never bound to the raw (non-tensor) labels.
test_labels = torch.tensor(test_loader.dataset.labels).cpu()

## Import test logits

Logits are expected inside the `logits` directory, in one subfolder per model named `modelX`; each subfolder contains that model's test logits file.

In [8]:
# Root directory that is walked recursively to find the saved test logits files.
logits_dir = "../logits"

In [9]:
# Collect the path of every saved test-logits file below `logits_dir`.
#
# os.walk yields directories and files in arbitrary (filesystem-dependent)
# order, so both are sorted to make the order of `logits_paths` -- and hence
# the order of the models in the ensemble -- reproducible across runs.
logits_paths = []
for subdir, dirs, files in os.walk(logits_dir):
    dirs.sort()  # in-place sort controls the order of the top-down traversal
    for file in sorted(files):
        # Match on the file name only: testing the full path would also accept
        # every file located under a directory whose name contains the marker.
        if "test_logits" in file:
            file_path = os.path.join(subdir, file)
            logits_paths.append(file_path)

In [11]:
# Load each model's test logits, report its individual accuracy, then evaluate
# the ensemble obtained by summing the logits over all models.

# Fail early with a clear message instead of torch.stack's error on an empty list.
if not logits_paths:
    raise FileNotFoundError(f"No 'test_logits' files found under {logits_dir!r}")

per_model_logits = []
for lp in logits_paths:
    # Display name "<parent folder>/<file name>", built from the path
    # components so it does not depend on the OS path separator.
    logits_name = "/".join(os.path.normpath(lp).split(os.sep)[-2:])
    # map_location="cpu" lets tensors that were saved from a GPU be loaded on
    # a machine without CUDA; the result lives on the CPU either way.
    logits = torch.load(lp, map_location="cpu")
    logits_accuracy = compute_accuracy(test_labels, logits)
    print(f"{logits_name}: {logits_accuracy}")
    per_model_logits.append(logits)

# Stack along a new leading axis: one entry per model.
logits_list = torch.stack(per_model_logits)
print(f"\nlogits_list shape: {logits_list.shape}")

# Ensemble by summing the logits across the model axis.
logits_vote = logits_list.sum(dim=0)
logits_vote_accuracy = compute_accuracy(test_labels, logits_vote)
print(f"list logits sum: {logits_vote_accuracy}")

model1/test_logits_model_kuangliu_resnet18_best_accuracy.pt: 0.9483
model2/test_logits_model_kuangliu_resnet18_best_accuracy.pt: 0.9463
model3/test_logits_model_kuangliu_resnet18_best_accuracy.pt: 0.9474
model4/test_logits_model_kuangliu_resnet18_best_accuracy.pt: 0.9486
model5/test_logits_model_kuangliu_resnet18_best_accuracy.pt: 0.9465

logits_list shape: torch.Size([5, 10000, 10])
list logits sum: 0.9561
