In [35]:
####################################################################
# Scenario0: static하게 앙상블
# 모든 컴비네이션으로 경우의 수를 완탐해보고 마지막 출구보다 성능 좋은것 유무
####################################################################
# Scenario1: Entropy vs Temperature Scaling + Entropy 
# make dynamic ensemble of models which entropy is less than threshold
####################################################################
# Scenario2: MC Dropout -> find confident EE -> static ensemble 
#step1: make MC Dropout model
#step2: find confident EE from experiment
#step3: sum of softmax vector of each good model(under threshold) -> final inference from softmax vector sum
####################################################################
# Scenario3: train new block to choose which exit to inference 
# (JUST SCENARIO, NOT VERIFIED)
####################################################################

##### 모델 로드 및 캐시 저장

In [36]:
import torch
import torch.nn.functional as F
from torchvision import datasets
from tqdm import tqdm
from itertools import combinations
import numpy as np
from scipy.stats import entropy
from collections import defaultdict
from temperature_scaling import TemperatureScaling
from matrix_scaling import MatrixScaling
from Dloaders import Dloaders

IMG_SIZE = 224
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dataset_name = {'cifar10':datasets.CIFAR10, 'cifar100':datasets.CIFAR100,'imagenet':None}
dataset_outdim = {'cifar10':10, 'cifar100':100,'imagenet':1000}
################ 0. Hyperparameters ##########################
batch_size = 1024
data_choice='cifar10'
model_choice = 'resnet' # ['vit', 'resnet']
exit_num=11
cache_file_path = f'cache_result_{model_choice}_{data_choice}.pt'
precision_each_label = f'cache_precision_each_label_{data_choice}.pt'
choosen_exit_distribution = f'cache_choosen_exit_distribution_{data_choice}.pt'
exp_result_path = f'cache_exp_{model_choice}_{data_choice}.pt'
t_scalers_path = f"models/{model_choice}/{data_choice}/temperature_scaler.pth"
m_scalers_path = f"models/{model_choice}/{data_choice}/matrix_scaler.pth"

exp_save_file = dict()

In [37]:
dloaders=Dloaders(data_choice=data_choice,batch_size=batch_size,IMG_SIZE=IMG_SIZE)
train_loader,test_loader = dloaders.get_loaders()

# 저장한 파일을 다시 불러오기
output_tensor = torch.load(cache_file_path).to(device)
_, test_dataset = dloaders.get_datasets()
labels_list = test_dataset.targets
labels=torch.tensor(labels_list).to(device)
# 데이터 확인
print(output_tensor.shape)  # <class 'list'>
print(output_tensor[0].shape)  # torch.Size([10000, 100])

Files already downloaded and verified
Files already downloaded and verified
torch.Size([11, 10000, 10])
torch.Size([10000, 10])


  output_tensor = torch.load(cache_file_path).to(device)


##### 이전 출구들 전부를 앙상블

In [38]:
prefix_sum_ensemble_acc = []
for i in range(exit_num):
    new_output_list = output_tensor[:i + 1,:,:]
    ensemble_logits = new_output_list.sum(axis=0)
    ensemble_probabilities = F.softmax(ensemble_logits, dim=1)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    prefix_sum_ensemble_acc.append(round(accuracy,2))
exp_save_file['prefix_sum_ensemble_acc'] = prefix_sum_ensemble_acc
print(*prefix_sum_ensemble_acc, sep = ', ')

64.47, 71.15, 80.36, 84.39, 87.08, 90.14, 92.5, 94.14, 95.54, 96.18, 96.53


##### 모든 경우의 수에 대해서 소프트맥스 전에 정적 앙상블 하고 소프트맥스 진행

In [39]:
combinations_list = []
for i in range(2, exit_num+1):combinations_list.extend(combinations(range(exit_num), i))

In [40]:
# sum before softmax (best acc: 97.63)
bef_softmax = dict()
for choosed_exits in combinations_list:
    new_output_list = output_tensor[choosed_exits,:,:]
    ensemble_logits = new_output_list.sum(axis=0)
    ensemble_probabilities = F.softmax(ensemble_logits, dim=1)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    bef_softmax[choosed_exits]=accuracy

tmp = sorted(list(bef_softmax.items()), key=lambda x: x[1], reverse=True)
Static = tmp[0][1]
print(tmp)

[((1, 10), 97.23), ((0, 10), 97.21), ((2, 10), 97.21), ((3, 10), 97.17), ((7, 10), 97.07000000000001), ((2, 3, 8, 10), 97.04), ((5, 10), 97.02), ((3, 5, 8, 10), 97.0), ((4, 8, 10), 96.98), ((3, 8, 10), 96.97), ((5, 8, 10), 96.97), ((0, 3, 8, 10), 96.97), ((8, 10), 96.96000000000001), ((6, 10), 96.95), ((2, 7, 8, 10), 96.95), ((7, 8, 10), 96.94), ((2, 8, 10), 96.93), ((0, 2, 8, 10), 96.92), ((2, 5, 8, 10), 96.92), ((4, 10), 96.91), ((1, 8, 10), 96.91), ((2, 7, 10), 96.91), ((1, 3, 8, 10), 96.91), ((1, 5, 8, 10), 96.91), ((0, 4, 8, 10), 96.89999999999999), ((5, 7, 8, 10), 96.89999999999999), ((0, 5, 8, 10), 96.89), ((1, 2, 8, 10), 96.89), ((3, 6, 8, 10), 96.89), ((3, 7, 8, 10), 96.89), ((4, 5, 8, 10), 96.89), ((4, 7, 8, 10), 96.89), ((6, 8, 10), 96.88), ((1, 6, 8, 10), 96.88), ((0, 2, 3, 8, 10), 96.88), ((1, 2, 7, 8, 10), 96.88), ((0, 8, 10), 96.87), ((0, 7, 8, 10), 96.87), ((1, 7, 8, 10), 96.87), ((2, 3, 7, 8, 10), 96.87), ((3, 7, 10), 96.86), ((1, 4, 8, 10), 96.86), ((2, 4, 8, 10), 96.

##### 모든 경우의 수에 대해서 소프트맥스 하고 정적 앙상블 하고 맥스값으로 추론 진행

In [41]:
# sum after softmax (best acc: 97.63)
aft_softmax = dict()
for choosed_exits in combinations_list:
    new_output_list = output_tensor[choosed_exits,:,:]
    softmax_vector_list = F.softmax(new_output_list, dim=2)
    ensemble_probabilities = softmax_vector_list.sum(axis=0)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    aft_softmax[choosed_exits]=accuracy

tmp = sorted(list(aft_softmax.items()), key=lambda x: x[1], reverse=True)
Static = max(Static, tmp[0][1])
print(tmp)

[((1, 10), 97.42), ((0, 10), 97.41), ((3, 10), 97.28999999999999), ((2, 10), 97.22), ((5, 10), 97.11999999999999), ((7, 10), 97.08), ((6, 10), 97.05), ((8, 10), 97.04), ((3, 8, 10), 97.04), ((5, 8, 10), 97.0), ((7, 8, 10), 96.99), ((5, 8, 9, 10), 96.99), ((7, 8, 9, 10), 96.97), ((3, 5, 7, 8, 9, 10), 96.96000000000001), ((2, 8, 9, 10), 96.95), ((3, 7, 8, 10), 96.95), ((3, 8, 9, 10), 96.95), ((4, 10), 96.94), ((3, 6, 7, 8, 9, 10), 96.92), ((5, 7, 8, 10), 96.91), ((5, 7, 8, 9, 10), 96.91), ((1, 8, 10), 96.89999999999999), ((0, 8, 9, 10), 96.89999999999999), ((2, 6, 8, 10), 96.89999999999999), ((3, 6, 8, 10), 96.89999999999999), ((0, 7, 8, 10), 96.89), ((1, 8, 9, 10), 96.89), ((2, 7, 8, 10), 96.89), ((4, 7, 8, 10), 96.89), ((6, 7, 8, 10), 96.89), ((3, 5, 6, 8, 9, 10), 96.88), ((0, 8, 10), 96.87), ((8, 9, 10), 96.87), ((3, 5, 8, 10), 96.87), ((2, 5, 8, 9, 10), 96.87), ((3, 7, 8, 9, 10), 96.87), ((2, 8, 10), 96.86), ((0, 5, 8, 10), 96.86), ((4, 8, 9, 10), 96.86), ((3, 5, 8, 9, 10), 96.86), (

In [42]:
exp_save_file['static_ensemble_acc'] = Static

##### 동적 앙상블: 모든 exit에서 특정 엔트로피 이하인 것들을 소프트맥스 전에 앙상블 하고 소프트맥스 진행 한 샘플에서 임계값보다 작은게 없으면 제일 뒤의 블록 선택만으로 추론

In [43]:
# 모든 exit에서 특정 엔트로피 이하인 것들을 모아서 앙상블을 해본다.
# case 2가지 소프트맥스 전에 합칠지 후에 합칠지;;

step_range = 10000
aft_sftmx = F.softmax(output_tensor,dim=2)
entropy_array= torch.tensor(entropy(aft_sftmx.to('cpu'), base=exit_num, axis=2))

min_entropy = entropy_array.min()
median_entropy = torch.median(torch.tensor(entropy_array))

step_size = (median_entropy - min_entropy) / step_range
d_bef_softmax=(0,0);d_aft_softmax=(0,0)
for mul in tqdm(range(step_range)):
    threshold = min_entropy + mul * step_size
    mask = (entropy_array[:, :] <= threshold).to(device)
    
    column_sums = mask.sum(dim=0)  
    zero_columns = (column_sums == 0)

    last_row = torch.zeros_like(mask)
    last_row[-1, :] = 1
    mask = mask | (last_row & zero_columns)

    mask.unsqueeze_(dim=2)
    masked_array=mask*output_tensor

    # sum before softmax (best acc: 87.75)
    ensemble_logits = masked_array.sum(axis=0)
    ensemble_probabilities = F.softmax(ensemble_logits, dim=1)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_bef_softmax=max(d_bef_softmax,(threshold,accuracy),key=lambda x:x[1])

    # sum after softmax (best acc: 87.75)
    softmax_vector_list = F.softmax(masked_array, dim=2)
    ensemble_probabilities = softmax_vector_list.sum(axis=0)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_aft_softmax=max(d_aft_softmax,(threshold,accuracy),key=lambda x:x[1])
print(f"d_bef_softmax: {d_bef_softmax}\nd_aft_softmax: {d_aft_softmax}")
Dynamic_No_Scaling = max(d_bef_softmax[1], d_aft_softmax[1])

  median_entropy = torch.median(torch.tensor(entropy_array))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:04<00:00, 2190.88it/s]

d_bef_softmax: (tensor(0.), 97.52)
d_aft_softmax: (tensor(0.), 97.52)





##### 동적 앙상블: 모든 exit에서 특정 엔트로피 이하인 것들을 소프트맥스 전에 앙상블 하고 소프트맥스 진행 한 샘플에서 임계값보다 작은게 없으면 제일 엔트로피 작은 것을 선택해서 그것의 선택만으로 추론

In [44]:
# 모든 exit에서 특정 엔트로피 이하인 것들을 모아서 앙상블을 해본다.
# case 2가지 소프트맥스 전에 합칠지 후에 합칠지;;

step_range = 10000
aft_sftmx = F.softmax(output_tensor,dim=2)
entropy_array= torch.tensor(entropy(aft_sftmx.to('cpu'), base=exit_num, axis=2)).to(device)

min_entropy = entropy_array.min()
median_entropy = torch.median(torch.tensor(entropy_array))

step_size = (median_entropy - min_entropy) / step_range
d_bef_softmax=(0,0);d_aft_softmax=(0,0)

########################################
# 각 열의 최소값 인덱스 찾기
min_indices = entropy_array.argmin(dim=0)
# 2D 행렬 생성 및 초기화 (모든 값 0)
result_matrix = torch.zeros_like(entropy_array.int())
# 최소값 인덱스 위치에 1 설정
for col, row in enumerate(min_indices):
    result_matrix[row, col] = 1
########################################

for mul in tqdm(range(step_range)):
    threshold = min_entropy + mul * step_size
    mask = (entropy_array[:, :] <= threshold).to(device)
    
    column_sums = mask.sum(dim=0)
    zero_columns = (column_sums == 0)
    mask = mask | (result_matrix & zero_columns)

    mask.unsqueeze_(dim=2)
    masked_array=mask*output_tensor

    # sum before softmax (best acc: 87.75)
    ensemble_logits = masked_array.sum(axis=0)
    ensemble_probabilities = F.softmax(ensemble_logits, dim=1)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_bef_softmax=max(d_bef_softmax,(threshold,accuracy),key=lambda x:x[1])

    # sum after softmax (best acc: 87.75)
    softmax_vector_list = F.softmax(masked_array, dim=2)
    ensemble_probabilities = softmax_vector_list.sum(axis=0)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_aft_softmax=max(d_aft_softmax,(threshold,accuracy),key=lambda x:x[1])
print(f"d_bef_softmax: {d_bef_softmax}\nd_aft_softmax: {d_aft_softmax}")

  median_entropy = torch.median(torch.tensor(entropy_array))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:03<00:00, 3128.78it/s]

d_bef_softmax: (tensor(0.0014, device='cuda:0'), 96.50999999999999)
d_aft_softmax: (tensor(0.0014, device='cuda:0'), 96.46000000000001)





In [45]:
exp_save_file['Dynamic_No_Scaling'] = Dynamic_No_Scaling

##### 동적 앙상블할 때 각 샘플들마다 어떤 출구들로 앙상블하는지 찍기 //시각화는 나중에

In [46]:
threshold = d_bef_softmax[0]
mask = (entropy_array[:, :] <= threshold).to(device)

indices = mask.nonzero()
column_indices = defaultdict(list)
for row, col in indices:
    column_indices[col.item()].append(row.item())

# 각 샘플별로 앙상블할 exit들의 인덱스를 저장
ensemble_exits_per_sample =[[] for _ in range(len(labels_list))]
for col in sorted(column_indices.keys()):
    ensemble_exits_per_sample[col] = column_indices[col]

USE_LAST_EXIT = 1
if USE_LAST_EXIT:
    # 앙상블할때 엔트로피가 임계값 이하인 exit가 하나도 없는 경우 마지막 exit의 추론 결과를 사용하도록 설정
    for i in range(len(labels_list)):
        if len(ensemble_exits_per_sample[i])==0:
            ensemble_exits_per_sample[i].append(exit_num-1) #10
else:
    # 엔트로피가 임계값 이하인 exit가 없는 경우, 가장 작은 엔트로피를 가진 exit 선택
    for i in range(len(labels_list)):
        if len(ensemble_exits_per_sample[i]) == 0:
            # i번째 열(sample)의 모든 엔트로피 값
            column_entropies = entropy_array[:, i]
            # 가장 작은 엔트로피를 가진 exit의 인덱스
            min_entropy_exit = torch.argmin(column_entropies)
            ensemble_exits_per_sample[i].append(min_entropy_exit.item())

exp_save_file[f'choosen_exit_distribution'] = ensemble_exits_per_sample
ensemble_exits_per_sample

[[6, 7, 8, 9, 10],
 [3, 4, 5, 6, 7, 8, 9, 10],
 [6, 7, 8, 9, 10],
 [5, 6, 7, 8, 9, 10],
 [3, 4, 6, 7, 8, 9, 10],
 [2, 3, 5, 6, 7, 8, 9, 10],
 [6, 7, 8, 10],
 [5, 6, 7, 8, 9, 10],
 [4, 5, 6, 7, 8, 9, 10],
 [8, 9],
 [3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 5, 6, 7, 8, 9, 10],
 [4, 6, 7, 8, 9, 10],
 [3, 4, 5, 6, 7, 8, 9, 10],
 [2, 3, 4, 5, 6, 7, 8, 9, 10],
 [6, 8, 9],
 [6, 7, 8, 9],
 [4, 5, 6, 7, 8, 9, 10],
 [4, 5, 6, 7, 8, 9, 10],
 [5, 6, 7, 8, 9, 10],
 [7, 9, 10],
 [5, 7, 8, 9, 10],
 [5, 6, 7, 8, 9, 10],
 [2, 3, 4, 5, 6, 7, 8, 9, 10],
 [6, 7, 8, 9, 10],
 [7, 8, 9],
 [6, 7, 8, 9, 10],
 [4, 5, 6, 7, 8, 9, 10],
 [2, 3, 4, 5, 6, 7, 8, 9, 10],
 [3, 4, 5, 6, 7, 8, 9, 10],
 [6, 7, 8, 9, 10],
 [5, 8, 9, 10],
 [8],
 [6, 7, 8, 9, 10],
 [2, 3, 4, 5, 6, 7, 8, 9, 10],
 [7, 8, 9, 10],
 [6, 7, 8, 9, 10],
 [7, 9],
 [2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 3, 4, 5, 6, 7, 8, 9, 10],
 [6, 8, 9],
 [2, 4, 5, 6, 7, 8, 9, 10],
 [8, 9, 10],
 [4, 5, 6, 7, 8, 9, 10],
 [5, 6, 7, 8, 9, 10],
 [2, 3, 4, 5, 6, 7, 8, 9, 10],
 [7,

##### Temperature Scaling 한 각 출구들에서의 T 값

In [47]:

t_scalers = torch.load(t_scalers_path)
ts_output_tensor = torch.zeros_like(output_tensor)
print(*[f"{round(t_scalers[i].temperature.item(),3)}" for i in range(exit_num)])
with torch.no_grad():
    for i in range(exit_num):
        t_scalers[i].to(device)
        t_scalers[i].eval()
        ts_output_tensor[i] = t_scalers[i](output_tensor[i])

1.037 1.007 1.275 1.354 1.545 1.634 1.912 2.053 2.154 2.28 1.493


  t_scalers = torch.load(t_scalers_path)


##### TS동적 앙상블: 모든 exit에서 특정 엔트로피 이하인 것들을 앙상블 하고 소프트맥스 진행하고 맥스값으로 추론

In [48]:
# 모든 exit에서 특정 엔트로피 이하인 것들을 모아서 앙상블을 해본다.
# case 2가지 소프트맥스 전에 합칠지 후에 합칠지;;

step_range = 10000
aft_sftmx = F.softmax(ts_output_tensor,dim=2)
entropy_array= torch.tensor(entropy(aft_sftmx.to('cpu'), base=exit_num, axis=2))

min_entropy = entropy_array.min()
median_entropy = torch.median(torch.tensor(entropy_array))

step_size = (median_entropy - min_entropy) / step_range
d_bef_softmax=(0,0);d_aft_softmax=(0,0)
for mul in tqdm(range(step_range)):
    threshold = min_entropy + mul * step_size
    mask = (entropy_array[:, :] <= threshold).to(device)
    
    column_sums = mask.sum(dim=0)  
    zero_columns = (column_sums == 0)

    last_row = torch.zeros_like(mask)
    last_row[-1, :] = 1
    mask = mask | (last_row & zero_columns)

    mask.unsqueeze_(dim=2)
    masked_array=mask*output_tensor

    # sum before softmax (best acc: 87.75)
    ensemble_logits = masked_array.sum(axis=0)
    ensemble_probabilities = F.softmax(ensemble_logits, dim=1)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_bef_softmax=max(d_bef_softmax,(threshold,accuracy),key=lambda x:x[1])

    # sum after softmax (best acc: 87.75)
    softmax_vector_list = F.softmax(masked_array, dim=2)
    ensemble_probabilities = softmax_vector_list.sum(axis=0)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_aft_softmax=max(d_aft_softmax,(threshold,accuracy),key=lambda x:x[1])
print(f"d_bef_softmax: {d_bef_softmax}\nd_aft_softmax: {d_aft_softmax}")
Dynamic_TS_Scaling = 0
Dynamic_TS_Scaling = max(Dynamic_TS_Scaling, d_bef_softmax[1], d_aft_softmax[1])

  median_entropy = torch.median(torch.tensor(entropy_array))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:04<00:00, 2205.14it/s]

d_bef_softmax: (tensor(8.8418e-28), 97.52)
d_aft_softmax: (tensor(8.8418e-28), 97.52)





##### TS동적 앙상블: 모든 exit에서 특정 엔트로피 이하인 것들을 앙상블 하고 소프트맥스 진행하고 맥스값으로 추론 임계값 이하가 없으면 제일 엔트로피 낮은거

In [49]:
# 모든 exit에서 특정 엔트로피 이하인 것들을 모아서 앙상블을 해본다.
# case 2가지 소프트맥스 전에 합칠지 후에 합칠지;;

step_range = 10000
aft_sftmx = F.softmax(ts_output_tensor,dim=2)
entropy_array= torch.tensor(entropy(aft_sftmx.to('cpu'), base=exit_num, axis=2)).to(device)

min_entropy = entropy_array.min()
median_entropy = torch.median(torch.tensor(entropy_array))

step_size = (median_entropy - min_entropy) / step_range
d_bef_softmax=(0,0);d_aft_softmax=(0,0)

########################################
# 각 열의 최소값 인덱스 찾기
min_indices = entropy_array.argmin(dim=0)
# 2D 행렬 생성 및 초기화 (모든 값 0)
result_matrix = torch.zeros_like(entropy_array.int())
# 최소값 인덱스 위치에 1 설정
for col, row in enumerate(min_indices):
    result_matrix[row, col] = 1
########################################

for mul in tqdm(range(step_range)):
    threshold = min_entropy + mul * step_size
    mask = (entropy_array[:, :] <= threshold).to(device)
    
    column_sums = mask.sum(dim=0)
    zero_columns = (column_sums == 0)
    mask = mask | (result_matrix & zero_columns)

    mask.unsqueeze_(dim=2)
    masked_array=mask*output_tensor

    # sum before softmax (best acc: 87.75)
    ensemble_logits = masked_array.sum(axis=0)
    ensemble_probabilities = F.softmax(ensemble_logits, dim=1)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_bef_softmax=max(d_bef_softmax,(threshold,accuracy),key=lambda x:x[1])

    # sum after softmax (best acc: 87.75)
    softmax_vector_list = F.softmax(masked_array, dim=2)
    ensemble_probabilities = softmax_vector_list.sum(axis=0)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_aft_softmax=max(d_aft_softmax,(threshold,accuracy),key=lambda x:x[1])
print(f"d_bef_softmax: {d_bef_softmax}\nd_aft_softmax: {d_aft_softmax}")
Dynamic_TS_Scaling = max(Dynamic_TS_Scaling, d_bef_softmax[1], d_aft_softmax[1])

  median_entropy = torch.median(torch.tensor(entropy_array))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:03<00:00, 3154.61it/s]

d_bef_softmax: (tensor(0.0257, device='cuda:0'), 96.61999999999999)
d_aft_softmax: (tensor(0.0257, device='cuda:0'), 96.61)





In [50]:
exp_save_file["Dynamic_TS_Scaling"] = Dynamic_TS_Scaling

##### 각 출구마다 라벨 별로 Precision 확인 후 각 출구의 mAP 확인

In [51]:
exits_precision = np.array([[0.0] * dataset_outdim[data_choice] for _ in range(exit_num)])
for exit in [i for i in range(exit_num)]:
    label_correct = defaultdict(int)
    label_total = defaultdict(int)
    
    new_output_list = output_tensor[exit]
    logits = new_output_list
    probabilities = F.softmax(logits, dim=1)
    _, predicted = torch.max(probabilities, dim=1)
    for label, pred in zip(labels, predicted):
            if label == pred:
                label_correct[label.item()] += 1
            label_total[label.item()] += 1
    
    label_accuracy = {label: round(label_correct[label] / label_total[label] * 100, 2) for label in range(dataset_outdim[data_choice])}
    exits_precision[exit] = [label_accuracy[label] for label in range(dataset_outdim[data_choice])]
exp_save_file[f'exits_precision'] = exits_precision
exits_precision

array([[71.4, 69.5, 41.5, 56.1, 69.2, 45.7, 76.1, 64.6, 76.8, 73.8],
       [75. , 83.2, 64.9, 49.1, 64.3, 65. , 81.5, 76.1, 82.4, 80.9],
       [83.2, 90.4, 72.2, 67.5, 76.9, 77. , 88.4, 84. , 88.4, 86.5],
       [88.8, 92.7, 76.3, 72.8, 82.4, 76. , 90.5, 84. , 90.7, 88.3],
       [89. , 93.7, 77.7, 79.4, 85.6, 73.2, 91.5, 87.6, 93.6, 88.8],
       [91.2, 95.3, 87.5, 80.7, 88.1, 84.6, 94.8, 91.9, 94.5, 93.8],
       [94.7, 95.8, 89.4, 89.3, 93. , 83.7, 96.5, 93.5, 95.4, 94.7],
       [96.7, 96.6, 92.9, 90.6, 94. , 89.6, 96.6, 95.1, 96.7, 94.3],
       [95.7, 97.6, 94.9, 92.4, 94.8, 93.3, 97.8, 97.8, 97.2, 96.2],
       [96.3, 96.9, 94.9, 90.4, 93.8, 92. , 95.9, 97.3, 97.9, 95.4],
       [98. , 98.7, 97. , 94.9, 97.7, 95.8, 99. , 98. , 98.9, 97.2]])

In [52]:
m_scalers = torch.load(m_scalers_path)
ms_output_tensor = torch.zeros_like(output_tensor)
with torch.no_grad():
    for i in range(exit_num):
        m_scalers[i].to(device)
        m_scalers[i].eval()
        ms_output_tensor[i] = m_scalers[i](output_tensor[i])

  m_scalers = torch.load(m_scalers_path)


In [53]:
# 모든 exit에서 특정 엔트로피 이하인 것들을 모아서 앙상블을 해본다.
# case 2가지 소프트맥스 전에 합칠지 후에 합칠지;;

step_range = 10000
aft_sftmx = F.softmax(ms_output_tensor,dim=2)
entropy_array= torch.tensor(entropy(aft_sftmx.to('cpu'), base=exit_num, axis=2))

min_entropy = entropy_array.min()
median_entropy = torch.median(torch.tensor(entropy_array))

step_size = (median_entropy - min_entropy) / step_range
d_bef_softmax=(0,0);d_aft_softmax=(0,0)
for mul in tqdm(range(step_range)):
    threshold = min_entropy + mul * step_size
    mask = (entropy_array[:, :] <= threshold).to(device)
    
    column_sums = mask.sum(dim=0)  
    zero_columns = (column_sums == 0)

    last_row = torch.zeros_like(mask)
    last_row[-1, :] = 1
    mask = mask | (last_row & zero_columns)

    mask.unsqueeze_(dim=2)
    masked_array=mask*output_tensor

    # sum before softmax (best acc: 87.75)
    ensemble_logits = masked_array.sum(axis=0)
    ensemble_probabilities = F.softmax(ensemble_logits, dim=1)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_bef_softmax=max(d_bef_softmax,(threshold,accuracy),key=lambda x:x[1])

    # sum after softmax (best acc: 87.75)
    softmax_vector_list = F.softmax(masked_array, dim=2)
    ensemble_probabilities = softmax_vector_list.sum(axis=0)
    _, predicted_labels = torch.max(ensemble_probabilities, dim=1)
    correct_predictions = (predicted_labels == labels).sum().item()
    total_predictions = labels.size(0)
    accuracy = correct_predictions / total_predictions * 100
    d_aft_softmax=max(d_aft_softmax,(threshold,accuracy),key=lambda x:x[1])
Dynamic_MS_Scaling = 0
Dynamic_MS_Scaling = max(Dynamic_MS_Scaling, d_bef_softmax[1], d_aft_softmax[1])

  median_entropy = torch.median(torch.tensor(entropy_array))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:04<00:00, 2126.92it/s]


In [54]:
exp_save_file["Dynamic_MS_Scaling"] = Dynamic_MS_Scaling

In [55]:
torch.save(exp_save_file, exp_result_path)