In [5]:
from scipy.stats import entropy
import numpy as np

def calculate_avg_entropy(real_probs):
    """Return the mean and standard deviation of the per-row Shannon entropy in bits.

    Each row of ``real_probs`` is treated as one probability distribution
    (axis 1 runs over the classes). The data is accepted only when it is
    non-empty, every entry is non-negative, and every row sums to 1 within an
    absolute tolerance of 1e-5. A one-line verdict is printed either way.

    Parameters
    ----------
    real_probs : array-like, 2-D
        Rows of probabilities; converted with ``np.asarray`` so nested lists
        work as well as arrays.

    Returns
    -------
    tuple
        ``(mean_entropy, std_entropy)`` over all rows, base-2 logarithm.
        ``(nan, nan)`` when validation fails, so callers that unpack two
        values keep working and the failure stays visible in the numbers.
    """
    probs = np.asarray(real_probs)

    # An empty array would pass the element-wise checks vacuously and then
    # produce nan with runtime warnings, so it is rejected explicitly.
    is_valid = (
        probs.size > 0
        and np.all(probs >= 0)
        and np.allclose(probs.sum(axis=1), 1.0, atol=1e-5)
    )
    if not is_valid:
        print("Invalid Real Probability Data, check again!")
        # Same arity as the success path; a bare None cannot be unpacked.
        return np.nan, np.nan

    print("Valid Real Probability Data")
    # calculate Shannon Entropy for every row, in bits (base 2)
    entropies = entropy(probs, axis=1, base=2)
    return entropies.mean(), entropies.std()
    
def valid_imagenet(imagenet_real):
    """Keep only the rows of ``imagenet_real`` whose entries sum to a positive value.

    Rows whose axis-1 total is not strictly greater than zero are dropped;
    the remaining rows are returned in their original order.
    """
    row_totals = imagenet_real.sum(axis=1)
    return imagenet_real[row_totals > 0]

In [7]:
from src import load_synthetic_data

import os
from pathlib import Path

# Root folder of the thesis data. The default is the original hard-coded
# location; set APS_THESIS_DATA_DIR to run the notebook on another machine.
DATA_DIR = Path(os.environ.get("APS_THESIS_DATA_DIR", "C:\\Users\\jiayang\\ipynb\\APS_Thesis\\data"))
SYNTHETIC_DIR = DATA_DIR / "synthetic_data"


def _synthetic_test_path(num_classes, temperature_scaled=True):
    """Return, as a string, the path of the synthetic test pickle for ``num_classes`` classes."""
    suffix = "" if temperature_scaled else "_NoTemp"
    return str(SYNTHETIC_DIR / f"synthetic_{num_classes}class{suffix}_test.pkl")


def _report_entropy(label, mean_bits, std_bits):
    """Print one '<label> Average Entropy: <mean> ± <std> bits' line with 4 decimals."""
    print(f"{label} Average Entropy: {mean_bits:.4f} ± {std_bits:.4f} bits ")


# Real Probability for CIFAR10-H and ImageNet-Real
cifar10_h = np.load(DATA_DIR / "cifar10h-probs.npy")
# Rows whose entries do not sum to a positive value are dropped right after loading.
imagenet_real = valid_imagenet(np.load(DATA_DIR / "imagenet_count_normalize.npy"))

synthetic_3_path = _synthetic_test_path(3)
synthetic_3_NoTemp_path = _synthetic_test_path(3, temperature_scaled=False)
synthetic_5_path = _synthetic_test_path(5)
synthetic_5_NoTemp_path = _synthetic_test_path(5, temperature_scaled=False)
synthetic_10_path = _synthetic_test_path(10)
synthetic_10_NoTemp_path = _synthetic_test_path(10, temperature_scaled=False)

# Real Probability for Synthetic Datasets with Temperature Scaling
# (only the third value returned by load_synthetic_data is used here;
# presumably it holds the per-sample real probabilities — confirm in src)
_, _, synthetic_3 = load_synthetic_data(synthetic_3_path)
_, _, synthetic_5 = load_synthetic_data(synthetic_5_path)
_, _, synthetic_10 = load_synthetic_data(synthetic_10_path)
# Real Probability for Synthetic Datasets without Temperature Scaling
_, _, synthetic_3_NoTemp = load_synthetic_data(synthetic_3_NoTemp_path)
_, _, synthetic_5_NoTemp = load_synthetic_data(synthetic_5_NoTemp_path)
_, _, synthetic_10_NoTemp = load_synthetic_data(synthetic_10_NoTemp_path)

# Mean and standard deviation of the per-sample entropy for every dataset.
cifar10_h_entropy, cifar10_h_std = calculate_avg_entropy(cifar10_h)
imagenet_entropy, imagenet_std = calculate_avg_entropy(imagenet_real)
syn_3_entropy, syn_3_std = calculate_avg_entropy(synthetic_3)
syn_5_entropy, syn_5_std = calculate_avg_entropy(synthetic_5)
syn_10_entropy, syn_10_std = calculate_avg_entropy(synthetic_10)
syn_3_NoTemp_entropy, syn_3_NoTemp_std = calculate_avg_entropy(synthetic_3_NoTemp)
syn_5_NoTemp_entropy, syn_5_NoTemp_std = calculate_avg_entropy(synthetic_5_NoTemp)
syn_10_NoTemp_entropy, syn_10_NoTemp_std = calculate_avg_entropy(synthetic_10_NoTemp)

print("\n=====Entropy Data for CIFAR10-H and ImageNet-Real=====")
_report_entropy("CIFAR10-H", cifar10_h_entropy, cifar10_h_std)
_report_entropy("ImageNet-Real", imagenet_entropy, imagenet_std)
print("\n=====Entropy Data for Synthetic Datasets with Temperature Scaling=====")
_report_entropy("K=3", syn_3_entropy, syn_3_std)
_report_entropy("K=5", syn_5_entropy, syn_5_std)
_report_entropy("K=10", syn_10_entropy, syn_10_std)
print("\n=====Entropy Data for Synthetic Datasets without Temperature Scaling=====")
_report_entropy("K=3", syn_3_NoTemp_entropy, syn_3_NoTemp_std)
_report_entropy("K=5", syn_5_NoTemp_entropy, syn_5_NoTemp_std)
_report_entropy("K=10", syn_10_NoTemp_entropy, syn_10_NoTemp_std)

Valid Real Probability Data
Valid Real Probability Data
Valid Real Probability Data
Valid Real Probability Data
Valid Real Probability Data
Valid Real Probability Data
Valid Real Probability Data
Valid Real Probability Data

=====Entropy Data for CIFAR10-H and ImageNet-Real=====
CIFAR10-H Average Entropy: 0.2228 ± 0.3395 bits 
ImageNet-Real Average Entropy: 0.8828 ± 0.6933 bits 

=====Entropy Data for Synthetic Datasets with Temperature Scaling=====
K=3 Average Entropy: 0.6908 ± 0.4464 bits 
K=5 Average Entropy: 1.5018 ± 0.4532 bits 
K=10 Average Entropy: 1.9459 ± 0.6154 bits 

=====Entropy Data for Synthetic Datasets without Temperature Scaling=====
K=3 Average Entropy: 0.2466 ± 0.3575 bits 
K=5 Average Entropy: 0.3564 ± 0.4384 bits 
K=10 Average Entropy: 0.5114 ± 0.5400 bits 
