In [1]:
import numpy as np

npz_path = "C:\\Users\\jiayang\\ipynb\\APS_Thesis\\data\\scores.npz"

# np.load on an .npz archive returns a lazy NpzFile that keeps the file open
# until it is closed. Using it as a context manager releases the handle as soon
# as the two arrays have been read (an open handle also keeps the file locked
# on Windows). Indexing the NpzFile materialises each array in memory, so
# `scores` and `info` stay valid after the archive is closed.
with np.load(npz_path) as data:
    scores = data['tensor']  # per the original note: the score (real probability) of the label
    info = data['info']      # rows unpack as (fname, label); row i is paired with scores[i] below

# Dense table: one row per image, one column per class
# (50000 samples x 1000 classes, per the original note).
imagenet_real_probs = np.zeros((50000, 1000))

# info[i] = (fname_i, label_i) is matched with scores[i].
for (fname, label), score in zip(info, scores):
    # The number after the last "_" in the file name is the 1-based image id,
    # e.g. ILSVRC2012_val_00000001.JPEG -> 1; subtract 1 to get the row index.
    image_id = int(fname.split("_")[-1].split(".")[0]) - 1
    # Store the score in the column of its label. If the same (image, label)
    # pair occurs more than once, the last score read wins.
    label = int(label)
    imagenet_real_probs[image_id, label] = score

# quick check
print("New NPY shape:", imagenet_real_probs.shape)
print("Real Probability of 00001.JPG", imagenet_real_probs[0])

New NPY shape: (50000, 1000)
Real Probability of 00001.JPG [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.00058204 0.00058204
 0.00414081 0.         0.00058204 0.00058204 0.15452416 0.
 0.06140263 0.         0.00058204 0.00058204 0.         0.01583705
 0.         0.01458662 0.03100282 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.    

In [4]:
import os

sorted_imagenet_path = "D:\\Download\\ImageNet-1K\\Validation_Set\\sorted_ImageNet_val"

# 1-based image ids (parsed from the file names) in the order the images are
# met when walking the class sub-folders in sorted name order.
image_order_list = []

# Visit every sub-folder of sorted_imagenet_path (the original note says these
# are named 0000 - 0999); plain files at this level are skipped.
for class_folder in sorted(os.listdir(sorted_imagenet_path)):
    class_path = os.path.join(sorted_imagenet_path, class_folder)
    if not os.path.isdir(class_path):
        continue

    # Visit the images of this sub-folder in sorted name order.
    for img_name in sorted(os.listdir(class_path)):
        if img_name.endswith(".JPEG"):
            # The number after the last "_" is the image id,
            # e.g. ILSVRC2012_val_00000001.JPEG -> img_id = 1
            img_id = int(img_name.split("_")[-1].split(".")[0])
            image_order_list.append(img_id)

# Build the id -> position lookup once. The previous version called
# `img_id in image_order_list` and `image_order_list.index(img_id)` inside the
# loop below, i.e. two linear scans per image (quadratic overall).
# setdefault keeps the FIRST position of an id, which is what list.index returned.
position_by_img_id = {}
for position, img_id in enumerate(image_order_list):
    position_by_img_id.setdefault(img_id, position)

sorted_imagenet_real_probs = np.zeros((50000, 1000))

# Row i of imagenet_real_probs belongs to the image whose id is i + 1; copy it
# to the row matching that image's position in the folder walk.
for i in range(50000):
    img_id = i + 1
    new_index = position_by_img_id.get(img_id)

    # Ids that were not found on disk are skipped; their target row stays zero.
    if new_index is not None:
        sorted_imagenet_real_probs[new_index] = imagenet_real_probs[i]

print("Shape :", sorted_imagenet_real_probs.shape)
# NOTE(review): the file name in this label is hard-coded, not derived from
# image_order_list[0] - confirm it still matches the data on disk.
print("Real Probability first image(ILSVRC2012_val_00000293.JPG) :", sorted_imagenet_real_probs[0])


Shape : (50000, 1000)
Real Probability first image(ILSVRC2012_val_00000293.JPG) : [9.98930273e-01 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.

In [11]:
# Write the re-ordered probability table to disk in NumPy's .npy format,
# then report the destination path.
save_path = "C:\\Users\\jiayang\\ipynb\\APS_Thesis\\data\\imagenet_count.npy"
np.save(file=save_path, arr=sorted_imagenet_real_probs)
print(f"npy file saved at {save_path}")

npy file saved at C:\Users\jiayang\ipynb\APS_Thesis\data\imagenet_count.npy
