In [1]:
import os

root_path = '/datasets/CUB/CUB_200_2011'
root_image_path = os.path.join(root_path, 'images')
assert os.path.exists(root_image_path), '{} root image path is not exists...'.format(root_image_path)
assert os.path.exists(root_path), '{} root path is not exists...'.format(root_path)

train_test_path = os.path.join(root_path, 'train_test_split.txt')
images_txt_path = os.path.join(root_path, 'images.txt')
images_labels_path = os.path.join(root_path, 'image_class_labels.txt')
classes_txt_path = os.path.join(root_path, 'classes.txt')
assert os.path.exists(train_test_path), '{} train_test_split.txt path is not exists...'.format(train_test_path)
assert os.path.exists(images_txt_path), '{} image path is not exists...'.format(images_txt_path)
assert os.path.exists(images_labels_path), '{} image_class_labels.txt path is not exists...'.format(images_labels_path)
assert os.path.exists(classes_txt_path), '{} classes.txt path is not exists...'.format(classes_txt_path)

train_val_id = []
test_id = []

with open(train_test_path) as f:
    for line in f:
        image_id, is_train = line.split()
        if int(is_train) == 1:
            train_val_id.append(image_id)
        else:
            test_id.append(image_id)

images_path = {}
labels_dict = {}
with open(images_txt_path) as f:
    for line in f:
        image_id, file_path = line.split()
        images_path[image_id] = file_path
with open(images_labels_path) as f:
    for line in f:
        image_id, label = line.split()
        labels_dict[image_id] = label

train_image_path = []
train_label = []
test_image_path = []
test_label = []
for idx in train_val_id:
    train_image_path.append(images_path[idx])
    train_label.append(int(labels_dict[idx]) - 1)
for idx in test_id:
    test_image_path.append(images_path[idx])
    test_label.append(int(labels_dict[idx]) - 1)

print('train_val image num: {}'.format(len(train_image_path)))
print('test image num: {}'.format(len(test_image_path)))

train_val image num: 5994
test image num: 5794


In [2]:
import numpy as np

def compute_mean_std(dataset):
    
    mean_b = 0.0
    mean_g = 0.0
    mean_r = 0.0
    
    for img, _ in dataset:
        img = np.array(img)
        mean_b += np.mean(img[:, :, 0])
        mean_g += np.mean(img[:, :, 1])
        mean_r += np.mean(img[:, :, 2])
    
    mean_b /= len(dataset)
    mean_g /= len(dataset)
    mean_r /= len(dataset)
    
    diff_b = 0.0
    diff_g = 0.0
    diff_r = 0.0
    N = 0
    
    for img, _ in dataset:
        img = np.array(img)
        diff_b += np.sum(np.power(img[:, :, 0]-mean_b, 2))
        diff_g += np.sum(np.power(img[:, :, 1]-mean_g, 2))
        diff_r += np.sum(np.power(img[:, :, 2]-mean_r, 2))
        
        N += np.prod(img[:, :, 0].shape)
    
    std_r = np.sqrt(diff_r / N)
    std_g = np.sqrt(diff_g / N)
    std_b = np.sqrt(diff_b / N)
    
    mean = [mean_r / 255.0, mean_g / 255.0, mean_b / 255.0]
    std = [std_r / 255.0, std_g / 255.0, std_b / 255]
    return mean, std
    

In [3]:
from dataset import CUB_dataset

train_dataset = CUB_dataset(root_image_path, train_image_path, train_label)
test_dataset = CUB_dataset(root_image_path, test_image_path, test_label)

train_mean, train_std = compute_mean_std(train_dataset)
print('train mean: {}\n train std: {}\n'.format(train_mean, train_std))
test_mean, test_std = compute_mean_std(test_dataset)
print('test mean: {}\n test std: {}\n'.format(test_mean, test_std))

'''
bgr:
train mean: [0.4856074889829789, 0.49941621333172476, 0.43237721533416357]
 train std: [0.23210242423464963, 0.22770540127125152, 0.2665100731524232]

bgr:
test mean: [0.48621705603298476, 0.4998155767200096, 0.43114317679080444]
 test std: [0.23264259781393923, 0.2278108523010932, 0.26667242411915676]
'''

train mean: [0.4856074889829789, 0.49941621333172476, 0.43237721533416357]
 train std: [0.23210242423464963, 0.22770540127125152, 0.2665100731524232]

test mean: [0.48621705603298476, 0.4998155767200096, 0.43114317679080444]
 test std: [0.23264259781393923, 0.2278108523010932, 0.26667242411915676]

