In [55]:
import os
from PIL import Image
import cv2
import numpy as np
import pandas as pd
import pickle
from tensorflow import keras

In [36]:
# creates the dataframe
avg_df = pd.DataFrame()
empty_df = {'Filename':'', 'R Average':'', 'G Average':'', 'B Average':''}
avg_df = avg_df.append(empty_df, ignore_index=True)
avg_df = avg_df.reindex(columns=['Filename','R Average', 'G Average','B Average'])
avg_df

Unnamed: 0,Filename,R Average,G Average,B Average
0,,,,


In [37]:
# opencv reads grayscale images as 3 channels and copies the first layer twice
def is_grayscale(r, g, b):
    return b.all() == g.all() and b.all() == r.all()

In [53]:
def parse_visual_genome(avg_df):
    # read in all images and calculate the average RGB
    data_path = 'data/images/'

    for image in os.listdir(data_path):
            full_path = data_path + image
            img = cv2.imread(full_path)
            if img is not None:
                b,g,r = cv2.split(img)
                if not is_grayscale(r, g, b):
                    avg_dict = {'Filename':image, 'R Average':np.mean(r), 'G Average':np.mean(g), 'B Average':np.mean(b)}
                    avg_df = avg_df.append(avg_dict, ignore_index=True)

    return avg_df

In [54]:
def parse_cifar_100(avg_df, images, file_prefix, ext):
    # data_size = images.shape[0]
    img_idx = 0
    
    for image in images:
        # get file name
        filename = file_prefix+str(img_idx)+ext
        # save cifar img
        pil_img = Image.fromarray(np.uint8(image))
        pil_img.save('data/images/'+filename)
        
        # obtain channels
        r,g,b = pil_img.split()
        
        # calculate and save avgs
        avg_dict = {'Filename':filename, 'R Average':np.mean(r), 'G Average':np.mean(g), 'B Average':np.mean(b)}
        
        # append to df
        avg_df = avg_df.append(avg_dict, ignore_index=True)
        
        # move to next file
        img_idx += 1
        
    return avg_df

In [None]:
avg_df = parse_visual_genome(avg_df)

In [57]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()
avg_df = parse_cifar_100(avg_df, x_train, 'cifar_train_', '.jpeg')
avg_df = parse_cifar_100(avg_df, x_test, 'cifar_test_', '.jpeg')

avg_df.drop(index=avg_df.index[0], axis=0, inplace=True)

In [58]:
avg_df

Unnamed: 0,Filename,R Average,G Average,B Average
1,1593190.jpg,126.914235,102.113993,102.803323
2,2336660.jpg,88.919509,92.358368,94.949701
3,2364278.jpg,90.594624,77.809323,78.683701
4,2324905.jpg,163.152584,152.23982,136.983652
5,2410442.jpg,104.139373,114.29944,113.638524
...,...,...,...,...
67680,cifar_test_9995.jpeg,93.085938,123.327148,83.829102
67681,cifar_test_9996.jpeg,120.177734,132.110352,79.393555
67682,cifar_test_9997.jpeg,75.832031,106.240234,64.107422
67683,cifar_test_9998.jpeg,104.074219,95.823242,79.599609


In [59]:
avg_df.to_csv('data/avg_database.csv', index=False)

In [30]:
img = cv2.imread('data/images/2338347.jpg')
# print(len(cv2.split(img)))
# print(cv2.split(img))
b,g,r = cv2.split(img)
print(img.shape)
if b.all() == g.all() and b.all() == r.all():
    print(True)

# im = Image.open('data/images/2338347.jpg')
# im.show()
# r = im.split()
# print(r)

(396, 500, 3)
True


In [45]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [46]:
images = unpickle('data/cifar-10-batches-py/data_batch_1')

In [48]:
print(images.keys())

dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
