**Purpose**: To compare image dimensions from various portrait datasets.

In [None]:
from os import listdir
from os.path import isfile, join
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np
from random import shuffle

%matplotlib inline

In [None]:
def get_fnames(d, random=False):
    """Return the paths of the regular files directly inside directory ``d``.

    The scan is not recursive; sub-directories are skipped. The number of
    files found is printed.

    Args:
        d: Directory to scan. A trailing path separator is optional.
        random: If true, shuffle the resulting list before returning it.

    Returns:
        A list of paths, each formed by joining ``d`` with a file name.
        Without ``random`` the order is whatever ``listdir`` yields.
    """
    # Build every path once with join() so that the path checked by isfile()
    # is exactly the path returned. Plain ``d + f`` produced broken paths
    # whenever ``d`` was given without a trailing separator.
    candidates = (join(d, f) for f in listdir(d))
    fnames = [path for path in candidates if isfile(path)]
    print("Number of files found in %s: %s" % (d, len(fnames)))
    if random:
        shuffle(fnames)
    return fnames

def plots(imgs, figsize=(12, 12), rows=1, cols=1,
          interp=None, titles=None, cmap='gray'):
    """Show one or more images as subplots of a newly created figure.

    Args:
        imgs: A single image or a list of images. A non-list value is
            wrapped in a one-element list.
        figsize: Figure size passed to ``plt.figure``.
        rows: Number of subplot rows passed to ``add_subplot``.
        cols: Number of subplot columns passed to ``add_subplot``.
        interp: Interpolation passed to ``imshow``; either one value used
            for every image or a list with one entry per image.
        titles: Optional sequence of subplot titles, indexed like ``imgs``.
        cmap: Colormap passed to ``imshow``; either one value or a list with
            one entry per image. When it is not a list and the first image
            is 2-D, ``'gray'`` is used for every image.
    """
    images = imgs if isinstance(imgs, list) else [imgs]
    count = len(images)

    if isinstance(cmap, list):
        cmaps = cmap
    else:
        # A single colormap is replaced by 'gray' when the first image is 2-D.
        shared_cmap = 'gray' if images[0].ndim == 2 else cmap
        cmaps = [shared_cmap] * count

    interps = interp if isinstance(interp, list) else [interp] * count

    fig = plt.figure(figsize=figsize)
    for idx, image in enumerate(images):
        # Subplot positions are 1-based.
        axes = fig.add_subplot(rows, cols, idx + 1)
        if titles:
            axes.set_title(titles[idx], fontsize=18)
        plt.imshow(image, interpolation=interps[idx], cmap=cmaps[idx])
        plt.axis('off')


def plot(im, f=6, r=1, c=1, t=None):
    """Shorthand for ``plots`` with terse argument names.

    Args:
        im: Image or list of images, forwarded as-is.
        f: Figure size; a tuple is used directly, any other value ``x``
            becomes the square size ``(x, x)``.
        r: Number of subplot rows.
        c: Number of subplot columns.
        t: Optional subplot titles.
    """
    if isinstance(f, tuple):
        size = f
    else:
        size = (f, f)
    plots(im, figsize=size, rows=r, cols=c, titles=t)
    
    
def loadim(path):
    """Load the image at ``path`` with PIL and return it as an ``np.array``.

    Args:
        path: Location of the image file, passed to ``Image.open``.

    Returns:
        The image converted with ``np.array``.
    """
    # The context manager releases the underlying file as soon as the pixel
    # data has been copied into the array, instead of leaving that to
    # garbage collection.
    with Image.open(path) as im:
        return np.array(im)

def hist_img_dims(fnames):
    """Plot histograms of the heights and widths of the given image files.

    A 6x6 figure with two stacked subplots is created and shown: heights on
    top, widths below.

    Args:
        fnames: Iterable of image file paths readable by PIL.
    """
    heights = []
    widths = []
    for fname in fnames:
        # Image.open is lazy, so reading ``size`` gives the dimensions
        # without decoding the pixel data into an array; ``with`` closes
        # the file right away.
        with Image.open(fname) as im:
            width, height = im.size
        widths.append(width)
        heights.append(height)
    fig = plt.figure(figsize=(6, 6))
    ax1 = fig.add_subplot(211)
    ax1.hist(heights)
    ax1.set_ylabel('Heights')
    ax2 = fig.add_subplot(212)
    ax2.hist(widths)
    ax2.set_ylabel('Widths')
    plt.show()

In [None]:
# Root folder of the portrait datasets; the dataset cells below append a
# sub-directory name to it. The path is relative.
data_dir = '../data/portraits/'
# IPython shell escape: list the contents of the data folder.
!ls $data_dir

# Globals

In [None]:
# Upper bound on the number of file names kept per dataset (used as the
# slice length ``[:N]`` in the dataset cells).
N = 1000

# CelebA dataset - cropped

Images are 178x218 pixels (i.e. smaller than Flickr-cropped).

In [None]:
# Cropped CelebA images: take up to N shuffled file names and plot the
# distribution of their heights and widths.
aligned_dir = data_dir + 'celeba_crop/'
fnames = get_fnames(aligned_dir, random=True)[:N]
hist_img_dims(fnames)

# CelebA dataset - raw

In [None]:
# Raw CelebA images: take up to N shuffled file names and plot the
# distribution of their heights and widths.
wild_dir = data_dir + 'celeba_raw/'
fnames = get_fnames(wild_dir, random=True)[:N]
hist_img_dims(fnames)

# Flickr dataset - cropped

Images are 600x800 pixels (i.e. bigger than CelebA-cropped).

# Flickr dataset - raw

Paper: Automatic Portrait Segmentation for Image Stylization

We have masks for these images. We don't have masks for the CelebA images.

In [None]:
# Raw Flickr images: take up to N shuffled file names and plot the
# distribution of their heights and widths.
flickr_dir = data_dir + 'flickr_raw/'
fnames = get_fnames(flickr_dir, random=True)[:N]
hist_img_dims(fnames)