In [1]:
import glob
from pylab import *
import os
import numpy as np

In [2]:
def ln(fns, dst_dir):
    """Symlink each PNG in ``fns`` and its ``.npy``/``.txt`` siblings into ``dst_dir``.

    For every path ``p`` in ``fns`` three links are created inside ``dst_dir``:
    one named after the PNG's basename, and one each for the files that share
    its stem with the ``.npy`` and ``.txt`` extensions. The sibling targets are
    looked up in the same directory as ``p``.

    Re-running is safe: a symlink already present at a destination is replaced
    instead of making ``os.symlink`` raise. A destination that exists but is
    *not* a symlink is never removed, so ``os.symlink`` still raises for it.

    Args:
        fns: iterable of paths to PNG files.
        dst_dir: existing directory that receives the links.

    Raises:
        OSError: if ``dst_dir`` does not exist, or a destination is occupied
            by something other than a symlink.
    """
    for p in fns:
        src_dir, fn = os.path.split(p)
        # splitext drops whatever the real extension is, rather than blindly
        # cutting the last four characters off the file name.
        stem = os.path.splitext(fn)[0]

        links = [(p, os.path.join(dst_dir, fn))]
        for ext in ('.npy', '.txt'):
            links.append((os.path.join(src_dir, stem + ext),
                          os.path.join(dst_dir, stem + ext)))

        for target, link in links:
            # Only stale symlinks are cleared; real files are left alone.
            if os.path.islink(link):
                os.remove(link)
            os.symlink(target, link)

In [3]:
# Seed NumPy's legacy global RNG so the np.random.permutation calls further
# down are repeatable. The bare shuffle() calls come from `from pylab import *`
# and presumably resolve to numpy.random.shuffle (same global RNG) -- confirm.
np.random.seed(0)

In [4]:
# The data directory is `<parent of the current working directory>/data`.
# os.path is used instead of splitting the cwd on '/' so the parent is found
# correctly even where the platform's path separator is not '/'.
DATA_DIR = os.path.join(os.path.dirname(os.getcwd()), 'data')

In [5]:
# Create train/val splits.
# The PNG paths are sorted first so the shuffle always starts from the same
# sequence regardless of the order glob returns them in.
pos_fns = sorted(glob.glob(DATA_DIR+'/processedData/pos/*.png'))
neg_fns = sorted(glob.glob(DATA_DIR+'/processedData/neg/*.png'))
# np.random.shuffle is spelled out rather than relying on the bare `shuffle`
# name leaked into the namespace by `from pylab import *`. It shuffles in
# place and draws from the global NumPy RNG.
np.random.shuffle(pos_fns)
np.random.shuffle(neg_fns)

In [6]:
# 4:1 train:val ratio -- the split index is four fifths of each list's
# length, rounded down.
pos_split = len(pos_fns) * 4 // 5
neg_split = len(neg_fns) * 4 // 5

In [7]:
# Cut each shuffled list at its split index: head -> train, tail -> val.
train_pos, val_pos = pos_fns[:pos_split], pos_fns[pos_split:]
train_neg, val_neg = neg_fns[:neg_split], neg_fns[neg_split:]

In [8]:
# Set up train/val split folders.
# Each leaf directory is created on its own (parents included) and skipped
# only when it already exists. A partially built tree therefore gets
# completed, and genuine failures such as permission errors are raised
# instead of being silently swallowed.
for split_name in ('train', 'val'):
    for label_name in ('pos', 'neg'):
        split_dir = os.path.join(DATA_DIR, 'splits', split_name, label_name)
        if not os.path.isdir(split_dir):
            os.makedirs(split_dir)

In [9]:
# Make symbolic links: fill every split/label folder with links to its files.
for split_fns, link_dir_suffix in (
    (train_pos, '/splits/train/pos'),
    (train_neg, '/splits/train/neg'),
    (val_pos, '/splits/val/pos'),
    (val_neg, '/splits/val/neg'),
):
    ln(split_fns, DATA_DIR + link_dir_suffix)

In [10]:
# Create a randomly permuted list - training.
# Positives (label 1) and negatives (label 0) are concatenated, then visited
# in a random order; the same order is applied to the saved label array.
train_fn = train_pos + train_neg
train_y = [1] * len(train_pos) + [0] * len(train_neg)
ii = np.random.permutation(len(train_fn))
train_objcat = []
with open(DATA_DIR+'/splits/train_fn.txt', 'w') as list_file:
    for idx in ii:
        png_path = train_fn[idx]
        stem = os.path.splitext(png_path)[0]
        # Only the first line of the sidecar .txt is read.
        with open(stem+'.txt', 'r') as meta_file:
            first_line = meta_file.readline()
        fields = first_line.strip("\n").split(",")
        # The first comma-separated field is also kept as an int array.
        train_objcat.append(int(fields[0]))
        list_file.write('%s %s %s %s\n' % (png_path, stem+'.npy', fields[0], fields[1]))
np.save(DATA_DIR+'/splits/train_y.npy', np.asarray(train_y)[ii])
np.save(DATA_DIR+'/splits/train_objcat_y.npy', np.asarray(train_objcat))

In [11]:
# Create a randomly permuted list - validation.
# Positives (label 1) and negatives (label 0) are concatenated, then visited
# in a random order; the same order is applied to the saved label array.
val_fn = val_pos + val_neg
val_y = [1] * len(val_pos) + [0] * len(val_neg)
ii = np.random.permutation(len(val_fn))
val_objcat = []
with open(DATA_DIR+'/splits/val_fn.txt', 'w') as list_file:
    for idx in ii:
        png_path = val_fn[idx]
        stem = os.path.splitext(png_path)[0]
        # Only the first line of the sidecar .txt is read.
        with open(stem+'.txt', 'r') as meta_file:
            first_line = meta_file.readline()
        fields = first_line.strip("\n").split(",")
        # The first comma-separated field is also kept as an int array.
        val_objcat.append(int(fields[0]))
        list_file.write('%s %s %s %s\n' % (png_path, stem+'.npy', fields[0], fields[1]))
np.save(DATA_DIR+'/splits/val_y.npy', np.asarray(val_y)[ii])
np.save(DATA_DIR+'/splits/val_objcat_y.npy', np.asarray(val_objcat))

In [12]:
# Report how many files ended up in each split.
n_train = pos_split + neg_split
n_val = (len(pos_fns) - pos_split) + (len(neg_fns) - neg_split)
print('Train/Validation split')
print('Train: %i Validation: %i' % (n_train, n_val))

Train/Validation split
Train: 6415 Validation: 1604
