In [None]:
from os import listdir
from os.path import isfile, join, isdir
from PIL import Image
from scipy.misc import imread, imresize

import operator
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pickle

%matplotlib inline

# mypath ='data/101_ObjectCategories/'
# folders = [f for f in listdir(mypath) if os.path.isdir(join(mypath, f))]

In [None]:
def get_folders(path, categories):
    """Return the names of the sub-directories located directly under ``path``.

    Args:
        path: Directory to scan.
        categories: Collection of folder names to keep. When it is empty,
            every sub-directory of ``path`` is returned.

    Returns:
        A list of folder names (not full paths), in ``listdir`` order.
    """
    # Scan the directory that was passed in instead of relying on a
    # notebook-level global, so the function works on a fresh kernel and
    # for any root directory.
    folders = [f for f in listdir(path) if isdir(join(path, f))]

    if len(categories) != 0:
        folders = [f for f in folders if f in categories]

    return folders
    

def get_files(folders, path):
    """Map every folder name to the regular files it contains.

    Args:
        folders: Folder names, each relative to ``path``.
        path: Root directory that holds the folders.

    Returns:
        A dict ``{folder: [file_name, ...]}``. Only regular files are listed
        (sub-directories are skipped) and the names are not prefixed with
        the folder path.
    """
    categories_dict = {}
    for folder in folders:
        # Build the folder path from the ``path`` argument (without
        # overwriting it) so no global state or trailing slash is required.
        folder_path = join(path, folder)
        categories_dict[folder] = [
            f for f in listdir(folder_path) if isfile(join(folder_path, f))
        ]

    return categories_dict




def create_dataset(folders, categories_dict, categories, num_of_stories, percentage_of_negatives, path):
    """Build ``num_of_stories`` unique image stories; the tail of them are negatives.

    A story is a list with one randomly chosen image path per entry of
    ``categories`` (in that order). The trailing ``percentage_of_negatives``
    share of the stories gets its last image replaced by a distinct image
    drawn from a random folder of ``folders``.

    Args:
        folders: Folder names (relative to ``path``) negatives are drawn from.
        categories_dict: Maps each category name to its list of file names.
        categories: Ordered category names that define a story.
        num_of_stories: Number of unique stories to generate.
        percentage_of_negatives: Fraction in ``[0, 1]`` of stories turned
            into negatives.
        path: Root directory that contains the category folders.

    Returns:
        A list of stories (lists of image paths) in generation order:
        untouched stories first, negative stories last.

    Raises:
        ValueError: If ``percentage_of_negatives`` is outside ``[0, 1]``, or
            if there are not enough distinct stories / negative images to
            satisfy the request (which would otherwise loop forever).
    """
    if not 0 <= percentage_of_negatives <= 1:
        raise ValueError("percentage_of_negatives must be between 0 and 1")

    # Upper bound on the number of distinct stories; asking for more could
    # never terminate because duplicates are rejected.
    max_unique = 1
    for category in categories:
        max_unique *= len(set(categories_dict[category]))
    if num_of_stories > max_unique:
        raise ValueError(
            "cannot build {} unique stories: only {} combinations exist".format(
                num_of_stories, max_unique))

    # A list keeps generation order (reproducible under random.seed); the
    # set is only used for duplicate detection.
    seen_stories = set()
    dataset = []
    while len(dataset) < num_of_stories:
        story = tuple(
            join(path, category, random.choice(categories_dict[category]))
            for category in categories
        )
        if story not in seen_stories:
            seen_stories.add(story)
            dataset.append(list(story))

    # Derive the negative count from the same split index used for slicing,
    # so the two can never disagree by one because of float truncation.
    first_negative = int(len(dataset) * (1 - percentage_of_negatives))
    negatives_needed = len(dataset) - first_negative

    if negatives_needed > 0:
        # List every candidate folder once instead of on every draw.
        files_by_folder = {}
        for folder in folders:
            folder_path = join(path, folder)
            files = [f for f in listdir(folder_path) if isfile(join(folder_path, f))]
            if files:
                files_by_folder[folder] = files

        candidates = [folder for folder in folders if folder in files_by_folder]
        available = sum(len(set(files)) for files in files_by_folder.values())
        if negatives_needed > available:
            raise ValueError(
                "cannot pick {} distinct negative images: only {} available".format(
                    negatives_needed, available))

        seen_images = set()
        negatives = []
        while len(negatives) < negatives_needed:
            folder = random.choice(candidates)
            image = join(path, folder, random.choice(files_by_folder[folder]))
            if image not in seen_images:
                seen_images.add(image)
                negatives.append(image)

        # Replace the last image of each trailing story (index 4 for the
        # usual five-category story) with its negative.
        for story, image in zip(dataset[first_negative:], negatives):
            story[-1] = image

    return dataset
                    
            
        
def demo_visualize_stories(dataset, path, categories, num_of_stories):
    """Print and plot the images of the first ``num_of_stories`` stories.

    Args:
        dataset: List of stories; each story is a sequence of image paths.
        path: Unused; kept so existing calls keep working.
        categories: Category names; only its length is used, to limit how
            many images of each story are shown.
        num_of_stories: Number of stories (from the start of ``dataset``)
            to display.
    """
    for i, story in enumerate(dataset[:num_of_stories]):

        print ("Story #{}".format(i+1))

        # Show one image per category position of the story.
        for image_path in story[:len(categories)]:
            # matplotlib's reader is used because scipy.misc.imread was
            # removed from SciPy (1.2 and later).
            img = mpimg.imread(image_path)
            print (image_path)
            plt.imshow(img)
            plt.axis("off")
            plt.show()

        print ("-------------------------------------------------------------------------------")
   

        
def get_sets(dataset, training_size, validation_size, test_size, percentage_of_negatives):
    """Split ``dataset`` into training, validation and test lists.

    The dataset is treated as a leading block of positive stories followed
    by a trailing block of negatives (the last ``percentage_of_negatives``
    share). Each block is cut in the proportions
    ``training_size / len(dataset)`` and ``validation_size / len(dataset)``;
    whatever remains of each block goes to the test set. Every returned set
    lists its positives first, then its negatives.

    Args:
        dataset: List of stories, positives first and negatives last.
        training_size: Number of stories intended for training.
        validation_size: Number of stories intended for validation.
        test_size: Not used by the computation; the test set receives the
            remainder of each block.
        percentage_of_negatives: Fraction of ``dataset`` that is negative.

    Returns:
        A ``(training, validation, test)`` tuple of lists.
    """
    total = len(dataset)
    boundary = int(total * (1 - percentage_of_negatives))
    positive = dataset[:boundary]
    negative = dataset[boundary:]

    def carve(stories):
        # Cut one block into its (train, validation, test) slices.
        train_end = int(len(stories) * training_size / total)
        valid_end = train_end + int(len(stories) * validation_size / total)
        return stories[:train_end], stories[train_end:valid_end], stories[valid_end:]

    training, validation, test = carve(positive)
    neg_training, neg_validation, neg_test = carve(negative)

    training.extend(neg_training)
    validation.extend(neg_validation)
    test.extend(neg_test)

    return training, validation, test





    

In [None]:
# Root directory that holds one sub-folder of images per category.
mypath ='data/101_ObjectCategories/'

# Categories that make up a story, in the order their images appear in it.
categories = ['airplanes', 'cars_brad', 'Faces', 'Motorbikes', 'Leopards']


print ("Getting folders.......................")

# Keep only the sub-folders of mypath whose names are listed in categories.
folders = get_folders(mypath, categories)

# Swap the positions of 'Motorbikes' and 'Leopards' in the folder list.
a, b = folders.index('Motorbikes'), folders.index('Leopards')
folders[b], folders[a] = folders[a], folders[b]


print ("Getting Images per Category.......................")

# Map each selected category folder to the file names it contains.
categories_dict = get_files(folders, mypath)


print ("Creating the Dataset.......................")

dataset_size = 12000
negatives_ratio = 0.4

# Re-list every sub-folder (no category filter) and drop 'Leopards';
# create_dataset draws its replacement (negative) images from these folders.
folders = get_folders(mypath, [])
folders.remove('Leopards')

dataset = create_dataset(folders, categories_dict, categories, dataset_size, negatives_ratio, mypath)


print ("Getting Sets.......................")

#dataset = pickle.load(open('dataset','rb'))

# Split sizes; together they add up to dataset_size (12000).
training_size = 10000
validation_size = 1000
test_size = 1000
# Same ratio that was passed to create_dataset above.
negatives_ratio = 0.4

training, validation, test = get_sets(dataset, training_size, validation_size, test_size, negatives_ratio)



In [None]:
#demo_visualize_stories(dataset, mypath, categories, 2)