**About this notebook**

The purpose of this notebook is to split the flower dataset into training, validation, and test sets and, for each of these, to set up five working directories (one for each of the five flowers).

In [1]:
import os, shutil
import random

### 1 - Setup working directories

In [2]:
# Root folder that will hold the train/validation/test copy of the dataset.
# NOTE(review): the source photos are read from '../datasets/...' (plural)
# further down, while this path uses '../dataset' -- confirm the different
# parent folder is intended.
base_dir = '../dataset/flowers'

# makedirs(..., exist_ok=True) creates a missing parent folder as well and
# does not raise when the folder already exists, so this cell can be re-run.
os.makedirs(base_dir, exist_ok=True)

#### - directories for the training, validation, and test splits

In [3]:
# One folder per split, directly under base_dir.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# exist_ok=True keeps the cell idempotent: re-running it after the folders
# were created no longer raises FileExistsError.
for split_dir in (train_dir, validation_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

#### - training

In [4]:
# One training folder per flower class.
train_daisy_dir = os.path.join(train_dir, 'daisy')
train_dandelion_dir = os.path.join(train_dir, 'dandelion')
train_roses_dir = os.path.join(train_dir, 'roses')
train_sunflowers_dir = os.path.join(train_dir, 'sunflowers')
train_tulips_dir = os.path.join(train_dir, 'tulips')

# Create them in one pass; exist_ok=True makes the cell safe to re-run.
for class_dir in (train_daisy_dir, train_dandelion_dir, train_roses_dir,
                  train_sunflowers_dir, train_tulips_dir):
    os.makedirs(class_dir, exist_ok=True)

#### - validation

In [5]:
# One validation folder per flower class.
validation_daisy_dir = os.path.join(validation_dir, 'daisy')
validation_dandelion_dir = os.path.join(validation_dir, 'dandelion')
validation_roses_dir = os.path.join(validation_dir, 'roses')
validation_sunflowers_dir = os.path.join(validation_dir, 'sunflowers')
validation_tulips_dir = os.path.join(validation_dir, 'tulips')

# Create them in one pass; exist_ok=True makes the cell safe to re-run.
for class_dir in (validation_daisy_dir, validation_dandelion_dir,
                  validation_roses_dir, validation_sunflowers_dir,
                  validation_tulips_dir):
    os.makedirs(class_dir, exist_ok=True)

#### - test

In [6]:
# One test folder per flower class.
test_daisy_dir = os.path.join(test_dir, 'daisy')
test_dandelion_dir = os.path.join(test_dir, 'dandelion')
test_roses_dir = os.path.join(test_dir, 'roses')
test_sunflowers_dir = os.path.join(test_dir, 'sunflowers')
test_tulips_dir = os.path.join(test_dir, 'tulips')

# Create them in one pass; exist_ok=True makes the cell safe to re-run.
for class_dir in (test_daisy_dir, test_dandelion_dir, test_roses_dir,
                  test_sunflowers_dir, test_tulips_dir):
    os.makedirs(class_dir, exist_ok=True)

### 2 - Transfer downloaded images to working directories

In [7]:
import numpy as np
from random import shuffle

In [8]:
# Folder holding the downloaded photos; each flower class is read from its
# own sub-folder (orig_dir/<flower>).
orig_dir = '../datasets/flowers/flower_photos'

# Fixed seed: together with the sorted file list below, re-running this cell
# reproduces the same split instead of copying a different one on top of the
# previous run (which would leave the same photo in more than one split).
SPLIT_SEED = 42
rng = random.Random(SPLIT_SEED)

for flower in ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']:

    path = os.path.join(orig_dir, flower)

    # Names of the files directly inside the class folder (sub-folders are
    # skipped). Sorted because directory listing order is not guaranteed.
    # A missing class folder raises FileNotFoundError naming the path.
    with os.scandir(path) as entries:
        photos = sorted(entry.name for entry in entries if not entry.is_dir())

    # split into train, validation, and test sets (70%, 20%, 10%);
    # the test set takes whatever is left after rounding down the other two
    n_train, n_validation = int(0.7*len(photos)), int(0.2*len(photos))
    n_test = len(photos) - n_train - n_validation

    # assign photos to respective sets
    rng.shuffle(photos)
    train = photos[:n_train]
    validation = photos[n_train: n_train + n_validation]
    test = photos[n_train + n_validation:]

    # every photo must land in exactly one split
    assert len(train) + len(validation) + len(test) == len(photos)

    # destination directories for the training, validation, and test splits
    train_dir_flower = os.path.join(train_dir, flower)
    validation_dir_flower = os.path.join(validation_dir, flower)
    test_dir_flower = os.path.join(test_dir, flower)

    for split_files, split_dir in ((train, train_dir_flower),
                                   (validation, validation_dir_flower),
                                   (test, test_dir_flower)):
        # no-op when the folder already exists
        os.makedirs(split_dir, exist_ok=True)
        for fname in split_files:
            src = os.path.join(path, fname)
            dst = os.path.join(split_dir, fname)
            shutil.copyfile(src, dst)

### 3 - Check number of pictures 

In [9]:
# Report how many photos of one flower class ended up in each split.
# The class folder is named explicitly instead of reusing variables left over
# from the last iteration of the copy loop, so the label printed below always
# matches the folder that is actually counted.
checked_flower = 'tulips'

for split_name, split_dir in (('training', train_dir),
                              ('validation', validation_dir),
                              ('test', test_dir)):
    flower_dir = os.path.join(split_dir, checked_flower)
    print('total {} {} images:'.format(split_name, checked_flower),
          len(os.listdir(flower_dir)))

total training tulips images: 559
total validation tulips images: 159
total test tulips images: 81
