Skip to content

Commit

Permalink
Add shuffling of files prior to splitting for validation
Browse files: browse the repository at this point in the history
  • Loading branch information
Sanqui committed Oct 24, 2020
1 parent 3ba9933 commit e5d0dc0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
1 change: 1 addition & 0 deletions ImageDataAugmentor/directory_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def __init__(self,
results.append(
pool.apply_async(_list_valid_filenames_in_directory,
(dirpath, self.white_list_formats, self.split,
shuffle, seed,
self.class_indices, follow_links)))
classes_list = []
for res in results:
Expand Down
27 changes: 17 additions & 10 deletions ImageDataAugmentor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import os
import warnings
import random

import numpy as np
import cv2
Expand Down Expand Up @@ -146,8 +147,8 @@ def _recursive_list(subpath):
yield root, fname


def _list_valid_filenames_in_directory(directory, white_list_formats, split,
class_indices, follow_links):
def _list_valid_filenames_in_directory(directory, white_list_formats, split, shuffle,
seed, class_indices, follow_links):
"""Lists paths of files in `subdir` with extensions in `white_list_formats`.
# Arguments
Expand All @@ -171,16 +172,22 @@ def _list_valid_filenames_in_directory(directory, white_list_formats, split,
`["class1/file1.jpg", "class1/file2.jpg", ...]`).
"""
dirname = os.path.basename(directory)

valid_files = _iter_valid_files(
directory, white_list_formats, follow_links)

if split:
num_files = len(list(
_iter_valid_files(directory, white_list_formats, follow_links)))
valid_files = list(valid_files)
num_files = len(valid_files)
start, stop = int(split[0] * num_files), int(split[1] * num_files)
valid_files = list(
_iter_valid_files(
directory, white_list_formats, follow_links))[start: stop]
else:
valid_files = _iter_valid_files(
directory, white_list_formats, follow_links)
if shuffle:
if seed is not None:
random.seed(seed)
valid_files.sort()
random.shuffle(valid_files)

valid_files = valid_files[start:stop]

classes = []
filenames = []
for root, fname in valid_files:
Expand Down

0 comments on commit e5d0dc0

Please sign in to comment.