Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix batch accumulation #1262

Merged
merged 2 commits into from
Nov 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions digits/dataset/images/classification/test_imageset_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,18 @@
import PIL.Image


IMAGE_SIZE = 10
IMAGE_COUNT = 10 # per category


def create_classification_imageset(folder, image_size=None, image_count=None, add_unbalanced_category=False):
def create_classification_imageset(
folder,
image_size=10,
image_count=10,
add_unbalanced_category=False,
):
"""
Creates a folder of folders of images for classification

If requested to add an unbalanced category then a category is added with
half the number of samples of other categories
"""
if image_size is None:
image_size = IMAGE_SIZE
if image_count is None:
image_count = IMAGE_COUNT

# Stores the relative path of each image of the dataset
paths = defaultdict(list)

Expand Down
20 changes: 12 additions & 8 deletions digits/dataset/images/classification/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from bs4 import BeautifulSoup
import PIL.Image

from .test_imageset_creator import create_classification_imageset, IMAGE_COUNT as DUMMY_IMAGE_COUNT
from .test_imageset_creator import create_classification_imageset
from digits import test_utils
import digits.test_views

Expand Down Expand Up @@ -64,6 +64,7 @@ class BaseViewsTestWithImageset(BaseViewsTest):
Provides an imageset and some functions
"""
# Inherited classes may want to override these default attributes
IMAGE_COUNT = 10 # per class
IMAGE_HEIGHT = 10
IMAGE_WIDTH = 10
IMAGE_CHANNELS = 3
Expand All @@ -78,8 +79,11 @@ def setUpClass(cls):
super(BaseViewsTestWithImageset, cls).setUpClass()
cls.imageset_folder = tempfile.mkdtemp()
# create imageset
cls.imageset_paths = create_classification_imageset(cls.imageset_folder,
add_unbalanced_category=cls.UNBALANCED_CATEGORY)
cls.imageset_paths = create_classification_imageset(
cls.imageset_folder,
image_count=cls.IMAGE_COUNT,
add_unbalanced_category=cls.UNBALANCED_CATEGORY,
)
cls.created_datasets = []

@classmethod
Expand Down Expand Up @@ -363,7 +367,7 @@ def check_image_count(self, type):
assert parse_info['val_count'] == 0
image_count = parse_info['test_count']
assert self.categoryCount() == parse_info['label_count']
assert image_count == DUMMY_IMAGE_COUNT * parse_info['label_count'], 'image count mismatch'
assert image_count == self.IMAGE_COUNT * parse_info['label_count'], 'image count mismatch'
assert self.delete_dataset(job_id) == 200, 'delete failed'
assert not self.dataset_exists(job_id), 'dataset exists after delete'

Expand All @@ -375,9 +379,9 @@ def test_max_per_class(self):
yield self.check_max_per_class, type

def check_max_per_class(self, type):
# create dataset, asking for at most DUMMY_IMAGE_COUNT/2 images per class
assert DUMMY_IMAGE_COUNT % 2 == 0
max_per_class = DUMMY_IMAGE_COUNT / 2
# create dataset, asking for at most IMAGE_COUNT/2 images per class
assert self.IMAGE_COUNT % 2 == 0
max_per_class = self.IMAGE_COUNT / 2
data = {'folder_pct_val': 0}
if type == 'train':
data['folder_train_max_per_class'] = max_per_class
Expand Down Expand Up @@ -418,7 +422,7 @@ def test_min_per_class(self):
def check_min_per_class(self, type):
# create dataset, asking for one more image per class
# than available in the "unbalanced" category
min_per_class = DUMMY_IMAGE_COUNT / 2 + 1
min_per_class = self.IMAGE_COUNT / 2 + 1
data = {'folder_pct_val': 0}
if type == 'train':
data['folder_train_min_per_class'] = min_per_class
Expand Down
38 changes: 38 additions & 0 deletions digits/model/images/classification/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import itertools
import json
import math
import os
import shutil
import tempfile
Expand All @@ -16,13 +17,16 @@
from StringIO import StringIO

from bs4 import BeautifulSoup
from google.protobuf import text_format

from digits.config import config_value
import digits.dataset.images.classification.test_views
from digits.frameworks import CaffeFramework
import digits.test_views
from digits import test_utils
import digits.webapp

import caffe_pb2

# May be too short on a slow system
TIMEOUT_DATASET = 45
Expand Down Expand Up @@ -101,6 +105,10 @@ def model_exists(cls, job_id):
def model_status(cls, job_id):
# Thin wrapper: forwards job_id to cls.job_status() with the 'models' job type and returns its result
return cls.job_status(job_id, 'models')

@classmethod
def model_info(cls, job_id):
# Thin wrapper: forwards job_id to cls.job_info() with the 'models' job type and returns its result
return cls.job_info(job_id, 'models')

@classmethod
def abort_model(cls, job_id):
# Thin wrapper: forwards job_id to cls.abort_job() with job_type='models' and returns its result
return cls.abort_job(job_id, job_type='models')
Expand Down Expand Up @@ -1254,3 +1262,33 @@ def test_sweep(self):
assert self.model_wait_completion(job_id) == 'Done', 'create failed'
assert self.delete_model(job_id) == 200, 'delete failed'
assert not self.model_exists(job_id), 'model exists after delete'


@unittest.skipIf(
    not CaffeFramework().can_accumulate_gradients(),
    'This version of Caffe cannot accumulate gradients')
class TestBatchAccumulationCaffe(BaseViewsTestWithDataset, test_utils.CaffeMixin):
    """
    Trains a model with batch accumulation enabled and checks the
    iter_size and max_iter values written to the generated solver file
    """
    TRAIN_EPOCHS = 1
    IMAGE_COUNT = 10  # per class

    def test_batch_accumulation_calculations(self):
        images_per_batch = 10
        accumulation_steps = 2

        job_id = self.create_model(
            batch_size=images_per_batch,
            batch_accumulation=accumulation_steps,
        )
        final_status = self.model_wait_completion(job_id)
        assert final_status == 'Done', 'create failed'

        # Parse the solver file that the job reports in its info
        job_info = self.model_info(job_id)
        solver = caffe_pb2.SolverParameter()
        solver_path = os.path.join(job_info['directory'], job_info['solver file'])
        with open(solver_path, 'r') as solver_file:
            text_format.Merge(solver_file.read(), solver)

        # The accumulation factor must be stored as the solver's iter_size
        assert solver.iter_size == accumulation_steps, \
            'iter_size is %d instead of %d' % (solver.iter_size, accumulation_steps)

        # One solver iteration consumes batch_size * batch_accumulation images
        # NOTE(review): the factor 3 is hard-coded; presumably the number of
        # classes in the test imageset - confirm against the imageset creator
        images_to_process = self.TRAIN_EPOCHS * self.IMAGE_COUNT * 3
        images_per_iteration = images_per_batch * accumulation_steps
        expected_max_iter = int(math.ceil(
            float(images_to_process) / images_per_iteration
        ))
        assert solver.max_iter == expected_max_iter, \
            'max_iter is %d instead of %d' % (solver.max_iter, expected_max_iter)
16 changes: 10 additions & 6 deletions digits/model/tasks/caffe_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,8 +525,10 @@ def save_files_classification(self):
solver.iter_size = self.batch_accumulation

# Epochs -> Iterations
train_iter = int(math.ceil(float(self.dataset.get_entry_count(
constants.TRAIN_DB)) / train_data_layer.data_param.batch_size))
train_iter = int(math.ceil(
float(self.dataset.get_entry_count(constants.TRAIN_DB)) /
(train_data_layer.data_param.batch_size * solver.iter_size)
))
solver.max_iter = train_iter * self.train_epochs
snapshot_interval = self.snapshot_interval * train_iter
if 0 < snapshot_interval <= 1:
Expand Down Expand Up @@ -598,7 +600,7 @@ def save_files_classification(self):
# Display 8x per epoch, or once per 5000 images, whichever is more frequent
solver.display = max(1, min(
int(math.floor(float(solver.max_iter) / (self.train_epochs * 8))),
int(math.ceil(5000.0 / train_data_layer.data_param.batch_size))
int(math.ceil(5000.0 / (train_data_layer.data_param.batch_size * solver.iter_size)))
))

if self.random_seed is not None:
Expand Down Expand Up @@ -753,8 +755,10 @@ def save_files_generic(self):
solver.iter_size = self.batch_accumulation

# Epochs -> Iterations
train_iter = int(math.ceil(float(self.dataset.get_entry_count(constants.TRAIN_DB)) /
train_image_data_layer.data_param.batch_size))
train_iter = int(math.ceil(
float(self.dataset.get_entry_count(constants.TRAIN_DB)) /
(train_image_data_layer.data_param.batch_size * solver.iter_size)
))
solver.max_iter = train_iter * self.train_epochs
snapshot_interval = self.snapshot_interval * train_iter
if 0 < snapshot_interval <= 1:
Expand Down Expand Up @@ -821,7 +825,7 @@ def save_files_generic(self):
# Display 8x per epoch, or once per 5000 images, whichever is more frequent
solver.display = max(1, min(
int(math.floor(float(solver.max_iter) / (self.train_epochs * 8))),
int(math.ceil(5000.0 / train_image_data_layer.data_param.batch_size))
int(math.ceil(5000.0 / (train_image_data_layer.data_param.batch_size * solver.iter_size)))
))

if self.random_seed is not None:
Expand Down