Skip to content

Commit

Permalink
Merge pull request #409 from EducationalTestingService/fix-output-directory-creation
Browse files Browse the repository at this point in the history

Fix output directory creation
  • Loading branch information
desilinguist committed Jan 29, 2018
2 parents 322749b + 9d0c8a9 commit dc9644c
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 17 deletions.
40 changes: 23 additions & 17 deletions skll/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,10 +481,11 @@ def _parse_config_file(config_path, log_level=logging.INFO):
# next, get the log path before anything else since we need to
# save all logging messages to a log file in addition to displaying
# them on the console
log_path = _locate_file(config.get("Output", "log"), config_dir)
if log_path:
log_path = join(config_dir, log_path)
if not exists(log_path):
try:
log_path = _locate_file(config.get("Output", "log"), config_dir)
except IOError as e:
if e.errno == errno.ENOENT:
log_path = e.filename
os.makedirs(log_path)

# Create a top-level log file under the log path
Expand Down Expand Up @@ -731,24 +732,29 @@ def _parse_config_file(config_path, log_level=logging.INFO):
probability = config.getboolean("Output", "probability")

# do we want to keep the predictions?
prediction_dir = _locate_file(config.get("Output", "predictions"),
config_dir)
if prediction_dir:
if not exists(prediction_dir):
# make sure the predictions path exists and if not create it
try:
prediction_dir = _locate_file(config.get("Output", "predictions"),
config_dir)
except IOError as e:
if e.errno == errno.ENOENT:
prediction_dir = e.filename
os.makedirs(prediction_dir)

# make sure model path exists
model_path = _locate_file(config.get("Output", "models"), config_dir)
if model_path:
model_path = join(config_dir, model_path)
if not exists(model_path):
# make sure model path exists and if not, create it
try:
model_path = _locate_file(config.get("Output", "models"), config_dir)
except IOError as e:
if e.errno == errno.ENOENT:
model_path = e.filename
os.makedirs(model_path)

# make sure results path exists
results_path = _locate_file(config.get("Output", "results"), config_dir)
if results_path:
results_path = join(config_dir, results_path)
if not exists(results_path):
try:
results_path = _locate_file(config.get("Output", "results"), config_dir)
except IOError as e:
if e.errno == errno.ENOENT:
results_path = e.filename
os.makedirs(results_path)

# what are the output metrics?
Expand Down
58 changes: 58 additions & 0 deletions tests/test_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def tearDown():
config_dir = join(_my_dir, 'configs')
for config_file in glob(join(config_dir, 'test_config_parsing_*.cfg')):
os.unlink(config_file)
for auto_dir in glob(join(_my_dir, 'auto*')):
for auto_dir_file in os.listdir(auto_dir):
os.unlink(join(auto_dir, auto_dir_file))
os.rmdir(auto_dir)


def check_safe_float_conversion(converted_val, expected_val):
Expand Down Expand Up @@ -1119,6 +1123,60 @@ def test_config_parsing_relative_input_paths():
learning_curve_train_sizes, output_metrics) = _parse_config_file(config_path)


def test_config_parsing_automatic_output_directory_creation():
    """Check that parsing a config file creates missing output directories.

    The log, results, models, and predictions options are all pointed at
    directories that do not exist yet; ``_parse_config_file`` should create
    them automatically instead of raising an error.  The ``auto*``
    directories created here are removed by ``tearDown``.
    """

    # input paths are relative; _parse_config_file resolves them against
    # the directory containing the config file
    train_dir = '../train'
    train_file = join(train_dir, 'f0.jsonlines')
    test_file = join(train_dir, 'f1.jsonlines')
    output_dir = '../output'  # NOTE(review): unused in this test — confirm it is needed by the template

    # make a simple config file that has new directories that should
    # be automatically created
    new_log_path = join(_my_dir, 'autolog')
    new_results_path = join(_my_dir, 'autoresults')
    new_models_path = join(_my_dir, 'automodels')
    new_predictions_path = join(_my_dir, 'autopredictions')

    # precondition: none of the output directories exist before parsing
    ok_(not(exists(new_log_path)))
    ok_(not(exists(new_results_path)))
    ok_(not(exists(new_models_path)))
    ok_(not(exists(new_predictions_path)))

    values_to_fill_dict = {'experiment_name': 'auto_dir_creation',
                           'task': 'evaluate',
                           'train_file': train_file,
                           'test_file': test_file,
                           'learners': "['LogisticRegression']",
                           'log': new_log_path,
                           'results': new_results_path,
                           'models': new_models_path,
                           'predictions': new_predictions_path,
                           'objective': 'f1_score_micro'}

    # instantiate the template with the values above
    config_template_path = join(_my_dir, 'configs',
                                'test_relative_paths.template.cfg')
    config_path = fill_in_config_options(config_template_path,
                                         values_to_fill_dict,
                                         'auto_dir_creation')

    # parsing the config should create the directories as a side effect;
    # the unpacked names must match _parse_config_file's return order exactly
    (experiment_name, task, sampler, fixed_sampler_parameters,
     feature_hasher, hasher_features, id_col, label_col, train_set_name,
     test_set_name, suffix, featuresets, do_shuffle, model_path,
     do_grid_search, grid_objective, probability, results_path,
     pos_label_str, feature_scaling, min_feature_count, folds_file,
     grid_search_jobs, grid_search_folds, cv_folds, save_cv_folds,
     use_folds_file_for_grid_search, do_stratified_folds,
     fixed_parameter_list, param_grid_list, featureset_names, learners,
     prediction_dir, log_path, train_path, test_path, ids_to_floats,
     class_map, custom_learner_path, learning_curve_cv_folds_list,
     learning_curve_train_sizes, output_metrics) = _parse_config_file(config_path)

    # postcondition: all four output directories now exist
    ok_(exists(new_log_path))
    ok_(exists(new_results_path))
    ok_(exists(new_models_path))
    ok_(exists(new_predictions_path))


def check_config_parsing_metrics_and_objectives_overlap(task,
metrics,
objectives):
Expand Down

0 comments on commit dc9644c

Please sign in to comment.