Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…/skll into feature/skll-261-pandas-dataframe-helper
  • Loading branch information
Diane Napolitano committed May 19, 2016
2 parents 7b874ed + 23b8d2b commit f941db2
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 13 deletions.
4 changes: 2 additions & 2 deletions conda-recipe/unix/skll/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ requirements:
- beautiful-soup
- numpy
- scipy
- pyyaml
- ruamel_yaml
- configparser [py2k]
- futures [py2k]
- logutils [py2k]
Expand All @@ -66,7 +66,7 @@ requirements:
- beautiful-soup
- numpy
- scipy
- pyyaml
- ruamel_yaml
- configparser [py2k]
- futures [py2k]
- logutils [py2k]
Expand Down
4 changes: 2 additions & 2 deletions conda-recipe/windows/skll/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ requirements:
- beautiful-soup
- numpy
- scipy
- pyyaml
- ruamel_yaml
- configparser [py2k]
- futures [py2k]
- logutils [py2k]
Expand All @@ -62,7 +62,7 @@ requirements:
- beautiful-soup
- numpy
- scipy
- pyyaml
- ruamel_yaml
- configparser [py2k]
- futures [py2k]
- logutils [py2k]
Expand Down
8 changes: 8 additions & 0 deletions conda_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
scikit-learn==0.17.1
six
PrettyTable
beautifulsoup4
numpy
scipy
joblib>=0.8
ruamel_yaml
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ beautifulsoup4
numpy
scipy
joblib>=0.8
PyYAML
ruamel.yaml
2 changes: 1 addition & 1 deletion requirements_rtd.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ beautifulsoup4
numpy
scipy
joblib>=0.8
PyYAML
ruamel.yaml
7 changes: 6 additions & 1 deletion skll/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
isabs, join, normpath, realpath)

import configparser # Backported version from Python 3
import yaml

try:
import ruamel_yaml as yaml # conda package
except ImportError:
import ruamel.yaml as yaml # pypi package

from six import string_types, iteritems # Python 2/3
from sklearn.metrics import SCORERS

Expand Down
8 changes: 6 additions & 2 deletions skll/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@
from itertools import combinations
from os.path import basename, exists, isfile, join

import yaml
try:
import ruamel_yaml as yaml # conda package
except ImportError:
import ruamel.yaml as yaml # pypi package

from prettytable import PrettyTable, ALL
from six import iterkeys, iteritems # Python 2/3
from six.moves import zip
Expand Down Expand Up @@ -799,7 +803,7 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',
if len(grid_objectives) == 1:
job_name_components = [experiment_name, featureset_name,
learner_name]
else:
else:
job_name_components = [experiment_name, featureset_name,
learner_name, grid_objective]

Expand Down
90 changes: 88 additions & 2 deletions tests/test_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def check_config_parsing_file_not_found_error(config_path):

@raises(IOError)
def test_empty_config_name_raises_file_not_found_error():
"""
"""
Assert that calling _parse_config_file on an empty string raises IOError
"""
_parse_config_file("")
Expand Down Expand Up @@ -650,7 +650,7 @@ def test_config_parsing_bad_objectives():

def test_config_parsing_bad_objective_and_objectives():
"""
Test to ensure config file parsing raises an error with
Test to ensure config file parsing raises an error with
a grid objectives and objective both given non default values
"""

Expand Down Expand Up @@ -1046,3 +1046,89 @@ def test_setting_number_of_cv_folds():
class_map, custom_learner_path) = _parse_config_file(config_path)

eq_(cv_folds, 5)


def test_setting_param_grids():
    """
    Check that the numeric values given in the ``param_grids`` option,
    including ones written in exponent notation such as ``1e-6``, come
    back from ``_parse_config_file`` equal to the expected numbers.
    """

    # build a simple config file that sets `param_grids` explicitly
    config_path = fill_in_config_options(
        join(_my_dir, 'configs', 'test_config_parsing.template.cfg'),
        dict(experiment_name='config_parsing',
             task='evaluate',
             train_directory=join(_my_dir, 'train'),
             test_directory=join(_my_dir, 'test'),
             featuresets="[['f1', 'f2', 'f3']]",
             learners="['LinearSVC']",
             log=join(_my_dir, 'output'),
             results=join(_my_dir, 'output'),
             param_grids="[{'C': [1e-6, 0.001, 1, 10, 100, 1e5]}]",
             objective='f1_score_macro'),
        'param_grids')

    # the parser returns one flat tuple; unpack every field so that a
    # change in the number of returned values still fails this test
    (experiment_name, task, sampler, fixed_sampler_parameters,
     feature_hasher, hasher_features, id_col, label_col,
     train_set_name, test_set_name, suffix, featuresets,
     do_shuffle, model_path, do_grid_search, grid_objective,
     probability, results_path, pos_label_str, feature_scaling,
     min_feature_count, grid_search_jobs, grid_search_folds,
     cv_folds, save_cv_folds, do_stratified_folds,
     fixed_parameter_list, param_grid_list, featureset_names,
     learners, prediction_dir, log_path, train_path, test_path,
     ids_to_floats, class_map,
     custom_learner_path) = _parse_config_file(config_path)

    # compare each parsed value of `C` with its expected number, in order
    expected_c_values = (1e-6, 1e-3, 1, 10, 100, 1e5)
    for position, expected_value in enumerate(expected_c_values):
        eq_(param_grid_list[0]['C'][position], expected_value)


def test_setting_fixed_parameters():
    """
    Check that the numeric values given in the ``fixed_parameters``
    option, including ones written in exponent notation such as
    ``1e-6``, come back from ``_parse_config_file`` equal to the
    expected numbers.
    """

    # build a simple config file that sets `fixed_parameters` explicitly
    config_path = fill_in_config_options(
        join(_my_dir, 'configs', 'test_config_parsing.template.cfg'),
        dict(experiment_name='config_parsing',
             task='evaluate',
             train_directory=join(_my_dir, 'train'),
             test_directory=join(_my_dir, 'test'),
             featuresets="[['f1', 'f2', 'f3']]",
             learners="['LinearSVC']",
             log=join(_my_dir, 'output'),
             results=join(_my_dir, 'output'),
             fixed_parameters="[{'C': [1e-6, 0.001, 1, 10, 100, 1e5]}]",
             objective='f1_score_macro'),
        'fixed_parameters')

    # the parser returns one flat tuple; unpack every field so that a
    # change in the number of returned values still fails this test
    (experiment_name, task, sampler, fixed_sampler_parameters,
     feature_hasher, hasher_features, id_col, label_col,
     train_set_name, test_set_name, suffix, featuresets,
     do_shuffle, model_path, do_grid_search, grid_objective,
     probability, results_path, pos_label_str, feature_scaling,
     min_feature_count, grid_search_jobs, grid_search_folds,
     cv_folds, save_cv_folds, do_stratified_folds,
     fixed_parameter_list, param_grid_list, featureset_names,
     learners, prediction_dir, log_path, train_path, test_path,
     ids_to_floats, class_map,
     custom_learner_path) = _parse_config_file(config_path)

    # compare each parsed value of `C` with its expected number, in order
    expected_c_values = (1e-6, 1e-3, 1, 10, 100, 1e5)
    for position, expected_value in enumerate(expected_c_values):
        eq_(fixed_parameter_list[0]['C'][position], expected_value)
5 changes: 3 additions & 2 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,10 @@ def fill_in_config_options(config_template_path,
'test_file', 'featuresets', 'featureset_names',
'feature_hasher', 'hasher_features', 'learners',
'sampler', 'shuffle', 'feature_scaling',
'num_cv_folds', 'bad_option', 'duplicate_option'],
'fixed_parameters', 'num_cv_folds',
'bad_option', 'duplicate_option'],
'Tuning': ['probability', 'grid_search', 'objective',
'objectives', 'duplicate_option'],
'param_grids', 'objectives', 'duplicate_option'],
'Output': ['results', 'log', 'models',
'predictions']}

Expand Down

0 comments on commit f941db2

Please sign in to comment.