Merge branch 'release/0.22.1'

dan-blanchard committed Dec 5, 2013
2 parents 8784263 + f005dbc commit 75351dc
Showing 18 changed files with 100 additions and 64 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
* text=auto
12 changes: 6 additions & 6 deletions .travis.yml
@@ -20,15 +20,15 @@ before_install:
- sudo mkdir /scratch/
- sudo chmod 777 /scratch/
- travis/miniconda.sh -b
- mv travis/.condarc $HOME
- export PATH=/home/travis/anaconda/bin:$PATH
- conda update --yes conda
install:
- conda install --yes pip python=$TRAVIS_PYTHON_VERSION atlas numpy scipy beautiful-soup six scikit-learn
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then pip install --use-mirrors configparser; fi
- pip install -r requirements.txt --use-mirrors
- pip install python-coveralls --use-mirrors
- pip install nose-cov --use-mirrors
- if [ $GRIDMAP == "true" ]; then pip install --use-mirrors git+git://github.com/dan-blanchard/drmaa-python gridmap; fi
- conda install --yes pip python=$TRAVIS_PYTHON_VERSION atlas numpy scipy beautiful-soup six scikit-learn joblib prettytable python-coveralls
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then conda install --yes configparser futures logutils; fi
- if [ $GRIDMAP == "true" ]; then conda install --yes drmaa gridmap; fi
# Have to use pip for nose-cov because its entry points are not supported by conda yet
- pip install --use-mirrors nose-cov
- sudo rm -rf /dev/shm
- sudo ln -s /run/shm /dev/shm
- python setup.py install
1 change: 1 addition & 0 deletions requirements.txt
@@ -4,3 +4,4 @@ PrettyTable
beautifulsoup4
numpy
scipy
joblib
14 changes: 9 additions & 5 deletions setup.py
@@ -35,11 +35,15 @@ def requirements():
author='Daniel Blanchard',
author_email='dblanchard@ets.org',
license='BSD 3 clause',
packages=['skll'],
scripts=['scripts/filter_megam', 'scripts/generate_predictions',
'scripts/join_megam', 'scripts/megam_to_libsvm',
'scripts/print_model_weights', 'scripts/run_experiment',
'scripts/skll_convert', 'scripts/summarize_results'],
packages=['skll', 'skll.utilities'],
entry_points={'console_scripts': ['filter_megam = skll.utilities.filter_megam:main',
'generate_predictions = skll.utilities.generate_predictions:main',
'join_megam = skll.utilities.join_megam:main',
'megam_to_libsvm = skll.utilities.megam_to_libsvm:main',
'print_model_weights = skll.utilities.print_model_weights:main',
'run_experiment = skll.utilities.run_experiment:main',
'skll_convert = skll.utilities.skll_convert:main',
'summarize_results = skll.utilities.summarize_results:main']},
install_requires=requirements(),
classifiers=['Intended Audience :: Science/Research',
'Intended Audience :: Developers',
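
A note on this packaging change: moving from scripts= to console_scripts entry points means setuptools generates the executables at install time instead of copying script files into the bin directory. A sketch of roughly what the generated skll_convert wrapper does (illustrative only, not the literal generated file):

# Rough equivalent of the setuptools-generated 'skll_convert' executable:
# import the target module, call the named function, and use its return
# value as the process exit code.
import sys
from skll.utilities.skll_convert import main

if __name__ == '__main__':
    sys.exit(main())
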
49 changes: 26 additions & 23 deletions skll/experiments.py
@@ -587,30 +587,39 @@ def _classify_featureset(args):
featureset),
file=log_file)

# check whether a trained model on the same data with the same
# featureset already exists; if so, load it and then use it on test data
modelfile = os.path.join(model_path, '{}.model'.format(job_name))

# load the training and test examples
train_examples = _load_featureset(train_path, featureset, suffix,
label_col=label_col,
ids_to_floats=ids_to_floats,
quiet=quiet, class_map=class_map)
if task == 'cross_validate' or (not os.path.exists(modelfile) or
overwrite):
train_examples = _load_featureset(train_path, featureset, suffix,
label_col=label_col,
ids_to_floats=ids_to_floats,
quiet=quiet, class_map=class_map)
# initialize a classifier object
learner = Learner(learner_name,
probability=probability,
feature_scaling=feature_scaling,
model_kwargs=fixed_parameters,
pos_label_str=pos_label_str,
min_feature_count=min_feature_count)
# load the model if it already exists
else:
if os.path.exists(modelfile) and not overwrite:
print(('\tloading pre-existing {} ' +
'model: {}').format(learner_name, modelfile))
learner = Learner.from_file(modelfile)

# Load test set if there is one
if task == 'evaluate' or task == 'predict':
test_examples = _load_featureset(test_path, featureset, suffix,
label_col=label_col,
ids_to_floats=ids_to_floats,
quiet=quiet, class_map=class_map,
unlabelled=True)

# initialize a classifier object
learner = Learner(learner_name,
probability=probability,
feature_scaling=feature_scaling,
model_kwargs=fixed_parameters,
pos_label_str=pos_label_str,
min_feature_count=min_feature_count)

# check whether a trained model on the same data with the same
# featureset already exists; if so, load it (and the feature
# vocabulary) and then use it on the test data
modelfile = os.path.join(model_path, '{}.model'.format(job_name))

# create a list of dictionaries of the results information
learner_result_dict_base = {'experiment_name': experiment_name,
@@ -639,14 +648,8 @@ def _classify_featureset(args):
param_grid=param_grid,
grid_jobs=grid_search_jobs)
else:
# load the model if it already exists
if os.path.exists(modelfile) and not overwrite:
print(('\tloading pre-existing {} ' +
'model: {}').format(learner_name, modelfile))
learner.load(modelfile)

# if we do not have a saved model, we need to train one.
else:
if not os.path.exists(modelfile) or overwrite:
print(('\tfeaturizing and training new ' +
'{} model').format(learner_name),
file=log_file)
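
The net effect of the reshuffling above is that training data is only read from disk when a model will actually be trained; a reusable model on disk short-circuits the expensive _load_featureset call. A minimal sketch of the resulting control flow, where load_training_data and train_new_learner are hypothetical stand-ins for the real feature-loading and training calls:

import os

from skll.learner import Learner

def get_learner(task, modelfile, overwrite):
    # train from scratch when cross-validating, when no saved model
    # exists, or when the caller asked to overwrite it ...
    if task == 'cross_validate' or not os.path.exists(modelfile) or overwrite:
        train_examples = load_training_data()        # hypothetical stand-in
        learner = train_new_learner(train_examples)  # hypothetical stand-in
    # ... otherwise reuse the saved model and skip loading training data
    else:
        learner = Learner.from_file(modelfile)
    return learner
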
37 changes: 18 additions & 19 deletions skll/learner.py
@@ -18,6 +18,7 @@
from functools import wraps
from multiprocessing import cpu_count

import joblib
import numpy as np
import scipy.sparse as sp
from six import iteritems, itervalues
@@ -443,23 +444,22 @@ def from_file(cls, learner_path):
'''
:returns: New instance of Learner from the pickle at the specified path.
'''
with open(learner_path, "rb") as f:
skll_version, learner = pickle.load(f)
# Check that we've actually loaded a Learner (or sub-class)
if not isinstance(learner, cls):
raise ValueError(('The pickle stored at {} does not contain ' +
'a {} object.').format(learner_path, cls))
# Check that versions are compatible. (Currently, this just checks
# that major versions match)
elif skll_version[0] == VERSION[0]:
return learner
else:
raise Exception(("{} stored in pickle file {} was " +
"created with version {} of SKLL, which is " +
"incompatible with the current version " +
"{}").format(cls, learner_path,
'.'.join(skll_version),
'.'.join(VERSION)))
skll_version, learner = joblib.load(learner_path)
# Check that we've actually loaded a Learner (or sub-class)
if not isinstance(learner, cls):
raise ValueError(('The pickle stored at {} does not contain ' +
'a {} object.').format(learner_path, cls))
# Check that versions are compatible. (Currently, this just checks
# that major versions match)
elif skll_version[0] == VERSION[0]:
return learner
else:
raise ValueError(("{} stored in pickle file {} was " +
"created with version {} of SKLL, which is " +
"incompatible with the current version " +
"{}").format(cls, learner_path,
'.'.join(skll_version),
'.'.join(VERSION)))

@property
def model_type(self):
@@ -548,8 +548,7 @@ def save(self, learner_path):
if not os.path.exists(learner_dir):
os.makedirs(learner_dir)
# write out the files
with open(learner_path, "wb") as f:
pickle.dump((VERSION, self), f, -1)
joblib.dump((VERSION, self), learner_path)

def _create_estimator(self):
'''
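
Swapping pickle for joblib keeps the on-disk contract the same: the file still holds a (VERSION, learner) tuple, but joblib stores the NumPy arrays inside the model more efficiently than a plain pickle. A minimal round-trip sketch, with a stand-in dict in place of a real Learner:

import joblib

VERSION = (0, 22, 1)               # stand-in for skll.version.VERSION
learner = {'weights': [0.5, 2.0]}  # stand-in; SKLL stores a Learner here

# save() now delegates to joblib.dump
joblib.dump((VERSION, learner), 'example.model')

# from_file() unpacks the same tuple and checks major-version compatibility
skll_version, loaded = joblib.load('example.model')
assert skll_version[0] == VERSION[0]
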
Empty file added skll/utilities/__init__.py
9 changes: 8 additions & 1 deletion scripts/filter_megam → skll/utilities/filter_megam.py
@@ -37,7 +37,10 @@
from skll.version import __version__


if __name__ == '__main__':
def main():
'''
Handles command line arguments and gets things started.
'''
# Get command line arguments
parser = argparse.ArgumentParser(description="Filter MegaM file to remove\
features with names in stop\
@@ -98,3 +101,7 @@
print(" ", end='')
print('{} {}'.format(feature, value), end="")
print()


if __name__ == '__main__':
main()
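
The same wrap-everything-in-main() pattern is applied to each former script below. Besides enabling the console_scripts entry points in setup.py, it makes the utilities importable. A sketch of the three ways a refactored utility can now be invoked (note that main() takes no arguments, so argparse inside it always reads sys.argv):

# 1. as the installed console script:  filter_megam [options]
# 2. as a module:                      python -m skll.utilities.filter_megam [options]
# 3. from Python code:
from skll.utilities.filter_megam import main
main()  # argparse inside main() parses sys.argv
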
File renamed without changes.
9 changes: 8 additions & 1 deletion scripts/join_megam → skll/utilities/join_megam.py
@@ -96,7 +96,10 @@ def get_unique_name(feature_name, prev_feature_set, filename):
return new_feature_name


if __name__ == '__main__':
def main():
'''
Handles command line arguments and gets things started.
'''
# Get command line arguments
parser = argparse.ArgumentParser(description="Combine MegaM files that \
contain features for the same\
@@ -213,3 +216,7 @@ def get_unique_name(feature_name, prev_feature_set, filename):
print("# {}".format(curr_filename).encode('utf-8'))
print("{}\t{}".format(class_dict[curr_filename],
feature_dict[curr_filename].strip()).encode('utf-8'))


if __name__ == '__main__':
main()
File renamed without changes.
scripts/print_model_weights → skll/utilities/print_model_weights.py
@@ -36,7 +36,10 @@
from skll.version import __version__


if __name__ == '__main__':
def main():
'''
Handles command line arguments and gets things started.
'''
parser = argparse.ArgumentParser(description="Prints out the weights of a \
given model.",
conflict_handler='resolve',
@@ -63,3 +66,7 @@

for feat, val in sorted(iteritems(weights), key=lambda x: -abs(x[1]))[:k]:
print("{:.12f}\t{}".format(val, feat))


if __name__ == '__main__':
main()
File renamed without changes.
8 changes: 7 additions & 1 deletion scripts/skll_convert → skll/utilities/skll_convert.py
@@ -37,7 +37,10 @@
from skll.version import __version__


if __name__ == '__main__':
def main():
'''
Handles command line arguments and gets things started.
'''
# Get command line arguments
parser = argparse.ArgumentParser(description="Takes an input feature file \
and converts it to another \
@@ -111,3 +114,6 @@
write_feature_file(args.outfile, ids, classes, feature_dicts,
arff_regression=args.arff_regression,
arff_relation=args.arff_relation)

if __name__ == '__main__':
main()
File renamed without changes.
2 changes: 1 addition & 1 deletion skll/version.py
@@ -7,5 +7,5 @@
:organization: ETS
'''

__version__ = '0.22.0'
__version__ = '0.22.1'
VERSION = tuple(int(x) for x in __version__.split('.'))
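
This VERSION tuple is what Learner.from_file() compares when deciding whether a saved model can be loaded: only the first component (the major version) has to match. A small illustration of the check:

__version__ = '0.22.1'
VERSION = tuple(int(x) for x in __version__.split('.'))  # (0, 22, 1)

# from_file() only compares major versions, so any 0.x model loads here,
# while a hypothetical model saved by a 1.0.0 release would raise ValueError
assert VERSION[0] == (0, 21, 5)[0]   # compatible
assert VERSION[0] != (1, 0, 0)[0]    # incompatible
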
8 changes: 2 additions & 6 deletions tests/test_skll.py
@@ -912,9 +912,6 @@ def check_convert_featureset(from_suffix, to_suffix):
# the path to the unmerged feature files
dirpath = os.path.join(_my_dir, 'train', 'test_conversion')

# get the path to the conversion script
converter_path = os.path.abspath(os.path.join(_my_dir, '..', 'scripts', 'skll_convert'))

# get the feature name prefix
feature_name_prefix = '{}_to_{}'.format(from_suffix.lstrip('.'), to_suffix.lstrip('.'))

@@ -925,9 +922,8 @@ def check_convert_featureset(from_suffix, to_suffix):
feature, from_suffix))
output_file_path = os.path.join(dirpath, '{}_{}{}'.format(feature_name_prefix,
feature, to_suffix))
convert_cmd = shlex.split('{} --quiet {} {}'.format(converter_path,
input_file_path,
output_file_path))
convert_cmd = shlex.split('skll_convert --quiet {} {}'.format(input_file_path,
output_file_path))
subprocess.check_call(convert_cmd)

# now load and merge all unmerged, converted features in the `to_suffix` format
5 changes: 5 additions & 0 deletions travis/.condarc
@@ -0,0 +1,5 @@
# a condarc file should be placed in $HOME/.condarc

channels:
- https://conda.binstar.org/dan_blanchard
- defaults
