Commit

…/skll into feature/skll-261-pandas-dataframe-helper
Diane Napolitano committed May 16, 2016
2 parents 89dc0ad + 5e40b4c commit 99012a9
Showing 30 changed files with 502 additions and 160 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -22,7 +22,7 @@ before_install:
- conda config --add channels desilinguist
- conda update --yes conda
install:
- conda install --yes python=$TRAVIS_PYTHON_VERSION nomkl numpy scipy beautiful-soup six scikit-learn==0.17.0 joblib prettytable python-coveralls pyyaml
- conda install --yes python=$TRAVIS_PYTHON_VERSION nomkl numpy scipy beautiful-soup six scikit-learn==0.17.1 joblib prettytable python-coveralls pyyaml
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then conda install --yes configparser logutils mock; fi
- if [ ${WITH_PANDAS} == "true" ]; then conda install --yes pandas; fi
# Have to use pip for nose-cov because its entry points are not supported by conda yet
2 changes: 1 addition & 1 deletion README.rst
@@ -62,7 +62,7 @@ like:
# Should we tune parameters of all learners by searching provided parameter grids?
grid_search = true
# Function to maximize when performing grid search
objective = accuracy
objectives = ['accuracy']
[Output]
# again, these can/should be absolute paths
16 changes: 10 additions & 6 deletions conda.yaml → conda-recipe/unix/skll/meta.yaml
@@ -1,14 +1,16 @@
package:
name: skll
version: {{ environ.get('GIT_DESCRIBE_TAG', '').replace('v', '') }}
version: 1.2

source:
git_url: ./
path: ../../../../skll

build:
number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }}
number: 0
track_features:
- nomkl
script:
- cd $RECIPE_DIR
- cd $SRC_DIR
- $PYTHON setup.py install
entry_points:
- compute_eval_from_predictions = skll.utilities.compute_eval_from_predictions:main
@@ -39,9 +41,10 @@ build:
requirements:
build:
- python
- nomkl
- joblib
- setuptools
- scikit-learn 0.17.0
- scikit-learn 0.17.1
- six
- prettytable
- beautiful-soup
@@ -55,8 +58,9 @@ requirements:

run:
- python
- nomkl
- joblib
- scikit-learn 0.17.0
- scikit-learn 0.17.1
- six
- prettytable
- beautiful-soup
88 changes: 88 additions & 0 deletions conda-recipe/windows/skll/meta.yaml
@@ -0,0 +1,88 @@
package:
name: skll
version: 1.2

source:
path: ../../../../skll

build:
number: 0
script:
- cd $SRC_DIR
- $PYTHON setup.py install
entry_points:
- compute_eval_from_predictions = skll.utilities.compute_eval_from_predictions:main
- filter_features = skll.utilities.filter_features:main
- generate_predictions = skll.utilities.generate_predictions:main
- join_features = skll.utilities.join_features:main
- print_model_weights = skll.utilities.print_model_weights:main
- run_experiment = skll.utilities.run_experiment:main
- skll_convert = skll.utilities.skll_convert:main
- summarize_results = skll.utilities.summarize_results:main
- compute_eval_from_predictions2 = skll.utilities.compute_eval_from_predictions:main [py2k]
- filter_features2 = skll.utilities.filter_features:main [py2k]
- generate_predictions2 = skll.utilities.generate_predictions:main [py2k]
- join_features2 = skll.utilities.join_features:main [py2k]
- print_model_weights2 = skll.utilities.print_model_weights:main [py2k]
- run_experiment2 = skll.utilities.run_experiment:main [py2k]
- skll_convert2 = skll.utilities.skll_convert:main [py2k]
- summarize_results2 = skll.utilities.summarize_results:main [py2k]
- compute_eval_from_predictions3 = skll.utilities.compute_eval_from_predictions:main [py3k]
- filter_features3 = skll.utilities.filter_features:main [py3k]
- generate_predictions3 = skll.utilities.generate_predictions:main [py3k]
- join_features3 = skll.utilities.join_features:main [py3k]
- print_model_weights3 = skll.utilities.print_model_weights:main [py3k]
- run_experiment3 = skll.utilities.run_experiment:main [py3k]
- skll_convert3 = skll.utilities.skll_convert:main [py3k]
- summarize_results3 = skll.utilities.summarize_results:main [py3k]

requirements:
build:
- python
- joblib
- setuptools
- scikit-learn 0.17.1
- six
- prettytable
- beautiful-soup
- numpy
- scipy
- pyyaml
- configparser [py2k]
- futures [py2k]
- logutils [py2k]
- mock [py2k]

run:
- python
- joblib
- scikit-learn 0.17.1
- six
- prettytable
- beautiful-soup
- numpy
- scipy
- pyyaml
- configparser [py2k]
- futures [py2k]
- logutils [py2k]
- mock [py2k]

test:
# Python imports
imports:
- skll

commands:
- compute_eval_from_predictions --help
- filter_features --help
- generate_predictions --help
- join_features --help
- print_model_weights --help
- run_experiment --help
- skll_convert --help
- summarize_results --help

about:
home: http://github.com/EducationalTestingService/skll
license: BSD 3-clause
31 changes: 20 additions & 11 deletions doc/run_experiment.rst
@@ -414,7 +414,7 @@ custom_learner_path *(Optional)*
""""""""""""""""""""""""""""""""

Path to a ``.py`` file that defines a custom learner. This file will be
imported dynamically. This is only required if a custom learner in specified
imported dynamically. This is only required if a custom learner is specified
in the list of :ref:`learners`.

All custom learners must implement the ``fit`` and
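
For illustration, a minimal custom learner sketch, assuming the usual
scikit-learn estimator interface (the class name and its internals are
hypothetical, not part of this commit):

.. code-block:: python

    import numpy as np
    from sklearn.base import BaseEstimator, ClassifierMixin

    class MajorityClassLearner(BaseEstimator, ClassifierMixin):
        """Hypothetical learner that always predicts the majority class."""

        def fit(self, X, y):
            # Remember the most frequent label in the training data.
            values, counts = np.unique(y, return_counts=True)
            self.majority_ = values[np.argmax(counts)]
            return self

        def predict(self, X):
            # Predict the stored majority label for every row.
            return np.full(X.shape[0], self.majority_)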
@@ -562,7 +562,7 @@ SVR
{'class_weight': 'balanced'}
The second option allows you to assign an specific weight per each
The second option allows you to assign a specific weight per each
class. The default weight per class is 1. For example:

.. code-block:: python
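
    # Hypothetical sketch: the commit's original example is collapsed in
    # this view. Here class 1 gets five times the weight of class 0.
    {'class_weight': {0: 1, 1: 5}}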
@@ -581,7 +581,7 @@ scale by mean, your data will automatically be converted to dense, so use
caution when you have a very large dataset. Valid options are:

none
perform no feature scaling at all.
Perform no feature scaling at all.

with_std
Scale feature values by their standard deviation.
@@ -590,7 +590,7 @@ with_mean
Center features by subtracting their mean.

both
perform both centering and scaling.
Perform both centering and scaling.

Defaults to ``none``.
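
In scikit-learn terms these options roughly map onto ``StandardScaler``
flags; the correspondence below is an illustrative assumption, not taken
from this commit:

.. code-block:: python

    from sklearn.preprocessing import StandardScaler

    # Rough equivalents of the feature_scaling options (illustrative):
    scalers = {
        'with_std': StandardScaler(with_mean=False, with_std=True),
        'with_mean': StandardScaler(with_mean=True, with_std=False),
        'both': StandardScaler(with_mean=True, with_std=True),
    }
    # 'none' applies no scaler; centering ('with_mean' or 'both') needs
    # dense data, hence the caution above about very large datasets.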

@@ -630,12 +630,12 @@ min_feature_count *(Optional)*
The minimum number of examples for which the value of a feature must be nonzero
to be included in the model. Defaults to 1.

.. _objective:
.. _objectives:

objective *(Optional)*
objectives *(Optional)*
""""""""""""""""""""""

The objective function to use for tuning. Valid options are:
The objective functions to use for tuning. This is a list of one or more objective functions. Valid options are:

.. _classification_obj:

@@ -694,7 +694,9 @@ Regression:
* **mean_squared_error**: `Mean squared error regression loss <http://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html>`__


Defaults to ``f1_score_micro``.
Defaults to ``['f1_score_micro']``.

**Note**: Using ``objective=x`` instead of ``objectives=['x']`` is also acceptable, for backward-compatibility.

.. _param_grids:

@@ -839,7 +841,7 @@ Using run_experiment
--------------------
.. program:: run_experiment

Once you have create the :ref:`configuration file <create_config>` for your
Once you have created the :ref:`configuration file <create_config>` for your
experiment, you can usually just get your experiment started by running
``run_experiment CONFIGFILE``. That said, there are a few options that are
specified via command-line arguments instead of in the configuration file:
@@ -919,8 +921,8 @@ Output files
^^^^^^^^^^^^

The result, log, model, and prediction files generated by run_experiment will
all share the following automatically generated prefix
``EXPERIMENT_FEATURESET_LEARNER``, where the following definitions hold:
all share the automatically generated prefix
``EXPERIMENT_FEATURESET_LEARNER_OBJECTIVE``, where the following definitions hold:

``EXPERIMENT``
The name specified as :ref:`experiment_name` in the configuration file.
@@ -931,6 +933,13 @@ all share the following automatically generated prefix
``LEARNER``
The learner the current results/model/etc. was generated using.

``OBJECTIVE``
The objective function the current results/model/etc. was generated using.

However, if ``objectives`` contains only one objective function,
the result, log, model, and prediction files will share the prefix
``EXPERIMENT_FEATURESET_LEARNER``. For backward-compatibility, the same applies when a single objective is specified using ``objective=x``.
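
For example, with an experiment named ``titanic``, a featureset named
``family``, and the learner ``RandomForestClassifier`` tuned on
``accuracy`` (all hypothetical names), the files would share the prefix
``titanic_family_RandomForestClassifier_accuracy``.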

For every experiment you run, there will also be a result summary file
generated that is a tab-delimited file summarizing the results for each
learner-featureset combination you have in your configuration file. It is named
13 changes: 7 additions & 6 deletions doc/tutorial.rst
@@ -90,9 +90,10 @@ The :ref:`Tuning` section defines how we want our model to be tuned. Setting
`GridSearchCV <http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html#sklearn.grid_search.GridSearchCV>`_
class, which is an implementation of the
`standard, brute-force approach to hyperparameter optimization <http://en.wikipedia.org/wiki/Hyperparameter_optimization#Grid_search>`_.
:ref:`objective <objective>` refers to the desired objective function; here,
``accuracy`` will optimize for overall accuracy. You can see a list of all of
all the available objective functions :ref:`here <objective>`.

:ref:`objectives <objectives>` refers to the desired objective functions; here,
``accuracy`` will optimize for overall accuracy. You can see a list of all the
available objective functions :ref:`here <objectives>`.
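
As a rough sketch of what this amounts to in scikit-learn 0.17 terms
(conceptual only, not SKLL's actual code; the learner and parameter grid
below are hypothetical):

.. code-block:: python

    from sklearn.grid_search import GridSearchCV
    from sklearn.linear_model import LogisticRegression

    # grid_search = true with objectives = ['accuracy'] conceptually
    # reduces to a brute-force search like this one.
    param_grid = {'C': [0.01, 0.1, 1.0, 10.0]}
    searcher = GridSearchCV(LogisticRegression(), param_grid,
                            scoring='accuracy')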

In the :ref:`Output` section, the arguments to each of these are directories
where you'd like all of the relevant output from your experiment to go.
@@ -156,9 +157,9 @@ That should produce output like::
We could squelch the warnings about shuffling by setting
:ref:`shuffle <shuffle>` to ``True`` in the :ref:`Input` section.

The reason we see the loading messages repeated is that we are running the
different learners sequentially, whereas SKLL is designed to take advantage
of a cluster to execute everything in parallel via GridMap.


Examine the results
@@ -255,7 +256,7 @@ Training and testing directories
""""""""""""""""""""""""""""""""

At minimum you will probably want to work with a training set and a testing
set. If you have multiple feature files that you would SKLL to join together
set. If you have multiple feature files that you would like SKLL to join together
for you automatically, you will need to create feature files with the exact
same names and store them in training and testing directories. You can
specify these directories in your config file using
2 changes: 1 addition & 1 deletion examples/boston/cross_val.cfg
@@ -13,7 +13,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
objective = pearson
objectives = ['pearson']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/boston/evaluate.cfg
@@ -15,7 +15,7 @@ suffix = .jsonlines
[Tuning]
grid_search = true
#objective = f1_score_micro
objective = unweighted_kappa
objectives = ['unweighted_kappa']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/iris/cross_val.cfg
@@ -14,7 +14,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
objective = f1_score_micro
objectives = ['f1_score_micro']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/iris/evaluate.cfg
@@ -15,7 +15,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
objective = f1_score_micro
objectives = ['f1_score_micro']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/cross_validate.cfg
@@ -12,7 +12,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/evaluate.cfg
@@ -13,7 +13,7 @@ id_col = PassengerId

[Tuning]
grid_search = false
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/evaluate_tuned.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/predict_train+dev.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = false
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/predict_train+dev_tuned.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/predict_train_only.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = false
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/predict_train_only_tuned.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/train.cfg
@@ -13,7 +13,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
scikit-learn==0.17.0
scikit-learn==0.17.1
six
PrettyTable
beautifulsoup4
