Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve docstrings and type hints (Part 3) #739

Merged
merged 5 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ repos:
rev: 'v0.0.264'
hooks:
- id: ruff
args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D103,skll/data/featureset.py:E501"]
args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D103,skll/data/featureset.py:E501,skll/learner/__init__.py:E501,skll/learner/voting.py:E501,skll/learner/utils.py:E501"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.2.0'
hooks:
Expand Down
2 changes: 1 addition & 1 deletion skll/data/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def _sub_read(self, file):

Parameters
----------
file
file : Ignored
Not used.

Raises
Expand Down
24 changes: 12 additions & 12 deletions skll/data/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,13 @@ def _write_header(self, feature_set, output_file, filter_features):

Parameters
----------
feature_set
feature_set : Ignored
Not used.

output_file
output_file : Ignored
Not used.

filter_features
filter_features : Ignored
Not used.
"""
pass
Expand All @@ -235,16 +235,16 @@ def _write_line(self, id_, label_, feat_dict, output_file):

Parameters
----------
id_ :
id_ : Ignored
Not used.

label_
label_ : Ignored
Not used.

feat_dict
feat_dict : Ignored
Not used.

output_file
output_file : Ignored
Not used.

Raises
Expand All @@ -260,13 +260,13 @@ def _write_data(self, feature_set, output_file, filter_features):

Parameters
----------
feature_set
feature_set : Ignored
Not used.

output_file
output_file : Ignored
Not used.

filter_features
filter_features : Ignored
Not used.

Raises
Expand Down Expand Up @@ -679,7 +679,7 @@ def _write_header(

Parameters
----------
feature_set
feature_set : Ignored
Not used.

output_file : IO[str]
Expand Down Expand Up @@ -739,7 +739,7 @@ def _write_line(
feat_dict : :class:`skll.types.FeatureDict`
The feature dictionary for the current instance.

output_file
output_file : Ignored
Not used.

Raises
Expand Down
39 changes: 15 additions & 24 deletions skll/experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
:author: Chee Wee Leong (cleong@ets.org)
"""

from __future__ import annotations

import datetime
import json
import logging
Expand Down Expand Up @@ -61,19 +63,17 @@

def _classify_featureset(args: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Classification job to be submitted to grid.
Run classification job.

Parameters
----------
args : Dict[str, Any]
A dictionary with arguments for classifying the
``FeatureSet`` instance.
A dictionary with arguments for classifying the ``FeatureSet`` instance.

Returns
-------
res : List[Dict[str, Any]]
The results of the classification, in the format
of a list of dictionaries.
The results of the classification, in the format of a list of dictionaries.

Raises
------
Expand Down Expand Up @@ -620,44 +620,35 @@ def run_configuration(

Parameters
----------
config_file : PathOrStr
config_file : :class:`skll.types.PathOrStr`
Path to the configuration file we would like to use.
local : bool
local : bool, default=False
Should this be run locally instead of on the cluster?
Defaults to ``False``.
overwrite : bool
overwrite : bool, default=True
If the model files already exist, should we overwrite
them instead of re-using them?
Defaults to ``True``.
queue : str
queue : str, default="all.q"
The DRMAA queue to use if we're running on the cluster.
Defaults to ``'all.q'``.
hosts : Optional[List[str]]
hosts : Optional[List[str]], default=None
If running on the cluster, these are the machines we should use.
Defaults to ``None``.
write_summary : bool
write_summary : bool, default=True
Write a TSV file with a summary of the results.
Defaults to ``True``.
quiet : bool
quiet : bool, default=False
Suppress printing of "Loading..." messages.
Defaults to ``False``.
ablation : int
ablation : int, default=0
Number of features to remove when doing an ablation
experiment. If positive, we will perform repeated ablation
runs for all combinations of features removing the
specified number at a time. If ``None``, we will use all
combinations of all lengths. If 0, the default, no
ablation is performed. If negative, a ``ValueError`` is
raised.
Defaults to 0.
resume : bool
resume : bool, default=False
If result files already exist for an experiment, do not
overwrite them. This is very useful when doing a large
ablation experiment and part of it crashes.
Defaults to ``False``.
log_level : int
log_level : int, default=logging.INFO
The level for logging messages.
Defaults to ``logging.INFO``.

Returns
-------
Expand Down
28 changes: 10 additions & 18 deletions skll/experiments/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,50 +34,42 @@ def load_featureset(

Parameters
----------
dir_path : PathOrStr
dir_path : :class:`skll.types.PathOrStr`
Path to the directory that contains the feature files.
feat_files : List[str]
A list of feature file prefixes.
suffix : str
The suffix to add to feature file prefixes to get the full filenames.
id_col : str
id_col : str, default="id"
Name of the column which contains the instance IDs.
If no column with that name exists, or ``None`` is
specified, example IDs will be automatically generated.
Defaults to ``'id'``.
label_col : str
label_col : str, default="y"
Name of the column which contains the class labels.
If no column with that name exists, or ``None`` is
specified, the data is considered to be unlabeled.
Defaults to ``'y'``.
ids_to_floats : bool
ids_to_floats : bool, default=False
Whether to convert the IDs to floats to save memory. Will raise an error
if we encounter non-numeric IDs.
Defaults to ``False``.
quiet : bool
quiet : bool, default=False
Do not print "Loading..." status message to stderr.
Defaults to ``False``.
class_map : Optional[ClassMap]
class_map : Optional[:class:`skll.types.ClassMap`], default=None
Mapping from original class labels to new ones. This is
mainly used for collapsing multiple labels into a single
class. Anything not in the mapping will be kept the same.
Defaults to ``None``.
feature_hasher : bool
feature_hasher : bool, default=False
Should we use a FeatureHasher when vectorizing features?
Defaults to ``False``.
num_features : Optional[int]
num_features : Optional[int], default=None
The number of features to use with the ``FeatureHasher``.
This should always be set to the power of 2 greater
than the actual number of features you're using.
Defaults to ``None``.
logger : Optional[logging.Logger]
logger : Optional[logging.Logger], default=None
A logger instance to use to log messages instead of creating
a new one by default.
Defaults to ``None``.

Returns
-------
merged_set : skll.data.FeatureSet
merged_set : :class:`skll.data.featureset.FeatureSet`
A ``FeatureSet`` instance containing the specified labels, IDs, features,
and feature vectorizer.
"""
Expand Down
12 changes: 6 additions & 6 deletions skll/experiments/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
:author: Chee Wee Leong (cleong@ets.org)
"""

from __future__ import annotations

import csv
import json
import math
Expand Down Expand Up @@ -91,9 +93,9 @@ def generate_learning_curve_plots(
----------
experiment_name : str
The name of the experiment.
output_dir : PathOrStr
output_dir : :class:`skll.types.PathOrStr`
Path to the output directory for the plots.
learning_curve_tsv_file : PathOrStr
learning_curve_tsv_file : :class:`skll.types.PathOrStr`
The path to the learning curve TSV file.
"""
# convert output_dir to Path object
Expand Down Expand Up @@ -219,9 +221,8 @@ def _print_fancy_output(
----------
learner_result_dicts : List[Dict[str, Any]]
List of result dictionaries.
output_file : IO[str]
output_file : IO[str], default=sys.stdout
The file buffer to print to.
Defaults to ``sys.stdout``.
"""
if not learner_result_dicts:
raise ValueError("Result dictionary list is empty!")
Expand Down Expand Up @@ -411,9 +412,8 @@ def _write_summary_file(result_json_paths: List[str], output_file: IO[str], abla
A list of paths to the individual result JSON files.
output_file : IO[str]
The file buffer to write to.
ablation : int
ablation : int, default=0
The number of features to remove when doing an ablation experiment.
Defaults to 0.
"""
learner_result_dicts = []
# Map from feature set names to all features in them
Expand Down
11 changes: 4 additions & 7 deletions skll/experiments/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def _create_learner_result_dicts(

Parameters
----------
task_results : List[EvaluateTaskResults]
task_results : List[:class:`skll.types.EvaluateTaskResults`]
The task results list.
grid_scores : Union[List[None], List[float]]
List of grid scores or a list of ``None`` instances for tasks that do not
Expand All @@ -99,8 +99,7 @@ def _create_learner_result_dicts(
Returns
-------
res : List[Dict[str, Any]]
The results of the learners, as a list of
dictionaries.
The results of the learners, as a list of dictionaries.
"""
res = []

Expand Down Expand Up @@ -233,16 +232,14 @@ def _get_stat_float(label_result_dict: Dict[str, float], stat: str) -> float:
Parameters
----------
label_result_dict : Dict[str, float]
Dictionary containing the stat we'd like
to retrieve for a particular label.
Dictionary containing the stats to retrieve for a particular label.
stat : str
The statistic we're looking for in the dictionary.

Returns
-------
float
The value of the stat if it's in the dictionary, and NaN
otherwise.
The value of the stat if it's in the dictionary, and NaN otherwise.
"""
if stat in label_result_dict and label_result_dict[stat] is not None:
return label_result_dict[stat]
Expand Down