EducationalTestingService · desilinguist · Jun 14, 2023 · Jun 13, 2023 · Jun 13, 2023 · Jun 13, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -31,7 +31,7 @@ repos:
     rev: 'v0.0.264'
     hooks:
       - id: ruff
-        args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D103,skll/data/featureset.py:E501"]
+        args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D103,skll/data/featureset.py:E501,skll/learner/__init__.py:E501,skll/learner/voting.py:E501,skll/learner/utils.py:E501"]
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: 'v1.2.0'
     hooks:

diff --git a/skll/data/readers.py b/skll/data/readers.py
@@ -205,7 +205,7 @@ def _sub_read(self, file):
 
         Parameters
         ----------
-        file
+        file : Ignored
             Not used.
 
         Raises

diff --git a/skll/data/writers.py b/skll/data/writers.py
@@ -218,13 +218,13 @@ def _write_header(self, feature_set, output_file, filter_features):
 
         Parameters
         ----------
-        feature_set
+        feature_set : Ignored
             Not used.
 
-        output_file
+        output_file : Ignored
             Not used.
 
-        filter_features
+        filter_features : Ignored
            Not used.
         """
         pass
@@ -235,16 +235,16 @@ def _write_line(self, id_, label_, feat_dict, output_file):
 
         Parameters
         ----------
-        id_ :
+        id_ : Ignored
             Not used.
 
-        label_
+        label_ : Ignored
             Not used.
 
-        feat_dict
+        feat_dict : Ignored
             Not used.
 
-        output_file
+        output_file : Ignored
              Not used.
 
         Raises
@@ -260,13 +260,13 @@ def _write_data(self, feature_set, output_file, filter_features):
 
         Parameters
         ----------
-        feature_set
+        feature_set : Ignored
             Not used.
 
-        output_file
+        output_file : Ignored
             Not used.
 
-        filter_features
+        filter_features : Ignored
             Not used.
 
         Raises
@@ -679,7 +679,7 @@ def _write_header(
 
         Parameters
         ----------
-        feature_set
+        feature_set : Ignored
             Not used.
 
         output_file : IO[str]
@@ -739,7 +739,7 @@ def _write_line(
         feat_dict : :class:`skll.types.FeatureDict`
             The feature dictionary for the current instance.
 
-        output_file
+        output_file : Ignored
             Not used.
 
         Raises

diff --git a/skll/experiments/__init__.py b/skll/experiments/__init__.py
@@ -8,6 +8,8 @@
 :author: Chee Wee Leong (cleong@ets.org)
 """
 
+from __future__ import annotations
+
 import datetime
 import json
 import logging
@@ -61,19 +63,17 @@
 
 def _classify_featureset(args: Dict[str, Any]) -> List[Dict[str, Any]]:
     """
-    Classification job to be submitted to grid.
+    Run a classification job.
 
     Parameters
     ----------
     args : Dict[str, Any]
-        A dictionary with arguments for classifying the
-        ``FeatureSet`` instance.
+        A dictionary with arguments for classifying the ``FeatureSet`` instance.
 
     Returns
     -------
     res : List[Dict[str, Any]]
-        The results of the classification, in the format
-        of a list of dictionaries.
+        The results of the classification, in the format of a list of dictionaries.
 
     Raises
     ------
@@ -620,44 +620,35 @@ def run_configuration(
 
     Parameters
     ----------
-    config_file : PathOrStr
+    config_file : :class:`skll.types.PathOrStr`
         Path to the configuration file we would like to use.
-    local : bool
+    local : bool, default=False
         Should this be run locally instead of on the cluster?
-        Defaults to ``False``.
-    overwrite : bool
+    overwrite : bool, default=True
         If the model files already exist, should we overwrite
         them instead of re-using them?
-        Defaults to ``True``.
-    queue : str
+    queue : str, default="all.q"
         The DRMAA queue to use if we're running on the cluster.
-        Defaults to ``'all.q'``.
-    hosts : Optional[List[str]]
+    hosts : Optional[List[str]], default=None
         If running on the cluster, these are the machines we should use.
-        Defaults to ``None``.
-    write_summary : bool
+    write_summary : bool, default=True
         Write a TSV file with a summary of the results.
-        Defaults to ``True``.
-    quiet : bool
+    quiet : bool, default=False
         Suppress printing of "Loading..." messages.
-        Defaults to ``False``.
-    ablation : int
+    ablation : int, default=0
         Number of features to remove when doing an ablation
         experiment. If positive, we will perform repeated ablation
         runs for all combinations of features removing the
         specified number at a time. If ``None``, we will use all
         combinations of all lengths. If 0, the default, no
         ablation is performed. If negative, a ``ValueError`` is
         raised.
-        Defaults to 0.
-    resume : bool
+    resume : bool, default=False
         If result files already exist for an experiment, do not
         overwrite them. This is very useful when doing a large
         ablation experiment and part of it crashes.
-        Defaults to ``False``.
-    log_level : int
+    log_level : int, default=logging.INFO
         The level for logging messages.
-        Defaults to ``logging.INFO``.
 
     Returns
     -------

diff --git a/skll/experiments/input.py b/skll/experiments/input.py
@@ -34,50 +34,42 @@ def load_featureset(
 
     Parameters
     ----------
-    dir_path : PathOrStr
+    dir_path : :class:`skll.types.PathOrStr`
         Path to the directory that contains the feature files.
     feat_files : List[str]
         A list of feature file prefixes.
     suffix : str
         The suffix to add to feature file prefixes to get the full filenames.
-    id_col : str
+    id_col : str, default="id"
         Name of the column which contains the instance IDs.
         If no column with that name exists, or `None` is
         specified, example IDs will be automatically generated.
-        Defaults to ``'id'``.
-    label_col : str
+    label_col : str, default="y"
         Name of the column which contains the class labels.
         If no column with that name exists, or `None` is
         specified, the data is considered to be unlabeled.
-        Defaults to ``'y'``.
-    ids_to_floats : bool
+    ids_to_floats : bool, default=False
         Whether to convert the IDs to floats to save memory. Will raise error
         if we encounter non-numeric IDs.
-        Defaults to ``False``.
-    quiet : bool
+    quiet : bool, default=False
         Do not print "Loading..." status message to stderr.
-        Defaults to ``False``.
-    class_map : Optional[ClassMap]
+    class_map : Optional[:class:`skll.types.ClassMap`], default=None
         Mapping from original class labels to new ones. This is
         mainly used for collapsing multiple labels into a single
         class. Anything not in the mapping will be kept the same.
-        Defaults to ``None``.
-    feature_hasher : bool
+    feature_hasher : bool, default=False
         Should we use a FeatureHasher when vectorizing features?
-        Defaults to ``False``.
-    num_features : Optional[int]
+    num_features : Optional[int], default=None
         The number of features to use with the ``FeatureHasher``.
         This should always be set to the power of 2 greater
         than the actual number of features you're using.
-        Defaults to ``None``.
-    logger : Optional[logging.Logger]
+    logger : Optional[logging.Logger], default=None
         A logger instance to use to log messages instead of creating
         a new one by default.
-        Defaults to ``None``.
 
     Returns
     -------
-    merged_set : skll.data.FeatureSet
+    merged_set : :class:`skll.data.featureset.FeatureSet`
         A ``FeatureSet`` instance containing the specified labels, IDs, features,
         and feature vectorizer.
     """

diff --git a/skll/experiments/output.py b/skll/experiments/output.py
@@ -8,6 +8,8 @@
 :author: Chee Wee Leong (cleong@ets.org)
 """
 
+from __future__ import annotations
+
 import csv
 import json
 import math
@@ -91,9 +93,9 @@ def generate_learning_curve_plots(
     ----------
     experiment_name : str
         The name of the experiment.
-    output_dir : PathOrStr
+    output_dir : :class:`skll.types.PathOrStr`
         Path to the output directory for the plots.
-    learning_curve_tsv_file : PathOrStr
+    learning_curve_tsv_file : :class:`skll.types.PathOrStr`
         The path to the learning curve TSV file.
     """
     # convert output_dir to Path object
@@ -219,9 +221,8 @@ def _print_fancy_output(
     ----------
     learner_result_dicts : List[Dict[str, Any]]
         List of result dictionaries.
-    output_file : IO[str]
+    output_file : IO[str], default=sys.stdout
         The file buffer to print to.
-        Defaults to ``sys.stdout``.
     """
     if not learner_result_dicts:
         raise ValueError("Result dictionary list is empty!")
@@ -411,9 +412,8 @@ def _write_summary_file(result_json_paths: List[str], output_file: IO[str], abla
         A list of paths to the individual result JSON files.
     output_file : IO[str]
         The file buffer to write to.
-    ablation : int
+    ablation : int, default=0
         The number of features to remove when doing ablation experiment.
-        Defaults to 0.
     """
     learner_result_dicts = []
     # Map from feature set names to all features in them

diff --git a/skll/experiments/utils.py b/skll/experiments/utils.py
@@ -82,7 +82,7 @@ def _create_learner_result_dicts(
 
     Parameters
     ----------
-    task_results : List[EvaluateTaskResults]
+    task_results : List[:class:`skll.types.EvaluateTaskResults`]
         The task results list.
     grid_scores : Union[List[None], List[float]]
         List of grid scores or a list of ``None`` instances for tasks that do not
@@ -99,8 +99,7 @@ def _create_learner_result_dicts(
     Returns
     -------
     res : List[Dict[str, Any]]
-        The results of the learners, as a list of
-        dictionaries.
+        The results of the learners, as a list of dictionaries.
     """
     res = []
 
@@ -233,16 +232,14 @@ def _get_stat_float(label_result_dict: Dict[str, float], stat: str) -> float:
     Parameters
     ----------
     label_result_dict : Dict[str, float]
-        Dictionary containing the stat we'd like
-        to retrieve for a particular label.
+        Dictionary containing the stat to retrieve for a particular label.
     stat : str
         The statistic we're looking for in the dictionary.
 
     Returns
     -------
     float
-        The value of the stat if it's in the dictionary, and NaN
-        otherwise.
+        The value of the stat if it's in the dictionary, and NaN otherwise.
     """
     if stat in label_result_dict and label_result_dict[stat] is not None:
         return label_result_dict[stat]