docs: update docstrings

Aarhus-Psychiatry-Research · Dec 13, 2022 · 36a9aae · 36a9aae
1 parent 8117cac
commit 36a9aae
Showing 1 changed file with 83 additions and 80 deletions.
diff --git a/src/timeseriesflattener/feature_spec_objects.py b/src/timeseriesflattener/feature_spec_objects.py
@@ -121,7 +121,7 @@ def generate_docstring_from_attributes(cls: BaseModel) -> str:
         # the name of the argument and the value. We are only interested in the
         # value of the type argument.
         type_ = [arg[1] for arg in field_obj.__repr_args__() if arg[0] == "type"]
-        type_ = type_[1]
+        type_ = type_[0]
 
         field_description = field_obj.field_info.description
 
@@ -168,24 +168,25 @@ class _AnySpec(BaseModel):
     Fields:
         values_loader (Optional[Callable]):
             Loader for the df. Tries to resolve from the data_loaders registry,
-            then calls the function which should return a dataframe.
+            then calls the function which should return a dataframe. 
         values_name (Optional[str]):
-            A string that corresponds to a key in a dictionary of multiple
-            dataframes that corresponds to a name of a type of values.
+            A string that maps to a key in a dictionary instantiated by 
+            `split_df_and_register_to_dict`. Each key corresponds to a dataframe, which 
+            is a subset of the df where the values_name == key. 
         loader_kwargs (Optional[Mapping[str, Any]]):
-            Optional kwargs for the values_loader.
+            Optional kwargs for the values_loader. 
         values_df (Optional[DataFrame]):
-            Dataframe with the values.
+            Dataframe with the values. 
         feature_name (str):
-            The name of the feature. Used for column name generation, e.g.
-            <prefix>_<feature_name>.
+            The name of the feature. Used for column name generation, e.g. 
+            <prefix>_<feature_name>. 
         prefix (str):
             The prefix used for column name generation, e.g.
-            <prefix>_<feature_name>.
+            <prefix>_<feature_name>. 
         input_col_name_override (Optional[str]):
-            An override for the input column name. If None, will  attempt
-            to infer it by looking for the only column that doesn't match id_col_name
-            or timestamp_col_name.
+            An override for the input column name. If None, will attempt
+            to infer it by looking for the only column that doesn't match id_col_name 
+            or timestamp_col_name. 
         output_col_name_override (Optional[str]):
             Override the generated column name after flattening the time series"""
 
@@ -293,49 +294,49 @@ class Doc:
 
 
 class TemporalSpec(_AnySpec):
-    """The minimum specification required for all
-        collapsed time series. (temporal features), whether looking ahead or behind.
-        Mostly used for inheritance below.
+    """The minimum specification required for collapsing a temporal 
+        feature, whether looking ahead or behind. Mostly used for inheritance below.
 
     Fields:
         values_loader (Optional[Callable]):
             Loader for the df. Tries to resolve from the data_loaders registry,
-            then calls the function which should return a dataframe.
+            then calls the function which should return a dataframe. 
         values_name (Optional[str]):
-            A string that corresponds to a key in a dictionary of multiple
-            dataframes that corresponds to a name of a type of values.
+            A string that maps to a key in a dictionary instantiated by 
+            `split_df_and_register_to_dict`. Each key corresponds to a dataframe, which 
+            is a subset of the df where the values_name == key. 
         loader_kwargs (Optional[dict]):
-            Optional kwargs passed onto the data loader.
+            Optional kwargs passed onto the data loader. 
         values_df (Optional[DataFrame]):
-            Dataframe with the values.
+            Dataframe with the values. 
         feature_name (str):
-            The name of the feature. Used for column name generation, e.g.
-            <prefix>_<feature_name>.
+            The name of the feature. Used for column name generation, e.g. 
+            <prefix>_<feature_name>. 
         prefix (str):
             The prefix used for column name generation, e.g.
-            <prefix>_<feature_name>.
+            <prefix>_<feature_name>. 
         input_col_name_override (Optional[str]):
-            An override for the input column name. If None, will  attempt
-            to infer it by looking for the only column that doesn't match id_col_name
-            or timestamp_col_name.
+            An override for the input column name. If None, will attempt
+            to infer it by looking for the only column that doesn't match id_col_name 
+            or timestamp_col_name. 
         output_col_name_override (Optional[str]):
-            Override the generated column name after flattening the time series
+            Override the generated column name after flattening the time series 
         interval_days (Union[int, float]):
-            How far to look in the given direction (ahead for outcomes,
-            behind for predictors)
+            How far to look in the given direction (ahead for outcomes, 
+            behind for predictors) 
         resolve_multiple_fn (Callable):
-            A function used for resolving multiple values within the
-            interval_days.
+            A function used for resolving multiple values within the 
+            interval_days. 
         key_for_resolve_multiple (Optional[str]):
-            Key used to lookup the resolve_multiple_fn in the
-            resolve_multiple_fns registry. Used for column name generation. Only
+            Key used to look up the resolve_multiple_fn in the
+            resolve_multiple_fns registry. Used for column name generation. Only 
             required if you don't specify a resolve_multiple_fn. Call
             timeseriesflattener.resolve_multiple_fns.resolve_multiple_fns.get_all()
-            for a list of options.
+            for a list of options. 
         fallback (Union[Callable, int, float, str]):
-            Which value to use if no values are found within interval_days.
+            Which value to use if no values are found within interval_days. 
         allowed_nan_value_prop (float):
-            If NaN is higher than this in the input dataframe during
+            If the proportion of NaN is higher than this in the input dataframe during
             resolution, raise an error. Defaults to: 0.0.
         entity_id_col_name (str):
             Col name for ids in the input dataframe. Defaults to: entity_id."""
@@ -418,42 +419,43 @@ class PredictorSpec(TemporalSpec):
     Fields:
         values_loader (Optional[Callable]):
             Loader for the df. Tries to resolve from the data_loaders registry,
-            then calls the function which should return a dataframe.
+            then calls the function which should return a dataframe. 
         values_name (Optional[str]):
-            A string that corresponds to a key in a dictionary of multiple
-            dataframes that corresponds to a name of a type of values.
+            A string that maps to a key in a dictionary instantiated by 
+            `split_df_and_register_to_dict`. Each key corresponds to a dataframe, which 
+            is a subset of the df where the values_name == key. 
         loader_kwargs (Optional[dict]):
-            Optional kwargs passed onto the data loader.
+            Optional kwargs passed onto the data loader. 
         values_df (Optional[DataFrame]):
-            Dataframe with the values.
+            Dataframe with the values. 
         feature_name (str):
-            The name of the feature. Used for column name generation, e.g.
-            <prefix>_<feature_name>.
+            The name of the feature. Used for column name generation, e.g. 
+            <prefix>_<feature_name>. 
         prefix (str):
             The prefix used for column name generation, e.g.
             <prefix>_<feature_name>. Defaults to: pred.
         input_col_name_override (Optional[str]):
-            An override for the input column name. If None, will  attempt
-            to infer it by looking for the only column that doesn't match id_col_name
-            or timestamp_col_name.
+            An override for the input column name. If None, will attempt
+            to infer it by looking for the only column that doesn't match id_col_name 
+            or timestamp_col_name. 
         output_col_name_override (Optional[str]):
-            Override the generated column name after flattening the time series
+            Override the generated column name after flattening the time series 
         interval_days (Union[int, float]):
-            How far to look in the given direction (ahead for outcomes,
-            behind for predictors)
+            How far to look in the given direction (ahead for outcomes, 
+            behind for predictors) 
         resolve_multiple_fn (Callable):
-            A function used for resolving multiple values within the
-            interval_days.
+            A function used for resolving multiple values within the 
+            interval_days. 
         key_for_resolve_multiple (Optional[str]):
-            Key used to lookup the resolve_multiple_fn in the
-            resolve_multiple_fns registry. Used for column name generation. Only
+            Key used to look up the resolve_multiple_fn in the
+            resolve_multiple_fns registry. Used for column name generation. Only 
             required if you don't specify a resolve_multiple_fn. Call
             timeseriesflattener.resolve_multiple_fns.resolve_multiple_fns.get_all()
-            for a list of options.
+            for a list of options. 
         fallback (Union[Callable, int, float, str]):
-            Which value to use if no values are found within interval_days.
+            Which value to use if no values are found within interval_days. 
         allowed_nan_value_prop (float):
-            If NaN is higher than this in the input dataframe during
+            If the proportion of NaN is higher than this in the input dataframe during
             resolution, raise an error. Defaults to: 0.0.
         entity_id_col_name (str):
             Col name for ids in the input dataframe. Defaults to: entity_id.
@@ -493,50 +495,51 @@ class OutcomeSpec(TemporalSpec):
     Fields:
         values_loader (Optional[Callable]):
             Loader for the df. Tries to resolve from the data_loaders registry,
-            then calls the function which should return a dataframe.
+            then calls the function which should return a dataframe. 
         values_name (Optional[str]):
-            A string that corresponds to a key in a dictionary of multiple
-            dataframes that corresponds to a name of a type of values.
+            A string that maps to a key in a dictionary instantiated by 
+            `split_df_and_register_to_dict`. Each key corresponds to a dataframe, which 
+            is a subset of the df where the values_name == key. 
         loader_kwargs (Optional[dict]):
-            Optional kwargs passed onto the data loader.
+            Optional kwargs passed onto the data loader. 
         values_df (Optional[DataFrame]):
-            Dataframe with the values.
+            Dataframe with the values. 
         feature_name (str):
-            The name of the feature. Used for column name generation, e.g.
-            <prefix>_<feature_name>.
+            The name of the feature. Used for column name generation, e.g. 
+            <prefix>_<feature_name>. 
         prefix (str):
             The prefix used for column name generation, e.g.
             <prefix>_<outcome_name>. Defaults to: outc.
         input_col_name_override (Optional[str]):
-            An override for the input column name. If None, will  attempt
-            to infer it by looking for the only column that doesn't match id_col_name
-            or timestamp_col_name.
+            An override for the input column name. If None, will attempt
+            to infer it by looking for the only column that doesn't match id_col_name 
+            or timestamp_col_name. 
         output_col_name_override (Optional[str]):
-            Override the generated column name after flattening the time series
+            Override the generated column name after flattening the time series 
         interval_days (Union[int, float]):
-            How far to look in the given direction (ahead for outcomes,
-            behind for predictors)
+            How far to look in the given direction (ahead for outcomes, 
+            behind for predictors) 
         resolve_multiple_fn (Callable):
-            A function used for resolving multiple values within the
-            interval_days.
+            A function used for resolving multiple values within the 
+            interval_days. 
         key_for_resolve_multiple (Optional[str]):
-            Key used to lookup the resolve_multiple_fn in the
-            resolve_multiple_fns registry. Used for column name generation. Only
+            Key used to look up the resolve_multiple_fn in the
+            resolve_multiple_fns registry. Used for column name generation. Only 
             required if you don't specify a resolve_multiple_fn. Call
             timeseriesflattener.resolve_multiple_fns.resolve_multiple_fns.get_all()
-            for a list of options.
+            for a list of options. 
         fallback (Union[Callable, int, float, str]):
-            Which value to use if no values are found within interval_days.
+            Which value to use if no values are found within interval_days. 
         allowed_nan_value_prop (float):
-            If NaN is higher than this in the input dataframe during
+            If the proportion of NaN is higher than this in the input dataframe during
             resolution, raise an error. Defaults to: 0.0.
         entity_id_col_name (str):
             Col name for ids in the input dataframe. Defaults to: entity_id.
         incident (bool):
-            Whether the outcome is incident or not, i.e. whether you
-            can experience it more than once. For example, type 2 diabetes is incident.
-            Incident outcomes can be handled in a vectorised way during resolution,
-            which is faster than non-incident outcomes.
+            Whether the outcome is incident or not, i.e. whether you can only
+            experience it once. For example, type 2 diabetes is incident.
+            Incident outcomes can be handled in a vectorised way during resolution,
+            which is faster than resolving non-incident outcomes.
         lookahead_days (Union[int, float]):
             How far ahead to look for values"""