Skip to content

Commit

Permalink
docs: improve from review suggestions
Browse files (browse the repository at this point in the history)
  • Loading branch information
MartinBernstorff committed Dec 7, 2022
1 parent c161f9a commit 57804df
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 18 deletions.
30 changes: 15 additions & 15 deletions src/timeseriesflattener/flattened_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -647,7 +647,7 @@ def _drop_pred_time_if_insufficient_look_distance(self, df: pd.DataFrame):
elif isinstance(spec, PredictorSpec):
cutoff_date_behind = max(cutoff_date_behind, spec_cutoff_date)

# Drop all prediction that are outside the cutoffs window
# Drop all prediction times that are outside the cutoffs window
output_df = df[
(df[self.timestamp_col_name] >= cutoff_date_behind)
& (df[self.timestamp_col_name] <= cutoff_date_ahead)
Expand Down Expand Up @@ -732,54 +732,54 @@ def add_spec(
def add_age(
self,
date_of_birth_df: DataFrame,
input_date_of_birth_col_name: Optional[str] = "date_of_birth",
date_of_birth_col_name: Optional[str] = "date_of_birth",
output_prefix: str = "pred",
):
"""Add age at prediction time as predictor.
Also add patient's birth date. Has its own function because of its very frequent use.
Has its own function because of its very frequent use.
Args:
date_of_birth_df (DataFrame): Two columns, id and date_of_birth.
input_date_of_birth_col_name (str, optional): Name of the date_of_birth column in id2date_of_birth.
date_of_birth_df (DataFrame): Two columns, one matching self.id_col_name and one containing date_of_birth.
date_of_birth_col_name (str, optional): Name of the date_of_birth column in date_of_birth_df.
Defaults to "date_of_birth".
output_prefix (str, optional): Prefix for the output column. Defaults to "pred".
"""
if date_of_birth_df[input_date_of_birth_col_name].dtype != "<M8[ns]":
if date_of_birth_df[date_of_birth_col_name].dtype != "<M8[ns]":
try:
date_of_birth_df[input_date_of_birth_col_name] = pd.to_datetime(
date_of_birth_df[input_date_of_birth_col_name],
date_of_birth_df[date_of_birth_col_name] = pd.to_datetime(
date_of_birth_df[date_of_birth_col_name],
format="%Y-%m-%d",
)
except ValueError as e:
raise ValueError(
f"Conversion of {input_date_of_birth_col_name} to datetime failed, doesn't match format %Y-%m-%d. Recommend converting to datetime before adding.",
f"Conversion of {date_of_birth_col_name} to datetime failed, doesn't match format %Y-%m-%d. Recommend converting to datetime before adding.",
) from e

output_age_col_name = f"{output_prefix}_age_in_years"

self._add_static_info(
static_spec=AnySpec(
values_df=date_of_birth_df,
input_col_name_override=input_date_of_birth_col_name,
prefix=output_prefix,
input_col_name_override=date_of_birth_col_name,
prefix="temp",
# We typically don't want to use date of birth as a predictor,
# but might want to use transformations - e.g. "year of birth" or "age at prediction time".
feature_name=input_date_of_birth_col_name,
feature_name=date_of_birth_col_name,
),
)

data_of_birth_col_name = f"{output_prefix}_{input_date_of_birth_col_name}"
tmp_date_of_birth_col_name = f"temp_{date_of_birth_col_name}"

self._df[output_age_col_name] = (
(
self._df[self.timestamp_col_name] - self._df[data_of_birth_col_name]
self._df[self.timestamp_col_name] - self._df[tmp_date_of_birth_col_name]
).dt.days
/ (365.25)
).round(2)

# Remove date of birth column
self._df.drop(columns=data_of_birth_col_name, inplace=True)
self._df.drop(columns=tmp_date_of_birth_col_name, inplace=True)

def compute(self):
"""Compute the flattened dataset."""
Expand Down
6 changes: 3 additions & 3 deletions tests/test_timeseriesflattener/test_add_values.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -255,7 +255,7 @@ def test_add_age():

dataset.add_age(
date_of_birth_df=str_to_df(static_predictor),
input_date_of_birth_col_name="date_of_birth",
date_of_birth_col_name="date_of_birth",
output_prefix=output_prefix,
)

Expand Down Expand Up @@ -294,7 +294,7 @@ def test_add_age_error():
with pytest.raises(ValueError):
dataset.add_age(
date_of_birth_df=str_to_df(static_predictor),
input_date_of_birth_col_name="date_of_birth",
date_of_birth_col_name="date_of_birth",
)


Expand Down Expand Up @@ -417,7 +417,7 @@ def test_add_multiple_static_predictors():
)

flattened_dataset.add_age(
input_date_of_birth_col_name="date_of_birth",
date_of_birth_col_name="date_of_birth",
date_of_birth_df=birthdates_df,
)

Expand Down

0 comments on commit 57804df

Please sign in to comment.