Skip to content

Commit

Permalink
docs: add basic tutorial
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBernstorff committed Dec 2, 2022
1 parent c569b74 commit 8136a1b
Show file tree
Hide file tree
Showing 10 changed files with 231,311 additions and 40,023 deletions.
15 changes: 15 additions & 0 deletions src/timeseriesflattener/testing/load_synth_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,21 @@ def synth_predictor_float(
return load_raw_test_csv("synth_raw_float_1.csv", n_rows=n_rows)


@data_loaders.register("synth_sex")
def load_synth_sex(
    n_rows: Optional[int] = None,
) -> pd.DataFrame:
    """Load the synthetic sex dataset stored in ``synth_sex.csv``.

    Args:
        n_rows: Number of rows to return. Defaults to None, which is passed
            through unchanged to ``load_raw_test_csv`` (presumably meaning
            "load every row" -- confirm against that helper).

    Returns:
        pd.DataFrame
    """
    # All file handling is delegated to the shared raw-CSV loader.
    synth_sex_df = load_raw_test_csv("synth_sex.csv", n_rows=n_rows)
    return synth_sex_df


@data_loaders.register("synth_predictor_binary")
def synth_predictor_binary(
n_rows: Optional[int] = None,
Expand Down
26 changes: 15 additions & 11 deletions tests/test_data/raw/create_synth_raw_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,23 @@
# Get project root directory
project_root = Path(__file__).resolve().parents[3]

column_specs = {
"dw_ek_borger": {
"column_type": "uniform_int",
"min": 0,
"max": 10_000,
column_specs = [
{
"dw_ek_borger": {
"column_type": "uniform_int",
"min": 0,
"max": 10_000,
}
},
"timestamp": {
"column_type": "datetime_uniform",
"min": -5 * 365,
"max": 0 * 365,
{
"timestamp": {
"column_type": "datetime_uniform",
"min": -5 * 365,
"max": 0 * 365,
}
},
"value": {"column_type": "uniform_int", "min": 0, "max": 1},
}
{"value": {"column_type": "uniform_int", "min": 0, "max": 2}},
]

for i in (1, 2):
df = generate_data_columns(
Expand Down
28 changes: 16 additions & 12 deletions tests/test_data/raw/create_synth_raw_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,28 @@
if __name__ == "__main__":
# Get project root directory

column_specs = {
"dw_ek_borger": {
"column_type": "uniform_int",
"min": 0,
"max": 10_000,
column_specs = [
{
"dw_ek_borger": {
"column_type": "uniform_int",
"min": 0,
"max": 10_000,
}
},
"timestamp": {
"column_type": "datetime_uniform",
"min": -5 * 365,
"max": 0 * 365,
{
"timestamp": {
"column_type": "datetime_uniform",
"min": -5 * 365,
"max": 0 * 365,
}
},
"value": {"column_type": "uniform_float", "min": 0, "max": 10},
}
{"value": {"column_type": "uniform_float", "min": 0, "max": 10}},
]

for i in (1, 2):
df = generate_data_columns(
predictors=column_specs,
n_samples=10_000,
n_samples=100_000,
)

df.to_csv(
Expand Down
34 changes: 34 additions & 0 deletions tests/test_data/raw/create_synth_sex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Generate raw binary dataframe."""

from pathlib import Path

from psycopmlutils.synth_data_generator.synth_col_generators import (
generate_data_columns,
)

if __name__ == "__main__":
    # The project root is taken as the fourth ancestor (parents[3]) of this
    # script's resolved path; the CSV is written back under tests/test_data/raw.
    project_root = Path(__file__).resolve().parents[3]
    output_csv = project_root / "tests" / "test_data" / "raw" / "synth_sex.csv"

    # One spec dict per generated column, handed to the generator in order.
    id_column_spec = {
        "dw_ek_borger": {
            "column_type": "uniform_int",
            "min": 0,
            "max": 10_000,
        },
    }
    sex_column_spec = {
        "female": {"column_type": "uniform_int", "min": 0, "max": 2},
    }

    sampled_df = generate_data_columns(
        predictors=[id_column_spec, sex_column_spec],
        n_samples=100_000,
    )

    # Collapse to a single row per dw_ek_borger, keeping the last value seen
    # in each group, so every ID ends up with exactly one "female" value.
    one_row_per_id = sampled_df.groupby("dw_ek_borger").last().reset_index()

    one_row_per_id.to_csv(output_csv, index=False)
Loading

0 comments on commit 8136a1b

Please sign in to comment.