In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the bus replacement records, then parse every "YYYY-MM" text column
# into pandas datetimes so that the `.dt` accessor can be used downstream.
bus_groups_replaceinfo = pd.read_csv("../data/bus_groups_replaceinfo.csv")

for date_column in (
    "purchase_date",
    "1st_rep_date",
    "2and_rep_date",
    "data_begins_date",
):
    bus_groups_replaceinfo[date_column] = pd.to_datetime(
        bus_groups_replaceinfo[date_column], format="%Y-%m"
    )

bus_groups_replaceinfo

In [None]:
# Per-group descriptive statistics of the `odometer_1st` column, rounded to
# whole numbers. Selecting with a list keeps the (column, statistic) column
# MultiIndex in the result.
odometer_1st_by_group = bus_groups_replaceinfo.groupby("group")[["odometer_1st"]]
bus_groups_replaceinfo_summary = odometer_1st_by_group.agg(
    ["max", "min", "mean", "std", "count"]
).round(0)
bus_groups_replaceinfo_summary

In [None]:
def calculate_months_diff(date1: pd.Series, date2: pd.Series) -> pd.Series:
    """Return the inclusive number of calendar months from ``date1`` to ``date2``.

    The count is ``(year difference) * 12 + (month difference) + 1``, so two
    dates in the same month give ``1``. Only the year and month components are
    used; the day of month is ignored.

    Parameters
    ----------
    date1 : pd.Series
        Datetime-like series holding the start dates (may contain ``NaT``).
    date2 : pd.Series
        Datetime-like series holding the end dates (may contain ``NaT``).

    Returns
    -------
    pd.Series
        Unnamed ``float64`` series carrying the index of the inputs. An entry
        is ``NaN`` wherever either input date is missing.
    """
    # `.dt.year` / `.dt.month` yield NaN for NaT, so missing dates propagate
    # through the arithmetic without an explicit mask.
    months = (
        (date2.dt.year - date1.dt.year) * 12
        + (date2.dt.month - date1.dt.month)
        + 1
    )
    # Keep the input index so that assigning the result back to the source
    # frame aligns row-for-row even when that frame has a non-default index.
    return pd.Series(months.to_numpy(dtype="float64"), index=months.index)

In [None]:
# Months from purchase to the first replacement, and from the first
# replacement to the second one.
bus_groups_replaceinfo["1st_rep_period"] = calculate_months_diff(
    bus_groups_replaceinfo["purchase_date"],
    bus_groups_replaceinfo["1st_rep_date"],
)
bus_groups_replaceinfo["2and_rep_period"] = calculate_months_diff(
    bus_groups_replaceinfo["1st_rep_date"],
    bus_groups_replaceinfo["2and_rep_date"],
)

# Stack both period columns into one long (group, period) table; rows with a
# missing value are dropped.
period_frames = [
    bus_groups_replaceinfo[["group", period_column]].rename(
        columns={period_column: "period"}
    )
    for period_column in ("1st_rep_period", "2and_rep_period")
]
stacked_periods = pd.concat(period_frames, axis=0)
bus_groups_replace_period = stacked_periods.dropna().reset_index(drop=True)

# Per-group descriptive statistics of the period, rounded to one decimal.
period_by_group = bus_groups_replace_period.groupby("group")
bus_groups_replace_period_summary = period_by_group.agg(
    ["max", "min", "mean", "std", "count"]
).round(1)
bus_groups_replace_period_summary

In [None]:
# Show the odometer and period summaries side by side, joined on the group
# index. The inner join keeps only groups that appear in both tables.
bus_groups_replaceinfo_summary.join(
    bus_groups_replace_period_summary,
    how="inner",
)