# Capture elapsed time

In [1]:
import datetime
import numpy as np
import pandas as pd

In [2]:
# let's create a toy dataframe with some date variables

date = "2024-05-17"

rng_hr = pd.date_range(date, periods=20, freq="h")
rng_month = pd.date_range(date, periods=20, freq="ME")

df = pd.DataFrame(
    {"date1": rng_hr, "date2": rng_month})  

df.head()

Unnamed: 0,date1,date2
0,2024-05-17 00:00:00,2024-05-31
1,2024-05-17 01:00:00,2024-06-30
2,2024-05-17 02:00:00,2024-07-31
3,2024-05-17 03:00:00,2024-08-31
4,2024-05-17 04:00:00,2024-09-30


In [3]:
# Whole days elapsed from date1 to date2. `.dt.days` returns only the day
# component of each timedelta, so any leftover hours are dropped.

df["elapsed_days"] = df["date2"].sub(df["date1"]).dt.days

df.head()

Unnamed: 0,date1,date2,elapsed_days
0,2024-05-17 00:00:00,2024-05-31,14
1,2024-05-17 01:00:00,2024-06-30,43
2,2024-05-17 02:00:00,2024-07-31,74
3,2024-05-17 03:00:00,2024-08-31,105
4,2024-05-17 04:00:00,2024-09-30,135


In [4]:
# Elapsed time from date1 to date2 expressed in weeks. Dividing the timedelta
# by a one-week timedelta yields a float, so partial weeks are kept.

one_week = np.timedelta64(1, "W")
df["weeks_passed"] = df["date2"].sub(df["date1"]) / one_week

df.head()

Unnamed: 0,date1,date2,elapsed_days,weeks_passed
0,2024-05-17 00:00:00,2024-05-31,14,2.0
1,2024-05-17 01:00:00,2024-06-30,43,6.279762
2,2024-05-17 02:00:00,2024-07-31,74,10.702381
3,2024-05-17 03:00:00,2024-08-31,105,15.125
4,2024-05-17 04:00:00,2024-09-30,135,19.404762


In [5]:
# The same date2 - date1 interval, this time in seconds and in minutes.
# The difference is computed once and then rescaled to each unit.

elapsed = df["date2"] - df["date1"]

df["diff_seconds"] = elapsed / np.timedelta64(1, "s")
df["diff_minutes"] = elapsed / np.timedelta64(1, "m")

df.head()

Unnamed: 0,date1,date2,elapsed_days,weeks_passed,diff_seconds,diff_minutes
0,2024-05-17 00:00:00,2024-05-31,14,2.0,1209600.0,20160.0
1,2024-05-17 01:00:00,2024-06-30,43,6.279762,3798000.0,63300.0
2,2024-05-17 02:00:00,2024-07-31,74,10.702381,6472800.0,107880.0
3,2024-05-17 03:00:00,2024-08-31,105,15.125,9147600.0,152460.0
4,2024-05-17 04:00:00,2024-09-30,135,19.404762,11736000.0,195600.0


In [6]:
# Whole days between each date1 value and the moment this cell is executed.
# The reference point is the current local wall-clock time, so the values
# depend on when the notebook is run.

now = datetime.datetime.now()
df["to_today"] = (now - df["date1"]).dt.days

df.head()

Unnamed: 0,date1,date2,elapsed_days,weeks_passed,diff_seconds,diff_minutes,to_today
0,2024-05-17 00:00:00,2024-05-31,14,2.0,1209600.0,20160.0,0
1,2024-05-17 01:00:00,2024-06-30,43,6.279762,3798000.0,63300.0,0
2,2024-05-17 02:00:00,2024-07-31,74,10.702381,6472800.0,107880.0,0
3,2024-05-17 03:00:00,2024-08-31,105,15.125,9147600.0,152460.0,0
4,2024-05-17 04:00:00,2024-09-30,135,19.404762,11736000.0,195600.0,0


In [7]:
import pandas as pd
from feature_engine.datetime import DatetimeSubtraction

In [8]:
date = "2024-05-17"

rng_hr = pd.date_range(date, periods=20, freq="h")
rng_month = pd.date_range(date, periods=20, freq="ME")

df = pd.DataFrame(
    {"date1": rng_hr, "date2": rng_month})  

df.head()

Unnamed: 0,date1,date2
0,2024-05-17 00:00:00,2024-05-31
1,2024-05-17 01:00:00,2024-06-30
2,2024-05-17 02:00:00,2024-07-31
3,2024-05-17 03:00:00,2024-08-31
4,2024-05-17 04:00:00,2024-09-30


In [9]:
# Configure the feature-engine transformer to compute date2 - date1 with the
# result expressed in days ("D").
ds = DatetimeSubtraction(
    variables="date2",
    reference="date1",
    output_unit="D",
)

# Fit and apply in one step. As the displayed output shows, the result is
# added as a new column named "date2_sub_date1" and keeps the fractional part
# of the day (e.g. 43.958333), unlike the truncated `.dt.days` values computed
# earlier in the notebook.
dft = ds.fit_transform(df)

dft.head()

Unnamed: 0,date1,date2,date2_sub_date1
0,2024-05-17 00:00:00,2024-05-31,14.0
1,2024-05-17 01:00:00,2024-06-30,43.958333
2,2024-05-17 02:00:00,2024-07-31,74.916667
3,2024-05-17 03:00:00,2024-08-31,105.875
4,2024-05-17 04:00:00,2024-09-30,135.833333
