# Capture elapsed time

In [1]:
import datetime
import numpy as np
import pandas as pd

In [2]:
# let's create a toy dataframe with some date variables

rng_hr = pd.date_range("2019-03-05", periods=20, freq="h")
rng_month = pd.date_range("2019-03-05", periods=20, freq="ME")

df = pd.DataFrame({"date1": rng_hr, "date2": rng_month})

df.head()

Unnamed: 0,date1,date2
0,2019-03-05 00:00:00,2019-03-31
1,2019-03-05 01:00:00,2019-04-30
2,2019-03-05 02:00:00,2019-05-31
3,2019-03-05 03:00:00,2019-06-30
4,2019-03-05 04:00:00,2019-07-31


In [3]:
# Whole days elapsed from `date1` to `date2`.
# `.dt.days` keeps only the integer day component of each timedelta, so any
# leftover hours are dropped rather than rounded.

df["elapsed_days"] = df["date2"].sub(df["date1"]).dt.days

df.head()

Unnamed: 0,date1,date2,elapsed_days
0,2019-03-05 00:00:00,2019-03-31,26
1,2019-03-05 01:00:00,2019-04-30,55
2,2019-03-05 02:00:00,2019-05-31,86
3,2019-03-05 03:00:00,2019-06-30,116
4,2019-03-05 04:00:00,2019-07-31,147


In [4]:
# Elapsed time from `date1` to `date2` expressed in weeks.
# Dividing the timedelta by a one-week unit yields a float, so partial weeks
# are preserved as a fraction.

df["weeks_passed"] = df["date2"].sub(df["date1"]) / np.timedelta64(1, "W")

df.head()

Unnamed: 0,date1,date2,elapsed_days,weeks_passed
0,2019-03-05 00:00:00,2019-03-31,26,3.714286
1,2019-03-05 01:00:00,2019-04-30,55,7.994048
2,2019-03-05 02:00:00,2019-05-31,86,12.416667
3,2019-03-05 03:00:00,2019-06-30,116,16.696429
4,2019-03-05 04:00:00,2019-07-31,147,21.119048


In [5]:
# Same `date2 - date1` gap, this time as float counts of seconds and minutes:
# divide the timedelta by a one-unit np.timedelta64 of the target resolution.

df["diff_seconds"] = df["date2"].sub(df["date1"]) / np.timedelta64(1, "s")
df["diff_minutes"] = df["date2"].sub(df["date1"]) / np.timedelta64(1, "m")

df.head()

Unnamed: 0,date1,date2,elapsed_days,weeks_passed,diff_seconds,diff_minutes
0,2019-03-05 00:00:00,2019-03-31,26,3.714286,2246400.0,37440.0
1,2019-03-05 01:00:00,2019-04-30,55,7.994048,4834800.0,80580.0
2,2019-03-05 02:00:00,2019-05-31,86,12.416667,7509600.0,125160.0
3,2019-03-05 03:00:00,2019-06-30,116,16.696429,10098000.0,168300.0
4,2019-03-05 04:00:00,2019-07-31,147,21.119048,12772800.0,212880.0


In [6]:
# Whole days between each `date1` value and the moment this cell is executed.
# NOTE: the result depends on the current clock, so the numbers shown will
# differ every time the notebook is re-run.

elapsed_to_today = datetime.datetime.today() - df["date1"]
df["to_today"] = elapsed_to_today.dt.days

df.head()

Unnamed: 0,date1,date2,elapsed_days,weeks_passed,diff_seconds,diff_minutes,to_today
0,2019-03-05 00:00:00,2019-03-31,26,3.714286,2246400.0,37440.0,1889
1,2019-03-05 01:00:00,2019-04-30,55,7.994048,4834800.0,80580.0,1889
2,2019-03-05 02:00:00,2019-05-31,86,12.416667,7509600.0,125160.0,1889
3,2019-03-05 03:00:00,2019-06-30,116,16.696429,10098000.0,168300.0,1889
4,2019-03-05 04:00:00,2019-07-31,147,21.119048,12772800.0,212880.0,1889


In [7]:
import pandas as pd
from feature_engine.datetime import DatetimeSubtraction

In [8]:
rng_hr = pd.date_range("2019-03-05", periods=20, freq="h")
rng_month = pd.date_range("2019-03-05", periods=20, freq="ME")

df = pd.DataFrame({"date1": rng_hr, "date2": rng_month})

In [9]:
# Configure the transformer: `date2` is the variable, `date1` the reference,
# and the result is requested in days ("D").
ds = DatetimeSubtraction(variables="date2", reference="date1", output_unit="D")

# Fit and apply in one call; the returned frame gains a `date2_sub_date1`
# column holding the difference as a float (fractional days are kept).
dft = ds.fit_transform(df)

dft.head()

Unnamed: 0,date1,date2,date2_sub_date1
0,2019-03-05 00:00:00,2019-03-31,26.0
1,2019-03-05 01:00:00,2019-04-30,55.958333
2,2019-03-05 02:00:00,2019-05-31,86.916667
3,2019-03-05 03:00:00,2019-06-30,116.875
4,2019-03-05 04:00:00,2019-07-31,147.833333
