# Date and time features with Feature-engine

In [1]:
import pandas as pd
from feature_engine.datetime import DatetimeFeatures

In [2]:
# Build a small example dataframe that holds a single datetime column.

# 20 consecutive calendar days, starting on 2024-05-17
rng_ = pd.date_range(start="2024-05-17", periods=20, freq="D")

# wrap the dates in a one-column dataframe named "date"
data = rng_.to_frame(index=False, name="date")

# preview the first 5 rows
data.head()

Unnamed: 0,date
0,2024-05-17
1,2024-05-18
2,2024-05-19
3,2024-05-20
4,2024-05-21


In [3]:
# Set up a transformer that extracts all possible datetime features.

dtfs = DatetimeFeatures(
    features_to_extract="all",
    variables=None,  # None: the datetime variable is identified automatically
)

In [4]:
# Fit the transformer and derive the datetime features in one step.
dft = dtfs.fit_transform(data)

# Collect the names of the derived columns. Feature-engine names each
# one after the original variable ("date") plus the extracted feature,
# so every derived column contains the string "date".
vars_ = list(filter(lambda col: "date" in col, dft.columns))

# Preview the derived features.
dft.loc[:, vars_].head()

Unnamed: 0,date_month,date_quarter,date_semester,date_year,date_week,date_day_of_week,date_day_of_month,date_day_of_year,date_weekend,date_month_start,date_month_end,date_quarter_start,date_quarter_end,date_year_start,date_year_end,date_leap_year,date_days_in_month,date_hour,date_minute,date_second
0,5,2,1,2024,20,4,17,138,0,0,0,0,0,0,0,1,31,0,0,0
1,5,2,1,2024,20,5,18,139,1,0,0,0,0,0,0,1,31,0,0,0
2,5,2,1,2024,20,6,19,140,1,0,0,0,0,0,0,1,31,0,0,0
3,5,2,1,2024,21,0,20,141,0,0,0,0,0,0,0,1,31,0,0,0
4,5,2,1,2024,21,1,21,142,0,0,0,0,0,0,0,1,31,0,0,0


In [5]:
# Display the list of captured feature names.
vars_

['date_month',
 'date_quarter',
 'date_semester',
 'date_year',
 'date_week',
 'date_day_of_week',
 'date_day_of_month',
 'date_day_of_year',
 'date_weekend',
 'date_month_start',
 'date_month_end',
 'date_quarter_start',
 'date_quarter_end',
 'date_year_start',
 'date_year_end',
 'date_leap_year',
 'date_days_in_month',
 'date_hour',
 'date_minute',
 'date_second']

In [6]:
# After fitting, the transformer stores the datetime variables
# that it identified automatically in the `variables_` attribute.

dtfs.variables_

['date']

In [7]:
# Set up a transformer that extracts only the most common features.

dtfs = DatetimeFeatures(
    features_to_extract=None,  # None: extract the most common features
    variables=None,  # None: the datetime variable is identified automatically
)

In [8]:
# Fit the transformer and derive the datetime features in one step.
dft = dtfs.fit_transform(data)

# Collect the names of the derived columns. Feature-engine names each
# one after the original variable ("date") plus the extracted feature,
# so every derived column contains the string "date".
vars_ = list(filter(lambda col: "date" in col, dft.columns))

# Preview the derived features.
dft.loc[:, vars_].head()

Unnamed: 0,date_month,date_year,date_day_of_week,date_day_of_month,date_hour,date_minute,date_second
0,5,2024,4,17,0,0,0
1,5,2024,5,18,0,0,0
2,5,2024,6,19,0,0,0
3,5,2024,0,20,0,0,0
4,5,2024,1,21,0,0,0


In [9]:
# Set up a transformer that extracts only the features we list explicitly.

dtfs = DatetimeFeatures(
    features_to_extract=["week", "year", "day_of_month", "day_of_week"],
    variables=None,  # None: the datetime variable is identified automatically
)

In [10]:
# Fit the transformer and derive the datetime features in one step.
dft = dtfs.fit_transform(data)

# Collect the names of the derived columns. Feature-engine names each
# one after the original variable ("date") plus the extracted feature,
# so every derived column contains the string "date".
vars_ = list(filter(lambda col: "date" in col, dft.columns))

# Preview the derived features.
dft.loc[:, vars_].head()

Unnamed: 0,date_week,date_year,date_day_of_month,date_day_of_week
0,20,2024,17,4
1,20,2024,18,5
2,20,2024,19,6
3,21,2024,20,0
4,21,2024,21,1


In [11]:
# Build a toy dataframe whose timestamps come from two
# different time zones.

# three hourly timestamps per time zone, each starting at 09:00 local time
zones = ["Europe/Berlin", "US/Central"]
pieces = [
    pd.Series(
        pd.date_range(start="2024-08-01 09:00", freq="h", periods=3, tz=zone)
    )
    for zone in zones
]

# stack the two series vertically; each one keeps its own 0-2 index,
# so the index labels repeat in the resulting dataframe
stacked = pd.concat(pieces, axis=0)

df = pd.DataFrame({"time": stacked})

df

Unnamed: 0,time
0,2024-08-01 09:00:00+02:00
1,2024-08-01 10:00:00+02:00
2,2024-08-01 11:00:00+02:00
0,2024-08-01 09:00:00-05:00
1,2024-08-01 10:00:00-05:00
2,2024-08-01 11:00:00-05:00


We can see the different time zones indicated by the +02:00 and -05:00 offsets, which are expressed relative to UTC.

In [12]:
# Configure the transformer for timestamps recorded in different
# time zones. With utc=True, DatetimeFeatures takes all timestamps
# to UTC before deriving the features.
dfts = DatetimeFeatures(
    utc=True,  # to handle timezones
    drop_original=False,  # keep the original column next to the new features
    features_to_extract=["day_of_week", "hour", "minute"],
)

In [13]:
# Derive the features from the timestamps in different time zones.
dft = dfts.fit_transform(df)

# Preview the first 5 of the 6 rows.
dft.head(n=5)

Unnamed: 0,time,time_day_of_week,time_hour,time_minute
0,2024-08-01 09:00:00+02:00,3,7,0
1,2024-08-01 10:00:00+02:00,3,8,0
2,2024-08-01 11:00:00+02:00,3,9,0
0,2024-08-01 09:00:00-05:00,3,14,0
1,2024-08-01 10:00:00-05:00,3,15,0
