# Extracting features from dates with pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a small example dataframe holding a single datetime column.

# 20 consecutive calendar days starting on 2024-05-17
# (pd.date_range returns a DatetimeIndex, not a Series).
rng_ = pd.date_range(start="2024-05-17", periods=20, freq="D")

# Turn the index into a one-column dataframe with a default integer index.
data = rng_.to_frame(index=False, name="date")

# Display the first 5 rows.
data.head(5)

Unnamed: 0,date
0,2024-05-17
1,2024-05-18
2,2024-05-19
3,2024-05-20
4,2024-05-21


In [3]:
# Calendar year of each date.

year_values = data["date"].dt.year
data["year"] = year_values

# Show the source column next to the derived one.
data.loc[:, ["date", "year"]].head(5)

Unnamed: 0,date,year
0,2024-05-17,2024
1,2024-05-18,2024
2,2024-05-19,2024
3,2024-05-20,2024
4,2024-05-21,2024


In [4]:
# Quarter of the year for each date (values 1 to 4).

quarter_values = data["date"].dt.quarter
data["quarter"] = quarter_values

# Show the source column next to the derived one.
data.loc[:, ["date", "quarter"]].head(5)

Unnamed: 0,date,quarter
0,2024-05-17,2
1,2024-05-18,2
2,2024-05-19,2
3,2024-05-20,2
4,2024-05-21,2


In [5]:
# Semester (half-year), derived from the existing "quarter" column:
# quarters below 3 map to semester 1, everything else to semester 2.

in_first_half = data["quarter"].lt(3)
data["semester"] = np.where(in_first_half, 1, 2)

# Show the derived column next to the quarter it was built from.
data.loc[:, ["semester", "quarter"]].head(5)

Unnamed: 0,semester,quarter
0,1,2
1,1,2
2,1,2
3,1,2
4,1,2


In [6]:
# Month of the year for each date (values 1 to 12).

month_values = data["date"].dt.month
data["month"] = month_values

# Show the source column next to the derived one.
data.loc[:, ["date", "month"]].head(5)

Unnamed: 0,date,month
0,2024-05-17,5
1,2024-05-18,5
2,2024-05-19,5
3,2024-05-20,5
4,2024-05-21,5


In [7]:
# ISO week of the year for each date.
# ISO 8601 weeks run from 1 to 52, or 53 in long years.

iso_calendar = data["date"].dt.isocalendar()
data["week"] = iso_calendar["week"]

# Show the source column next to the derived one.
data.loc[:, ["date", "week"]].head(5)

Unnamed: 0,date,week
0,2024-05-17,20
1,2024-05-18,20
2,2024-05-19,20
3,2024-05-20,21
4,2024-05-21,21


In [8]:
# Day of the month for each date (values 1 to 31).

day_of_month = data["date"].dt.day
data["day_mo"] = day_of_month

# Show the source column next to the derived one.
data.loc[:, ["date", "day_mo"]].head(5)

Unnamed: 0,date,day_mo
0,2024-05-17,17
1,2024-05-18,18
2,2024-05-19,19
3,2024-05-20,20
4,2024-05-21,21


In [9]:
# Day of the week as an integer from 0 to 6.

# pandas numbers the days starting on Monday:
# Monday is 0 and Sunday is 6.

weekday_number = data["date"].dt.dayofweek
data["day_week"] = weekday_number

# Show the date with its day-of-month and day-of-week values.
data.loc[:, ["date", "day_mo", "day_week"]].head(5)

Unnamed: 0,date,day_mo,day_week
0,2024-05-17,17,4
1,2024-05-18,18,5
2,2024-05-19,19,6
3,2024-05-20,20,0
4,2024-05-21,21,1


In [10]:
# Flag whether each date falls on a weekend (1) or a weekday (0).

# With Monday = 0, Saturday is 5 and Sunday is 6.
falls_on_weekend = data["date"].dt.dayofweek.ge(5)
data["is_weekend"] = falls_on_weekend.astype(int)

# Show the flag next to the day-of-week value for comparison.
data.loc[:, ["date", "day_week", "is_weekend"]].head(5)

Unnamed: 0,date,day_week,is_weekend
0,2024-05-17,4,0
1,2024-05-18,5,1
2,2024-05-19,6,1
3,2024-05-20,0,0
4,2024-05-21,1,0
