# Extracting features from dates with pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a small toy dataframe that holds a single datetime column.

# 20 consecutive calendar days, starting on 2019-03-05.
rng_ = pd.date_range(start="2019-03-05", periods=20, freq="D")

# Turn the date range into a one-column dataframe with a default integer index.
data = rng_.to_frame(index=False, name="date")

# Preview the first 5 rows.
data.head(5)

Unnamed: 0,date
0,2019-03-05
1,2019-03-06
2,2019-03-07
3,2019-03-08
4,2019-03-09


In [3]:
# Calendar year of each date.

data["year"] = data["date"].dt.year

# Show the new feature next to the date it was derived from.
data.loc[:, ["date", "year"]].head()

Unnamed: 0,date,year
0,2019-03-05,2019
1,2019-03-06,2019
2,2019-03-07,2019
3,2019-03-08,2019
4,2019-03-09,2019


In [4]:
# Quarter of the year for each date - an integer from 1 to 4.

data["quarter"] = data["date"].dt.quarter

# Show the new feature next to the date it was derived from.
data.head()[["date", "quarter"]]

Unnamed: 0,date,quarter
0,2019-03-05,1
1,2019-03-06,1
2,2019-03-07,1
3,2019-03-08,1
4,2019-03-09,1


In [5]:
# Semester (half of the year), derived from the quarter:
# quarters below 3 map to semester 1, everything else to semester 2.

data["semester"] = np.where(data["quarter"].lt(3), 1, 2)

# Compare the semester against the quarter it was derived from.
data.head()[["semester", "quarter"]]

Unnamed: 0,semester,quarter
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1


In [6]:
# Month of the year for each date - an integer from 1 (January) to 12 (December).

data["month"] = data["date"].dt.month

# Show the new feature next to the date it was derived from.
data.filter(items=["date", "month"]).head()

Unnamed: 0,date,month
0,2019-03-05,3
1,2019-03-06,3
2,2019-03-07,3
3,2019-03-08,3
4,2019-03-09,3


In [7]:
# ISO 8601 week of the year for each date - an integer from 1 to 53
# (most years have 52 ISO weeks, but some have a 53rd).

data["week"] = data["date"].dt.isocalendar()["week"]

# Show the new feature next to the date it was derived from.
data.loc[:, ["date", "week"]].head()

Unnamed: 0,date,week
0,2019-03-05,10
1,2019-03-06,10
2,2019-03-07,10
3,2019-03-08,10
4,2019-03-09,10


In [8]:
# Day of the month for each date - an integer from 1 to 31.

data["day_mo"] = data["date"].dt.day

# Show the new feature next to the date it was derived from.
data.head()[["date", "day_mo"]]

Unnamed: 0,date,day_mo
0,2019-03-05,5
1,2019-03-06,6
2,2019-03-07,7
3,2019-03-08,8
4,2019-03-09,9


In [9]:
# Day of the week for each date - an integer from 0 to 6.

# pandas counts the week from Monday (0) through Sunday (6).

data["day_week"] = data["date"].dt.weekday

# Show day of month and day of week side by side.
data.loc[:, ["date", "day_mo", "day_week"]].head()

Unnamed: 0,date,day_mo,day_week
0,2019-03-05,5,1
1,2019-03-06,6,2
2,2019-03-07,7,3
3,2019-03-08,8,4
4,2019-03-09,9,5


In [10]:
# Does the date fall on a weekend?
# 1 when the day of the week is Saturday (5) or Sunday (6), 0 otherwise.

data["is_weekend"] = data["date"].dt.dayofweek.ge(5).astype(int)

# Compare the flag against the day of the week.
data.head()[["date", "day_week", "is_weekend"]]

Unnamed: 0,date,day_week,is_weekend
0,2019-03-05,1,0
1,2019-03-06,2,0
2,2019-03-07,3,0
3,2019-03-08,4,0
4,2019-03-09,5,1
