<a href="https://colab.research.google.com/github/Saifullah785/machine-learning-engineer-roadmap/blob/main/Lecture_34_working_with_dates_and_time/Lecture_34_working_with_dates_and_time.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **🎓 Lecture: Handling Date and Time Columns in Machine Learning**


---


✅ Goal: Understand how to extract features from datetime columns to boost ML model performance

In [135]:
# Example worked through while studying the article

import seaborn as sns
import pandas as pd

# Load the planets dataset bundled with seaborn

df = sns.load_dataset('planets')

# Add a datetime-like column: one synthetic month-end timestamp per row
# (these are NOT real discovery dates, only something to practise on).
# An explicit MonthEnd offset is used instead of the 'M' alias, which newer
# pandas releases deprecate; the offset object works on old and new versions.

df['discovered_date'] = pd.date_range(
    start='1990-01-01', periods=len(df), freq=pd.offsets.MonthEnd()
)

# Convert to datetime
# (a no-op here because date_range already yields datetime64 values; kept to
# show the step that is required when dates arrive as strings)

df['discovered_date'] = pd.to_datetime(df['discovered_date'])

# Extract datetime features

df['year'] = df['discovered_date'].dt.year
df['month'] = df['discovered_date'].dt.month
df['day'] = df['discovered_date'].dt.day
df['dayofweek'] = df['discovered_date'].dt.dayofweek  # Monday=0 ... Sunday=6
df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)  # 5=Saturday, 6=Sunday

# View result

print(df[['discovered_date', 'year', 'month', 'day', 'dayofweek', 'is_weekend']].head())


  discovered_date  year  month  day  dayofweek  is_weekend
0      1990-01-31  1990      1   31          2           0
1      1990-02-28  1990      2   28          2           0
2      1990-03-31  1990      3   31          5           1
3      1990-04-30  1990      4   30          0           0
4      1990-05-31  1990      5   31          3           0


  df['discovered_date'] = pd.date_range(start='1990-01-01', periods=len(df), freq='M')


# **Working on Date and Time Columns During the Lecture**

In [99]:
import numpy as np
import pandas as pd

In [100]:
# Read the two lecture CSVs from /content:
# `date` holds the orders table, `time` holds the messages table.
date = pd.read_csv(filepath_or_buffer='/content/orders.csv')
time = pd.read_csv(filepath_or_buffer='/content/messages.csv')

In [101]:
# Preview the first rows of the orders data
date.head()

Unnamed: 0,date,product_id,city_id,orders
0,2019-12-10,5628,25,3
1,2018-08-15,3646,14,157
2,2018-10-23,1859,25,1
3,2019-08-17,7292,25,1
4,2019-01-06,4344,25,3


In [102]:
# Preview the first rows of the messages data
time.head()

Unnamed: 0,date,msg
0,2013-12-15 00:50:00,ищу на сегодня мужика 37
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше


In [103]:
# Check column dtypes and non-null counts of the orders data before any conversion
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   date        1000 non-null   object
 1   product_id  1000 non-null   int64 
 2   city_id     1000 non-null   int64 
 3   orders      1000 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 31.4+ KB


In [104]:
# Check column dtypes and non-null counts of the messages data before any conversion
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


# **Working with Dates**

In [105]:
# Converting to datetime datatype
# (the column is overwritten so the .dt accessor can be used on it afterwards)

date['date'] = pd.to_datetime(date['date'])

In [106]:
# Confirm the conversion: the `date` column should now report a datetime64 dtype
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        1000 non-null   datetime64[ns]
 1   product_id  1000 non-null   int64         
 2   city_id     1000 non-null   int64         
 3   orders      1000 non-null   int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 31.4 KB


# **1. Extract year**

In [107]:
# Calendar year of each order as its own column
date['date_year'] = date['date'].dt.year

# Seeded so the preview shows the same five rows on every run
date.sample(5, random_state=42)

Unnamed: 0,date,product_id,city_id,orders,date_year
618,2019-10-14,1140,26,2,2019
80,2018-12-05,1754,23,2,2018
194,2019-08-09,5,16,1,2019
211,2019-01-04,6588,28,7,2019
669,2018-10-31,1647,4,1,2018


In [108]:
# Month of each order as a number (1 = January ... 12 = December)
date['date_month_no'] = date['date'].dt.month

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no
0,2019-12-10,5628,25,3,2019,12
1,2018-08-15,3646,14,157,2018,8
2,2018-10-23,1859,25,1,2018,10
3,2019-08-17,7292,25,1,2019,8
4,2019-01-06,4344,25,3,2019,1


In [109]:
# Month of each order as its name (e.g. 'December'); a text column
date['date_month_name'] = date['date'].dt.month_name()

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_month_name
0,2019-12-10,5628,25,3,2019,12,December
1,2018-08-15,3646,14,157,2018,8,August
2,2018-10-23,1859,25,1,2018,10,October
3,2019-08-17,7292,25,1,2019,8,August
4,2019-01-06,4344,25,3,2019,1,January


# **Extract Days**

In [110]:
# Day of the month of each order
date['date_day']= date['date'].dt.day
date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_month_name,date_day
0,2019-12-10,5628,25,3,2019,12,December,10
1,2018-08-15,3646,14,157,2018,8,August,15
2,2018-10-23,1859,25,1,2018,10,October,23
3,2019-08-17,7292,25,1,2019,8,August,17
4,2019-01-06,4344,25,3,2019,1,January,6


In [111]:
# day of week as an integer (Monday=0 ... Sunday=6)

date['date_dow'] = date['date'].dt.dayofweek

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_month_name,date_day,date_dow
0,2019-12-10,5628,25,3,2019,12,December,10,1
1,2018-08-15,3646,14,157,2018,8,August,15,2
2,2018-10-23,1859,25,1,2018,10,October,23,1
3,2019-08-17,7292,25,1,2019,8,August,17,5
4,2019-01-06,4344,25,3,2019,1,January,6,6


In [112]:
# day of week name (e.g. 'Tuesday')
date['date_dow_name'] = date['date'].dt.day_name()

# Hide the non-date columns in this preview only; `date` itself keeps them
date.drop(columns=['product_id','city_id','orders']).head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name
0,2019-12-10,2019,12,December,10,1,Tuesday
1,2018-08-15,2018,8,August,15,2,Wednesday
2,2018-10-23,2018,10,October,23,1,Tuesday
3,2019-08-17,2019,8,August,17,5,Saturday
4,2019-01-06,2019,1,January,6,6,Sunday


In [113]:
# is weekend?
# Flag is 1 when the weekday name is Saturday or Sunday, otherwise 0
date['date_is_weekend'] = date['date_dow_name'].isin(['Sunday','Saturday']).astype(int)
date.drop(['product_id','city_id','orders'], axis=1).head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend
0,2019-12-10,2019,12,December,10,1,Tuesday,0
1,2018-08-15,2018,8,August,15,2,Wednesday,0
2,2018-10-23,2018,10,October,23,1,Tuesday,0
3,2019-08-17,2019,8,August,17,5,Saturday,1
4,2019-01-06,2019,1,January,6,6,Sunday,1


# **Extract week of the year**

In [114]:
# Week number taken from the ISO calendar and cast to a plain integer column.
# NOTE(review): ISO weeks can assign days around New Year to a week of the
# neighbouring year - keep in mind when combining this with `date_year`.
date['date_week'] = date['date'].dt.isocalendar().week.astype(int)  # Extract week number as integer
date.drop(columns=['product_id','city_id','orders']).head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend,date_week
0,2019-12-10,2019,12,December,10,1,Tuesday,0,50
1,2018-08-15,2018,8,August,15,2,Wednesday,0,33
2,2018-10-23,2018,10,October,23,1,Tuesday,0,43
3,2019-08-17,2019,8,August,17,5,Saturday,1,33
4,2019-01-06,2019,1,January,6,6,Sunday,1,1


In [115]:
# Extract quarter

In [116]:
# Calendar quarter of each order (1-4)
date['quarter']= date['date'].dt.quarter
date.drop(columns=['product_id','city_id','orders']).head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend,date_week,quarter
0,2019-12-10,2019,12,December,10,1,Tuesday,0,50,4
1,2018-08-15,2018,8,August,15,2,Wednesday,0,33,3
2,2018-10-23,2018,10,October,23,1,Tuesday,0,43,4
3,2019-08-17,2019,8,August,17,5,Saturday,1,33,3
4,2019-01-06,2019,1,January,6,6,Sunday,1,1,1


In [117]:
# Extract Semester
# Quarters 1-2 map to semester 1; quarters 3-4 map to semester 2
date['semester'] = np.where(date['quarter'] <= 2, 1, 2)
date.drop(['product_id','city_id','orders'], axis=1).head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend,date_week,quarter,semester
0,2019-12-10,2019,12,December,10,1,Tuesday,0,50,4,2
1,2018-08-15,2018,8,August,15,2,Wednesday,0,33,3,2
2,2018-10-23,2018,10,October,23,1,Tuesday,0,43,4,2
3,2019-08-17,2019,8,August,17,5,Saturday,1,33,3,2
4,2019-01-06,2019,1,January,6,6,Sunday,1,1,1,1


In [118]:
# Extract time elapsed between dates
import datetime

# The reference point is the wall-clock time at execution, so every
# elapsed-time value computed from `today` changes from run to run.
today = datetime.datetime.today()

today

datetime.datetime(2025, 5, 7, 12, 54, 41, 238945)

In [119]:
# Elapsed time between `today` and each order date, as timedeltas
today - date['date']

Unnamed: 0,date
0,1975 days 12:54:41.238945
1,2457 days 12:54:41.238945
2,2388 days 12:54:41.238945
3,2090 days 12:54:41.238945
4,2313 days 12:54:41.238945
...,...
995,2403 days 12:54:41.238945
996,2344 days 12:54:41.238945
997,2192 days 12:54:41.238945
998,2257 days 12:54:41.238945


In [120]:
# Same elapsed time, keeping only the whole-day component as an integer
(today - date['date']).dt.days

Unnamed: 0,date
0,1975
1,2457
2,2388
3,2090
4,2313
...,...
995,2403
996,2344
997,2192
998,2257


In [121]:
# Months passed
#
# The original attempt divided by np.timedelta64(1, 'M'); a calendar month has
# no fixed length (presumably why that line had been commented out), so the
# elapsed time is divided by the average Gregorian month (365.2425 / 12 days)
# and rounded to whole months instead.

np.round((today - date['date']) / pd.Timedelta(days=365.2425 / 12), 0)

In [122]:
# today = pd.to_datetime("today")

In [123]:
# # Months passed

# np.round((today - date['date']) / np.timedelta64(1, 'M'),0)

# **Working with a Time-Based Column in a DataFrame**

In [124]:
# Display the messages frame as loaded (pandas abbreviates the middle rows)
time

Unnamed: 0,date,msg
0,2013-12-15 00:50:00,ищу на сегодня мужика 37
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше
...,...,...
995,2012-03-16 00:50:00,ПАРЕНЬ СДЕЛАЕТ МАССАЖ ЖЕНЩИНАМ -066-877-32-44
996,2014-01-23 23:14:00,сельский п 23 ищу девушку для отношений
997,2012-10-15 23:37:00,Д+Д ДЛЯ серьезных отношений. Мой номер 093-156...
998,2012-06-21 23:34:00,7 ДНЕПР М.34 ПОЗ.С Д/Ж ДЛЯ ВСТРЕЧ.Т.098 809 15 14


In [125]:
# The `date` column of the messages data is still an object (string) column here
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


In [126]:
# Converting to datetime datatype
# (the column is overwritten so the .dt accessor can be used on it afterwards)
time['date'] = pd.to_datetime(time['date'])

In [128]:
# Split each timestamp into its hour, minute and second components
time['hour'] = time['date'].dt.hour
time['min'] = time['date'].dt.minute
time['sec'] = time['date'].dt.second

time.head()

Unnamed: 0,date,msg,hour,min,sec
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,0,50,0
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,23,40,0
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,0,21,0
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...,0,31,0
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,23,11,0


In [129]:
# Extract the time-of-day part of each timestamp (the calendar date is dropped)
time['time'] = time['date'].dt.time

time.head()

Unnamed: 0,date,msg,hour,min,sec,time
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,0,50,0,00:50:00
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,23,40,0,23:40:00
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,0,21,0,00:21:00
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...,0,31,0,00:31:00
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,23,11,0,23:11:00


In [130]:
# Time difference between `today` and each message timestamp, as timedeltas
today - time['date']

Unnamed: 0,date
0,4161 days 12:04:41.238945
1,4025 days 13:14:41.238945
2,4511 days 12:33:41.238945
3,3813 days 12:23:41.238945
4,4210 days 13:43:41.238945
...,...
995,4800 days 12:04:41.238945
996,4121 days 13:40:41.238945
997,4586 days 13:17:41.238945
998,4702 days 13:20:41.238945


In [131]:
# in seconds
# Dividing by a one-second Timedelta gives the gap as a float count of seconds
(today - time['date']) / pd.Timedelta(seconds=1)

Unnamed: 0,date
0,3.595539e+08
1,3.478077e+08
2,3.897956e+08
3,3.294878e+08
4,3.637934e+08
...,...
995,4.147635e+08
996,3.561036e+08
997,3.962783e+08
998,4.063008e+08


In [132]:
# in minutes
# Dividing by a one-minute Timedelta gives the gap as a float count of minutes
(today - time['date']) / pd.Timedelta(minutes=1)

Unnamed: 0,date
0,5.992565e+06
1,5.796795e+06
2,6.496594e+06
3,5.491464e+06
4,6.063224e+06
...,...
995,6.912725e+06
996,5.935061e+06
997,6.604638e+06
998,6.771681e+06


In [133]:
# in hours
# Dividing by a one-hour Timedelta gives the gap as a float count of hours
(today - time['date']) / pd.Timedelta(hours=1)

Unnamed: 0,date
0,99876.078122
1,96613.244789
2,108276.561455
3,91524.394789
4,101053.728122
...,...
995,115212.078122
996,98917.678122
997,110077.294789
998,112861.344789
