In [1]:
import numpy as np
import pandas as pd

In [2]:
date = pd.read_csv('orders.csv')
time = pd.read_csv('messages.csv')

In [3]:
date.head()

Unnamed: 0,date,product_id,city_id,orders
0,2019-12-10,5628,25,3
1,2018-08-15,3646,14,157
2,2018-10-23,1859,25,1
3,2019-08-17,7292,25,1
4,2019-01-06,4344,25,3


In [4]:
time.head()

Unnamed: 0,date,msg
0,2013-12-15 00:50:00,ищу на сегодня мужика 37
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше


In [5]:
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   date        1000 non-null   object
 1   product_id  1000 non-null   int64 
 2   city_id     1000 non-null   int64 
 3   orders      1000 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 31.4+ KB


In [6]:
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


# Working with Dates

In [7]:
# Converting to datetime datatype
date['date'] = pd.to_datetime(date['date'])

In [8]:
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        1000 non-null   datetime64[ns]
 1   product_id  1000 non-null   int64         
 2   city_id     1000 non-null   int64         
 3   orders      1000 non-null   int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 31.4 KB


#### 1. Extract year

In [9]:
date['date_year'] = date['date'].dt.year

date.sample(5)

Unnamed: 0,date,product_id,city_id,orders,date_year
334,2019-10-27,1130,14,48,2019
519,2019-11-01,6377,14,4,2019
835,2019-04-27,2736,25,5,2019
569,2019-02-18,5795,14,4,2019
516,2018-12-09,3953,13,1,2018


#### 2. Extract Month

In [10]:
date['date_month_no'] = date['date'].dt.month

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no
0,2019-12-10,5628,25,3,2019,12
1,2018-08-15,3646,14,157,2018,8
2,2018-10-23,1859,25,1,2018,10
3,2019-08-17,7292,25,1,2019,8
4,2019-01-06,4344,25,3,2019,1


In [56]:
date['date_month_name'] = date['date'].dt.month_name()

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_month_name
0,2019-12-10,5628,25,3,2019,12,December
1,2018-08-15,3646,14,157,2018,8,August
2,2018-10-23,1859,25,1,2018,10,October
3,2019-08-17,7292,25,1,2019,8,August
4,2019-01-06,4344,25,3,2019,1,January


#### Extract Days

In [11]:
date['date_day'] = date['date'].dt.day

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_day
0,2019-12-10,5628,25,3,2019,12,10
1,2018-08-15,3646,14,157,2018,8,15
2,2018-10-23,1859,25,1,2018,10,23
3,2019-08-17,7292,25,1,2019,8,17
4,2019-01-06,4344,25,3,2019,1,6


In [12]:
# day of week
date['date_dow'] = date['date'].dt.dayofweek

date.head()


Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_day,date_dow
0,2019-12-10,5628,25,3,2019,12,10,1
1,2018-08-15,3646,14,157,2018,8,15,2
2,2018-10-23,1859,25,1,2018,10,23,1
3,2019-08-17,7292,25,1,2019,8,17,5
4,2019-01-06,4344,25,3,2019,1,6,6


In [13]:
# day of week - name

date['date_dow_name'] = date['date'].dt.day_name()

date.drop(columns=['product_id','city_id','orders']).head()

Unnamed: 0,date,date_year,date_month_no,date_day,date_dow,date_dow_name
0,2019-12-10,2019,12,10,1,Tuesday
1,2018-08-15,2018,8,15,2,Wednesday
2,2018-10-23,2018,10,23,1,Tuesday
3,2019-08-17,2019,8,17,5,Saturday
4,2019-01-06,2019,1,6,6,Sunday


In [14]:
# is weekend?

date['date_is_weekend'] = np.where(date['date_dow_name'].isin(['Sunday', 'Saturday']), 1,0)

date.drop(columns=['product_id','city_id','orders']).head()

Unnamed: 0,date,date_year,date_month_no,date_day,date_dow,date_dow_name,date_is_weekend
0,2019-12-10,2019,12,10,1,Tuesday,0
1,2018-08-15,2018,8,15,2,Wednesday,0
2,2018-10-23,2018,10,23,1,Tuesday,0
3,2019-08-17,2019,8,17,5,Saturday,1
4,2019-01-06,2019,1,6,6,Sunday,1


#### Extract week of the year

In [16]:
date['date_week'] = date['date'].dt.week()

date.drop(columns=['product_id','city_id','orders']).head()

AttributeError: 'DatetimeProperties' object has no attribute 'week'

#### Extract Quarter

In [17]:
date['quarter'] = date['date'].dt.quarter

date.drop(columns=['product_id','city_id','orders']).head()

Unnamed: 0,date,date_year,date_month_no,date_day,date_dow,date_dow_name,date_is_weekend,quarter
0,2019-12-10,2019,12,10,1,Tuesday,0,4
1,2018-08-15,2018,8,15,2,Wednesday,0,3
2,2018-10-23,2018,10,23,1,Tuesday,0,4
3,2019-08-17,2019,8,17,5,Saturday,1,3
4,2019-01-06,2019,1,6,6,Sunday,1,1


#### Extract Semester

In [20]:
date['semester'] = np.where(date['quarter'].isin([1,2]), 1, 2)

date.drop(columns=['product_id','city_id','orders']).head()

Unnamed: 0,date,date_year,date_month_no,date_day,date_dow,date_dow_name,date_is_weekend,quarter,semester
0,2019-12-10,2019,12,10,1,Tuesday,0,4,2
1,2018-08-15,2018,8,15,2,Wednesday,0,3,2
2,2018-10-23,2018,10,23,1,Tuesday,0,4,2
3,2019-08-17,2019,8,17,5,Saturday,1,3,2
4,2019-01-06,2019,1,6,6,Sunday,1,1,1


#### Extract Time elapsed between dates

In [21]:
import datetime

today = datetime.datetime.today()

today

datetime.datetime(2025, 11, 13, 12, 14, 24, 512841)

In [22]:
today - date['date']

0     2165 days 12:14:24.512841
1     2647 days 12:14:24.512841
2     2578 days 12:14:24.512841
3     2280 days 12:14:24.512841
4     2503 days 12:14:24.512841
                 ...           
995   2593 days 12:14:24.512841
996   2534 days 12:14:24.512841
997   2382 days 12:14:24.512841
998   2447 days 12:14:24.512841
999   2221 days 12:14:24.512841
Name: date, Length: 1000, dtype: timedelta64[ns]

In [23]:
(today - date['date']).dt.days

0      2165
1      2647
2      2578
3      2280
4      2503
       ... 
995    2593
996    2534
997    2382
998    2447
999    2221
Name: date, Length: 1000, dtype: int64

In [26]:
# Months passed

np.round((today -date['date']) / np.timedelta64(1, 'W'),0)

0      309.0
1      378.0
2      368.0
3      326.0
4      358.0
       ...  
995    371.0
996    362.0
997    340.0
998    350.0
999    317.0
Name: date, Length: 1000, dtype: float64

In [27]:
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


In [28]:
# Converting to datetime datatype
time['date'] = pd.to_datetime(time['date'])

In [29]:
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1000 non-null   datetime64[ns]
 1   msg     1000 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 15.8+ KB


In [30]:
time['hour'] = time['date'].dt.hour
time['min'] = time['date'].dt.minute
time['sec'] = time['date'].dt.second

time.head()

Unnamed: 0,date,msg,hour,min,sec
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,0,50,0
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,23,40,0
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,0,21,0
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...,0,31,0
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,23,11,0


#### Extract Time part

In [31]:
time['time'] = time['date'].dt.time

time.head()

Unnamed: 0,date,msg,hour,min,sec,time
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,0,50,0,00:50:00
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,23,40,0,23:40:00
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,0,21,0,00:21:00
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...,0,31,0,00:31:00
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,23,11,0,23:11:00


#### Time difference

In [32]:
today - time['date']

0     4351 days 11:24:24.512841
1     4215 days 12:34:24.512841
2     4701 days 11:53:24.512841
3     4003 days 11:43:24.512841
4     4400 days 13:03:24.512841
                 ...           
995   4990 days 11:24:24.512841
996   4311 days 13:00:24.512841
997   4776 days 12:37:24.512841
998   4892 days 12:40:24.512841
999   4164 days 12:49:24.512841
Name: date, Length: 1000, dtype: timedelta64[ns]

In [33]:
# in seconds

(today - time['date'])/np.timedelta64(1,'s')

0      3.759675e+08
1      3.642213e+08
2      4.062092e+08
3      3.459014e+08
4      3.802070e+08
           ...     
995    4.311771e+08
996    3.725172e+08
997    4.126918e+08
998    4.227144e+08
999    3.598158e+08
Name: date, Length: 1000, dtype: float64

In [34]:
# in minutes

(today - time['date'])/np.timedelta64(1,'m')

0      6.266124e+06
1      6.070354e+06
2      6.770153e+06
3      5.765023e+06
4      6.336783e+06
           ...     
995    7.186284e+06
996    6.208620e+06
997    6.878197e+06
998    7.045240e+06
999    5.996929e+06
Name: date, Length: 1000, dtype: float64

In [35]:
# in hours

(today - time['date'])/np.timedelta64(1,'h')

0      104435.406809
1      101172.573476
2      112835.890142
3       96083.723476
4      105613.056809
           ...      
995    119771.406809
996    103477.006809
997    114636.623476
998    117420.673476
999     99948.823476
Name: date, Length: 1000, dtype: float64

In [37]:
from datetime import datetime

In [41]:
date.combine, time.columns

(<bound method DataFrame.combine of           date  product_id  city_id  orders  date_year  date_month_no  \
 0   2019-12-10        5628       25       3       2019             12   
 1   2018-08-15        3646       14     157       2018              8   
 2   2018-10-23        1859       25       1       2018             10   
 3   2019-08-17        7292       25       1       2019              8   
 4   2019-01-06        4344       25       3       2019              1   
 ..         ...         ...      ...     ...        ...            ...   
 995 2018-10-08         255       13       1       2018             10   
 996 2018-12-06        5521        7       1       2018             12   
 997 2019-05-07         487       26      14       2019              5   
 998 2019-03-03        1503       21       2       2019              3   
 999 2019-10-15        6371        7      22       2019             10   
 
      date_day  date_dow date_dow_name  date_is_weekend  quarter  semester 

In [48]:
date = "2023-07-13"
date_object = datetime.strptime(date, "%Y-%m-%d")
date = date_object.strftime("%y-%d-%m")
date

'23-13-07'