In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the two sample datasets: orders (date-only values) and messages
# (timestamps that also carry a time of day).
date = pd.read_csv('34_1_orders.csv')
time = pd.read_csv('34_2_messages.csv')

## Working with dates

In [3]:
# Preview the first rows of the orders data
date.head()

Unnamed: 0,date,product_id,city_id,orders
0,2019-12-10,5628,25,3
1,2018-08-15,3646,14,157
2,2018-10-23,1859,25,1
3,2019-08-17,7292,25,1
4,2019-01-06,4344,25,3


In [4]:
# Inspect dtypes before parsing: 'date' is still an object (string) column here
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   date        1000 non-null   object
 1   product_id  1000 non-null   int64 
 2   city_id     1000 non-null   int64 
 3   orders      1000 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 31.4+ KB


In [5]:
# Parse the 'date' strings into datetime64 values so the .dt accessor can be used below
date['date'] = pd.to_datetime(date['date'])

In [6]:
# Confirm the conversion: 'date' should now be reported as datetime64[ns]
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        1000 non-null   datetime64[ns]
 1   product_id  1000 non-null   int64         
 2   city_id     1000 non-null   int64         
 3   orders      1000 non-null   int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 31.4 KB


In [7]:
# Extract the calendar year of each order date into a new 'year' column
date['year'] = date['date'].dt.year

In [8]:
# Display the extracted year column
date['year']

0      2019
1      2018
2      2018
3      2019
4      2019
       ... 
995    2018
996    2018
997    2019
998    2019
999    2019
Name: year, Length: 1000, dtype: int32

In [9]:
# Extract the month number (1 = January ... 12 = December) into a new 'month' column
date['month'] = date['date'].dt.month

In [10]:
# Display the extracted month-number column
date['month']

0      12
1       8
2      10
3       8
4       1
       ..
995    10
996    12
997     5
998     3
999    10
Name: month, Length: 1000, dtype: int32

In [11]:
# Month name: full month name (e.g. 'December') for each order date
date['month_name'] = date['date'].dt.month_name()

In [12]:
# Display the month-name column
date['month_name'] 

0      December
1        August
2       October
3        August
4       January
         ...   
995     October
996    December
997         May
998       March
999     October
Name: month_name, Length: 1000, dtype: object

In [13]:
# Day of week as an integer, where Monday = 0 and Sunday = 6
date['day_of_week'] = date['date'].dt.dayofweek

In [14]:
# Display the numeric day-of-week column
date['day_of_week']

0      1
1      2
2      1
3      5
4      6
      ..
995    0
996    3
997    1
998    6
999    1
Name: day_of_week, Length: 1000, dtype: int32

In [17]:
# Day of week as a name (e.g. 'Tuesday'); the weekend flag computed later
# reads this column, so it must be created first.

date['date_dow_name'] = date['date'].dt.day_name()
date['date_dow_name']

0        Tuesday
1      Wednesday
2        Tuesday
3       Saturday
4         Sunday
         ...    
995       Monday
996     Thursday
997      Tuesday
998       Sunday
999      Tuesday
Name: date_dow_name, Length: 1000, dtype: object

In [19]:
# Weekend flag: 'Yes' when the weekday name is Saturday or Sunday, otherwise 'No'.
# Relies on the 'date_dow_name' column created in the previous cell.
date['date_is_weekend'] = np.where(
    date['date_dow_name'].isin(['Saturday', 'Sunday']),
    'Yes',
    'No',
)
date['date_is_weekend']

0       No
1       No
2       No
3      Yes
4      Yes
      ... 
995     No
996     No
997     No
998    Yes
999     No
Name: date_is_weekend, Length: 1000, dtype: object

In [22]:
# Week of the year, taken from the ISO calendar (isocalendar() yields
# year / week / day; only the week number is kept).
date['date_week'] = date['date'].dt.isocalendar().week
date['date_week']

0      50
1      33
2      43
3      33
4       1
       ..
995    41
996    49
997    19
998     9
999    42
Name: date_week, Length: 1000, dtype: UInt32

In [25]:
# Calendar quarter (1-4) of each order date
date['quarter'] = date['date'].dt.quarter
date['quarter']

0      4
1      3
2      4
3      3
4      1
      ..
995    4
996    4
997    2
998    1
999    4
Name: quarter, Length: 1000, dtype: int32

In [26]:
# Semester (half-year): quarters 1-2 -> semester 1, quarters 3-4 -> semester 2.
# Relies on the 'quarter' column created in the previous cell.
date['semester'] = np.where(date['quarter'] <= 2, 1, 2)
date['semester']

0      2
1      2
2      2
3      2
4      1
      ..
995    2
996    2
997    1
998    1
999    2
Name: semester, Length: 1000, dtype: int64

In [27]:
# Count how many orders fall into each semester
date['semester'].value_counts()

semester
2    689
1    311
Name: count, dtype: int64

In [28]:
# Extract time elapsed between dates

In [30]:
import datetime

# Reference instant for the elapsed-time calculations: the current local
# wall-clock time as a naive datetime (now() without a tz argument is
# equivalent to today()).
today = datetime.datetime.now()
today

datetime.datetime(2025, 10, 2, 21, 19, 40, 855145)

In [31]:
# Elapsed time between `today` and each order date. `today` carries a
# time of day, so each result is a timedelta with an hours/minutes/seconds part.
today - date['date']

0     2123 days 21:19:40.855145
1     2605 days 21:19:40.855145
2     2536 days 21:19:40.855145
3     2238 days 21:19:40.855145
4     2461 days 21:19:40.855145
                 ...           
995   2551 days 21:19:40.855145
996   2492 days 21:19:40.855145
997   2340 days 21:19:40.855145
998   2405 days 21:19:40.855145
999   2179 days 21:19:40.855145
Name: date, Length: 1000, dtype: timedelta64[ns]

In [32]:
# Keep only the whole-day component of each elapsed timedelta
(today - date['date']).dt.days

0      2123
1      2605
2      2536
3      2238
4      2461
       ... 
995    2551
996    2492
997    2340
998    2405
999    2179
Name: date, Length: 1000, dtype: int64

In [48]:
# Months passed
#
# Number of calendar-month boundaries between each order's month and the
# current month. The day of month is ignored, so 31 Jan -> 1 Feb counts as 1.

# pd.Timestamp.today() gives the current local time as a pandas Timestamp
# directly, so no `datetime` import is needed in this cell. The previous
# `from datetime import datetime` rebound the name `datetime` from the module
# (imported earlier in the notebook) to the class, which would break any later
# `datetime.datetime...` call; the repeated `import pandas as pd` was redundant.
today = pd.Timestamp.today()

# Vectorised year/month arithmetic replaces the per-row
# `.apply(lambda x: x.n)` over an object-dtype Series of month offsets.
# .dt.year / .dt.month are int32, so cast to keep the original int64 result.
months_passed = (
    (today.year - date['date'].dt.year) * 12
    + (today.month - date['date'].dt.month)
).astype('int64')


In [49]:
# Display the number of months elapsed for each order
months_passed

0      70
1      86
2      84
3      74
4      81
       ..
995    84
996    82
997    77
998    79
999    72
Name: date, Length: 1000, dtype: int64

## Working with time

In [35]:
# Inspect dtypes of the messages data: 'date' is still an object (string) column here
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


In [36]:
# Parse the message timestamps into datetime64 values so the .dt accessor can be used below
time['date'] = pd.to_datetime(time['date'])

In [38]:
# Confirm the conversion: 'date' should now be reported as datetime64[ns]
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1000 non-null   datetime64[ns]
 1   msg     1000 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 15.8+ KB


In [42]:
# Extract hour, minute and second of each message timestamp into separate columns.
# NOTE(review): the saved output of this cell already shows a 'time' column,
# which is only created in the following cell - the cell was evidently re-run
# out of order. Restart the kernel and run all cells to refresh the outputs.
time['hour'] = time['date'].dt.hour
time['min'] = time['date'].dt.minute
time['sec'] = time['date'].dt.second

time.head()

Unnamed: 0,date,msg,hour,min,sec,time
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,0,50,0,00:50:00
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,23,40,0,23:40:00
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,0,21,0,00:21:00
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...,0,31,0,00:31:00
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,23,11,0,23:11:00


In [43]:
# Extract the time-of-day part of each timestamp (the date part is dropped);
# the resulting column has object dtype.
time['time'] = time['date'].dt.time
time['time']

0      00:50:00
1      23:40:00
2      00:21:00
3      00:31:00
4      23:11:00
         ...   
995    00:50:00
996    23:14:00
997    23:37:00
998    23:34:00
999    23:25:00
Name: time, Length: 1000, dtype: object

In [44]:
# Time difference between the reference instant `today` and each message
# timestamp, as a timedelta64 Series.
today - time['date']

0     4309 days 20:29:40.855145
1     4173 days 21:39:40.855145
2     4659 days 20:58:40.855145
3     3961 days 20:48:40.855145
4     4358 days 22:08:40.855145
                 ...           
995   4948 days 20:29:40.855145
996   4269 days 22:05:40.855145
997   4734 days 21:42:40.855145
998   4850 days 21:45:40.855145
999   4122 days 21:54:40.855145
Name: date, Length: 1000, dtype: timedelta64[ns]

In [45]:
# Elapsed time expressed in seconds: dividing a timedelta Series by a
# one-second Timedelta yields a float count of seconds.
(today - time['date']) / pd.Timedelta(seconds=1)

0      3.723714e+08
1      3.606252e+08
2      4.026131e+08
3      3.423053e+08
4      3.766109e+08
           ...     
995    4.275810e+08
996    3.689211e+08
997    4.090958e+08
998    4.191183e+08
999    3.562197e+08
Name: date, Length: 1000, dtype: float64

In [46]:
# Elapsed time expressed in minutes: dividing a timedelta Series by a
# one-minute Timedelta yields a float count of minutes.

(today - time['date']) / pd.Timedelta(minutes=1)

0      6.206190e+06
1      6.010420e+06
2      6.710219e+06
3      5.705089e+06
4      6.276849e+06
           ...     
995    7.126350e+06
996    6.148686e+06
997    6.818263e+06
998    6.985306e+06
999    5.936995e+06
Name: date, Length: 1000, dtype: float64

In [47]:
# Elapsed time expressed in hours: dividing a timedelta Series by a
# one-hour Timedelta yields a float count of hours.

(today - time['date']) / pd.Timedelta(hours=1)

0      103436.494682
1      100173.661349
2      111836.978015
3       95084.811349
4      104614.144682
           ...      
995    118772.494682
996    102478.094682
997    113637.711349
998    116421.761349
999     98949.911349
Name: date, Length: 1000, dtype: float64