In [2]:
import pandas as pd
import numpy as np
import random

# Working with dates and time in Pandas

## pandas.Timestamp

[Official documentantion](https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html)

`Pandas` replacement for python `datetime.datetime` object.

Timestamp is the pandas equivalent of python’s Datetime and is interchangeable with it in most cases. It’s the type used for the entries that make up a DatetimeIndex, and other timeseries oriented data structures in pandas.

In [3]:
# creating timestamp with different parameters

print(pd.Timestamp('2022-11-20'))
print(pd.Timestamp(year=2022, month=11, day=20, hour=10, minute=51))
print(pd.Timestamp(2000, 4, 14 ,14, 0))
print(pd.Timestamp(1713393355.5, unit='s', tz='US/Pacific'))

2022-11-20 00:00:00
2022-11-20 10:51:00
2000-04-14 14:00:00
2024-04-17 15:35:55.500000-07:00


In [4]:
# get timezones list

import pytz

pytz.all_timezones

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome',
 'Africa/Timbuktu',
 'Africa/

In [5]:
# pd.Timestamp attributes

t = pd.Timestamp(1713092400.0, unit='s', tz='Europe/Moscow')
print(f'This day: {t}')
print(f'Day of week (number): {t.day_of_week}')
print(f'Day of year (number): {t.day_of_year}')
print(f'Week\'s number: {t.week}')
print(f'Days in month: {t.days_in_month}')

This day: 2024-04-14 14:00:00+03:00
Day of week (number): 6
Day of year (number): 105
Week's number: 15
Days in month: 30


In [6]:
# pd.Timestamp methods

t = pd.Timestamp.now()
print(f'This day: {t}')
print(f'Day name: {t.day_name()}')
print(f'Month name: {t.month_name()}')
print(f'Today is: {pd.Timestamp.date(t)}')
print(f'Now is: {pd.Timestamp.time(t)}')

This day: 2022-11-20 11:49:39.858288
Day name: Sunday
Month name: November
Today is: 2022-11-20
Now is: 11:49:39.858288


In [7]:
# replace datetime components

d = pd.Timestamp.today()
d.replace(year=1999)

Timestamp('1999-11-20 11:49:51.693732')

In [8]:
# combine date and time

date_ = d.date()
time_ = d.time()
print(f'{date_=}, {time_=}')
date_and_time = pd.Timestamp.combine(date_, time_)
print(f'{date_and_time=}')

date_=datetime.date(2022, 11, 20), time_=datetime.time(11, 49, 51, 693732)
date_and_time=Timestamp('2022-11-20 11:49:51.693732')


In [9]:
# format datetime to string

d.strftime('%d %b %Y')

'20 Nov 2022'

## Series.dt methods

When you're working with datetime data (`dtype` = `datetime64 [ns]`), `pandas` comes with a set of methods that are only available to datetime data. These methods can be accessed through the `dt` attribute when used in a pandas series.

In [10]:
s = pd.to_datetime(pd.Series([
        '1990-05-31 10:00',\
        '1995-06-05 15:00',\
        '2020-09-09 12:00']
))

In [11]:
# displaying the date
s.dt.date

0    1990-05-31
1    1995-06-05
2    2020-09-09
dtype: object

In [12]:
# displaying the time
s.dt.time

0    10:00:00
1    15:00:00
2    12:00:00
dtype: object

In [13]:
# displaying the year
s.dt.year

0    1990
1    1995
2    2020
dtype: int64

In [14]:
# displaying the month
s.dt.month

0    5
1    6
2    9
dtype: int64

In [15]:
# displaying the day
s.dt.day

0    31
1     5
2     9
dtype: int64

In [16]:
# displaying the hour
s.dt.hour

0    10
1    15
2    12
dtype: int64

In [17]:
# displaying the day name of each date
s.dt.day_name()

0     Thursday
1       Monday
2    Wednesday
dtype: object

In [18]:
# month name
s.dt.month_name()

0          May
1         June
2    September
dtype: object

In [19]:
# day of year
s.dt.dayofyear

0    151
1    156
2    253
dtype: int64

In [20]:
# splitting Series of datetime type to the DataFrame with columns year, week and day
s.dt.isocalendar()

Unnamed: 0,year,week,day
0,1990,22,4
1,1995,23,1
2,2020,37,3


## Series.timedelta methods

When you're working with time difference data (`dtype` = `timedelta64[ns]`), `pandas` come with a set of methods that are only available to different data. These methods can be accessed through the `dt` attribute when used in a pandas series.

In [21]:
# defining a series with timedelta
s = pd.to_datetime(pd.Series([
    '1990-05-31',\
    '1995-06-05',\
    '2020-09-09']))\
    - pd.to_datetime('2020-01-01')
s

0   -10807 days
1    -8976 days
2      252 days
dtype: timedelta64[ns]

In [22]:
# displaying the number of seconds
s.dt.total_seconds()

0   -933724800.0
1   -775526400.0
2     21772800.0
dtype: float64

In [23]:
# displaying the series as datetime.timdelta array
s.dt.to_pytimedelta()

array([datetime.timedelta(days=-10807), datetime.timedelta(days=-8976),
       datetime.timedelta(days=252)], dtype=object)

In [24]:
# displaying each time component
s.dt.components

Unnamed: 0,days,hours,minutes,seconds,milliseconds,microseconds,nanoseconds
0,-10807,0,0,0,0,0,0
1,-8976,0,0,0,0,0,0
2,252,0,0,0,0,0,0


In [25]:
dates = pd.date_range(start='2022-2-24', end='2022-11-16', freq='D')
pd.Series(dates)

0     2022-02-24
1     2022-02-25
2     2022-02-26
3     2022-02-27
4     2022-02-28
         ...    
261   2022-11-12
262   2022-11-13
263   2022-11-14
264   2022-11-15
265   2022-11-16
Length: 266, dtype: datetime64[ns]

## Converting argument to datetime with pd.to_datetime

In [26]:
# if a DF is provided, it should have columns: year, month and day

years = list(range(1900, 2000, 5))
months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8]
days = list(range(1, 21))
df = pd.DataFrame({
    'year': years,
    'month': months,
    'day': days
})
df['datetime'] = pd.to_datetime(df)
df

Unnamed: 0,year,month,day,datetime
0,1900,1,1,1900-01-01
1,1905,2,2,1905-02-02
2,1910,3,3,1910-03-03
3,1915,4,4,1915-04-04
4,1920,5,5,1920-05-05
5,1925,6,6,1925-06-06
6,1930,7,7,1930-07-07
7,1935,8,8,1935-08-08
8,1940,9,9,1940-09-09
9,1945,10,10,1945-10-10


In [27]:
# using datefirst and yearfirst arguments

dates = ['10/11/12', '11/12/12', '12/10/11', '5/3/7']
s1 = pd.to_datetime(pd.Series(dates), dayfirst=True)
s2 = pd.to_datetime(pd.Series(dates), yearfirst=True)
df = pd.DataFrame(data={'dayfirst': s1, 'yearfirst': s2})
df

Unnamed: 0,dayfirst,yearfirst
0,2012-11-10,2010-11-12
1,2012-12-11,2011-12-12
2,2011-10-12,2012-10-11
3,2007-03-05,2005-03-07


In [28]:
# using format argument to parse dates from strings

dates = ['14/1977/4', '16/2021/10', '28/1987/2', '19/1947/6', '8/1954/8']
df = pd.to_datetime(pd.Series(dates), format='%d/%Y/%m')
df

0   1977-04-14
1   2021-10-16
2   1987-02-28
3   1947-06-19
4   1954-08-08
dtype: datetime64[ns]

In [29]:
# using exact argument to parse dates from mixed strings

dates = [
    'April: 14/1977/4', 
    'Stacy: 16/2021/10', 
    'Guru: 28/1987/2', 
    'Dad: 19/1947/6', 
    'Mom: 8/1954/8'
]
df = pd.to_datetime(pd.Series(dates), format='%d/%Y/%m', exact=False)
df

0   1977-04-14
1   2021-10-16
2   1987-02-28
3   1947-06-19
4   1954-08-08
dtype: datetime64[ns]

## Using pd.to_timedelta

In [30]:
# argument can be string
pd.to_timedelta('1 days 06:05:01.00003')

Timedelta('1 days 06:05:01.000030')

In [31]:
# or list-like
pd.to_timedelta([1, 2, 3, 4, 5], unit='D')

TimedeltaIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], dtype='timedelta64[ns]', freq=None)

In [32]:
# or Series
s = pd.Series([1, 2, 3, 4, 5, 6, 7])
pd.to_timedelta(s, unit='d')

0   1 days
1   2 days
2   3 days
3   4 days
4   5 days
5   6 days
6   7 days
dtype: timedelta64[ns]

In [33]:
# unit argument define units of measure of timedelta
pd.to_timedelta(s, unit='W')

0    7 days
1   14 days
2   21 days
3   28 days
4   35 days
5   42 days
6   49 days
dtype: timedelta64[ns]

## Datetime ranges using pd.date_range and pd.bdate_range

In [34]:
# simple date range, default frequency is day
df1 = pd.DataFrame({'date': pd.date_range(start='14/11/2022', end='20/11/2022')})
df1['day_of_week'] = df1.date.dt.day_name()
df1

Unnamed: 0,date,day_of_week
0,2022-11-14,Monday
1,2022-11-15,Tuesday
2,2022-11-16,Wednesday
3,2022-11-17,Thursday
4,2022-11-18,Friday
5,2022-11-19,Saturday
6,2022-11-20,Sunday


In [35]:
# using periods argument to define the quantity of the values
# and name argument to defint the name of the DatetimeIndex
df1_1 = pd.DataFrame(pd.date_range(start='17/11/22', periods=7, name='date'))
df1_1

Unnamed: 0,date
0,2022-11-17
1,2022-11-18
2,2022-11-19
3,2022-11-20
4,2022-11-21
5,2022-11-22
6,2022-11-23


In [36]:
# using freq argument to define the frequency of values
df1_2 = pd.DataFrame(pd.date_range(start='1/1/22', end='31/12/22', freq='M'))
df1_2

Unnamed: 0,0
0,2022-01-31
1,2022-02-28
2,2022-03-31
3,2022-04-30
4,2022-05-31
5,2022-06-30
6,2022-07-31
7,2022-08-31
8,2022-09-30
9,2022-10-31


In [37]:
# multiplies frequencys are allowed
df1_3 = pd.DataFrame(pd.date_range(end='1/1/22', periods=10, freq='6M'))
df1_3

Unnamed: 0,0
0,2017-06-30
1,2017-12-31
2,2018-06-30
3,2018-12-31
4,2019-06-30
5,2019-12-31
6,2020-06-30
7,2020-12-31
8,2021-06-30
9,2021-12-31


In [38]:
# uses buisness days
df2 = pd.DataFrame({'date': pd.bdate_range(start='14/11/2022', end='20/11/2022')})
df2['day_of_week'] = df2.date.dt.day_name()
df2

Unnamed: 0,date,day_of_week
0,2022-11-14,Monday
1,2022-11-15,Tuesday
2,2022-11-16,Wednesday
3,2022-11-17,Thursday
4,2022-11-18,Friday
