# Chapter 11 Working with Dates and Times

In [2]:
import pandas as pd #this is external library
import datetime as dt #this is python's own library

## Review of Python's Datetime Module

In [2]:
# there is both datetime module and datetime method

In [14]:
someday= dt.date(2016,4,12)
someday

datetime.date(2016, 4, 12)

In [9]:
someday.year, someday.month, someday.day

(2016, 4, 12)

In [13]:
sometime = dt.datetime(2010,1,11,17,20,30)
sometime

datetime.datetime(2010, 1, 11, 17, 20, 30)

In [12]:
str(sometime)

'2010-01-11 17:20:30'

In [15]:
sometime.hour, sometime.minute, sometime.second

(17, 20, 30)

## The pandas Timestamp Object

In [3]:
pd.Timestamp('2015-03-01')

Timestamp('2015-03-01 00:00:00')

In [4]:
pd.Timestamp('2015/01/01')

Timestamp('2015-01-01 00:00:00')

In [9]:
pd.Timestamp('2023.1.1')

Timestamp('2023-01-01 00:00:00')

In [12]:
pd.Timestamp('1/3/2015') # be careful if number can be mos or day

Timestamp('2015-01-03 00:00:00')

In [11]:
pd.Timestamp('19/12/2015')

Timestamp('2015-12-19 00:00:00')

In [13]:
pd.Timestamp('2023/1/1 08:35:20')

Timestamp('2023-01-01 08:35:20')

In [16]:
pd.Timestamp('2022-12-2 6:17:49pm')

Timestamp('2022-12-02 18:17:49')

In [20]:
pd.Timestamp(dt.datetime(2015,1,1,9,12,24))

Timestamp('2015-01-01 09:12:24')

# The pandas DatetimeIndex Object
convert to Timestamps

In [130]:
dates = ['2020-1-1','1987-5-17','2023-1-11']
pd.DatetimeIndex(dates)

DatetimeIndex(['2020-01-01', '1987-05-17', '2023-01-11'], dtype='datetime64[ns]', freq=None)

In [134]:
dates = [dt.date(2010,1,10), dt.date(2023,1,1), dt.date(1987,5,17)]
dtIndex = pd.DatetimeIndex(dates)

In [135]:
values = [100,200,300]
pd.Series(data = values, index = dtIndex)

2010-01-10    100
2023-01-01    200
1987-05-17    300
dtype: int64

# The pd.to_datetme() Method

In [21]:
pd.to_datetime('2020-01-01')

Timestamp('2020-01-01 00:00:00')

In [22]:
pd.to_datetime(dt.date(2015,2,2))

Timestamp('2015-02-02 00:00:00')

In [26]:
pd.to_datetime(['2022-02-02','2023','2024.1.1','July 19th 2009'])

DatetimeIndex(['2022-02-02', '2023-01-01', '2024-01-01', '2009-07-19'], dtype='datetime64[ns]', freq=None)

In [28]:
times = pd.Series(['2022-02-02','2023','2024.1.1','July 19th 2009'])
times

0        2022-02-02
1              2023
2          2024.1.1
3    July 19th 2009
dtype: object

In [29]:
pd.to_datetime(times)

0   2022-02-02
1   2023-01-01
2   2024-01-01
3   2009-07-19
dtype: datetime64[ns]

In [37]:
wrongdate = pd.Series(['July 4th 1990', "hello", '2023-2-31'])

In [38]:
pd.to_datetime(wrongdate) # Throw errors

ParserError: Unknown string format: hello

In [39]:
pd.to_datetime(wrongdate, errors = 'coerce') #NaT is for datetime

0   1990-07-04
1          NaT
2          NaT
dtype: datetime64[ns]

## Create Range of Dates with the pd.date_range() Method Part 1
start = , end = ,freq = 

In [42]:
times = pd.date_range(start = '2020-1-1', end = '2020-1-31', freq ='D') #default is D = day
times

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',
               '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
               '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30', '2020-01-31'],
              dtype='datetime64[ns]', freq='D')

In [43]:
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [45]:
type(times[0])

pandas._libs.tslibs.timestamps.Timestamp

In [46]:
pd.date_range(start = '2022-1-1', end = '2022-1-31', freq = '2D')

DatetimeIndex(['2022-01-01', '2022-01-03', '2022-01-05', '2022-01-07',
               '2022-01-09', '2022-01-11', '2022-01-13', '2022-01-15',
               '2022-01-17', '2022-01-19', '2022-01-21', '2022-01-23',
               '2022-01-25', '2022-01-27', '2022-01-29', '2022-01-31'],
              dtype='datetime64[ns]', freq='2D')

In [48]:
pd.date_range(start = '2022-1-1', end = '2022-1-31', freq = 'B') #B= business days

DatetimeIndex(['2022-01-03', '2022-01-04', '2022-01-05', '2022-01-06',
               '2022-01-07', '2022-01-10', '2022-01-11', '2022-01-12',
               '2022-01-13', '2022-01-14', '2022-01-17', '2022-01-18',
               '2022-01-19', '2022-01-20', '2022-01-21', '2022-01-24',
               '2022-01-25', '2022-01-26', '2022-01-27', '2022-01-28',
               '2022-01-31'],
              dtype='datetime64[ns]', freq='B')

In [49]:
pd.date_range(start = '2022-1-1', end = '2022-1-31', freq = 'W') #w= weekly on sunday

DatetimeIndex(['2022-01-02', '2022-01-09', '2022-01-16', '2022-01-23',
               '2022-01-30'],
              dtype='datetime64[ns]', freq='W-SUN')

In [53]:
pd.date_range(start = '2022-1-1', end = '2022-1-31', freq = 'W-SAT')

DatetimeIndex(['2022-01-01', '2022-01-08', '2022-01-15', '2022-01-22',
               '2022-01-29'],
              dtype='datetime64[ns]', freq='W-SAT')

In [54]:
pd.date_range(start = '2022-1-1', end = '2022-1-31', freq = 'H') #H= hour

DatetimeIndex(['2022-01-01 00:00:00', '2022-01-01 01:00:00',
               '2022-01-01 02:00:00', '2022-01-01 03:00:00',
               '2022-01-01 04:00:00', '2022-01-01 05:00:00',
               '2022-01-01 06:00:00', '2022-01-01 07:00:00',
               '2022-01-01 08:00:00', '2022-01-01 09:00:00',
               ...
               '2022-01-30 15:00:00', '2022-01-30 16:00:00',
               '2022-01-30 17:00:00', '2022-01-30 18:00:00',
               '2022-01-30 19:00:00', '2022-01-30 20:00:00',
               '2022-01-30 21:00:00', '2022-01-30 22:00:00',
               '2022-01-30 23:00:00', '2022-01-31 00:00:00'],
              dtype='datetime64[ns]', length=721, freq='H')

In [55]:
pd.date_range(start = '2022-1-1', end = '2022-1-31', freq = '12H')

DatetimeIndex(['2022-01-01 00:00:00', '2022-01-01 12:00:00',
               '2022-01-02 00:00:00', '2022-01-02 12:00:00',
               '2022-01-03 00:00:00', '2022-01-03 12:00:00',
               '2022-01-04 00:00:00', '2022-01-04 12:00:00',
               '2022-01-05 00:00:00', '2022-01-05 12:00:00',
               '2022-01-06 00:00:00', '2022-01-06 12:00:00',
               '2022-01-07 00:00:00', '2022-01-07 12:00:00',
               '2022-01-08 00:00:00', '2022-01-08 12:00:00',
               '2022-01-09 00:00:00', '2022-01-09 12:00:00',
               '2022-01-10 00:00:00', '2022-01-10 12:00:00',
               '2022-01-11 00:00:00', '2022-01-11 12:00:00',
               '2022-01-12 00:00:00', '2022-01-12 12:00:00',
               '2022-01-13 00:00:00', '2022-01-13 12:00:00',
               '2022-01-14 00:00:00', '2022-01-14 12:00:00',
               '2022-01-15 00:00:00', '2022-01-15 12:00:00',
               '2022-01-16 00:00:00', '2022-01-16 12:00:00',
               '2022-01-

In [57]:
pd.date_range(start = '2022-1-1', end = '2022-3-31', freq = 'M') #M = month-end

DatetimeIndex(['2022-01-31', '2022-02-28', '2022-03-31'], dtype='datetime64[ns]', freq='M')

In [60]:
pd.date_range(start = '2022-1-10', end = '2022-3-31', freq = 'MS') #MS= month-start

DatetimeIndex(['2022-02-01', '2022-03-01'], dtype='datetime64[ns]', freq='MS')

In [61]:
pd.date_range(start = '2022-1-1', end = '2029-1-31', freq = 'A') #A= year

DatetimeIndex(['2022-12-31', '2023-12-31', '2024-12-31', '2025-12-31',
               '2026-12-31', '2027-12-31', '2028-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

## Create Range of Dates with the pd.date_range() Method Part 2
start = , period = 

In [67]:
pd.date_range(start = '2023-1-21', periods = 25, freq = 'D')

DatetimeIndex(['2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24',
               '2023-01-25', '2023-01-26', '2023-01-27', '2023-01-28',
               '2023-01-29', '2023-01-30', '2023-01-31', '2023-02-01',
               '2023-02-02', '2023-02-03', '2023-02-04', '2023-02-05',
               '2023-02-06', '2023-02-07', '2023-02-08', '2023-02-09',
               '2023-02-10', '2023-02-11', '2023-02-12', '2023-02-13',
               '2023-02-14'],
              dtype='datetime64[ns]', freq='D')

In [68]:
pd.date_range(start = '2023-1-21', periods = 25, freq = 'B')

DatetimeIndex(['2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26',
               '2023-01-27', '2023-01-30', '2023-01-31', '2023-02-01',
               '2023-02-02', '2023-02-03', '2023-02-06', '2023-02-07',
               '2023-02-08', '2023-02-09', '2023-02-10', '2023-02-13',
               '2023-02-14', '2023-02-15', '2023-02-16', '2023-02-17',
               '2023-02-20', '2023-02-21', '2023-02-22', '2023-02-23',
               '2023-02-24'],
              dtype='datetime64[ns]', freq='B')

## Create Range of Dates with the pd.date_range() Method Part 3
end = , period = 

In [73]:
pd.date_range(end = '2023-1-21', periods = 25, freq = 'D')

DatetimeIndex(['2022-12-28', '2022-12-29', '2022-12-30', '2022-12-31',
               '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-08',
               '2023-01-09', '2023-01-10', '2023-01-11', '2023-01-12',
               '2023-01-13', '2023-01-14', '2023-01-15', '2023-01-16',
               '2023-01-17', '2023-01-18', '2023-01-19', '2023-01-20',
               '2023-01-21'],
              dtype='datetime64[ns]', freq='D')

In [72]:
pd.date_range(end = '2023-1-21', periods = 25, freq = 'B')

DatetimeIndex(['2022-12-19', '2022-12-20', '2022-12-21', '2022-12-22',
               '2022-12-23', '2022-12-26', '2022-12-27', '2022-12-28',
               '2022-12-29', '2022-12-30', '2023-01-02', '2023-01-03',
               '2023-01-04', '2023-01-05', '2023-01-06', '2023-01-09',
               '2023-01-10', '2023-01-11', '2023-01-12', '2023-01-13',
               '2023-01-16', '2023-01-17', '2023-01-18', '2023-01-19',
               '2023-01-20'],
              dtype='datetime64[ns]', freq='B')

# The .dt Accessor
similar to .str prefix wth strings. .dt prefix for time

In [75]:
bunch_of_dates = pd.date_range(start = '2000/1/1', end = '2010-1-1', freq = '24D')
bunch_of_dates

DatetimeIndex(['2000-01-01', '2000-01-25', '2000-02-18', '2000-03-13',
               '2000-04-06', '2000-04-30', '2000-05-24', '2000-06-17',
               '2000-07-11', '2000-08-04',
               ...
               '2009-05-25', '2009-06-18', '2009-07-12', '2009-08-05',
               '2009-08-29', '2009-09-22', '2009-10-16', '2009-11-09',
               '2009-12-03', '2009-12-27'],
              dtype='datetime64[ns]', length=153, freq='24D')

In [76]:
s = pd.Series(bunch_of_dates)
s.head()

0   2000-01-01
1   2000-01-25
2   2000-02-18
3   2000-03-13
4   2000-04-06
dtype: datetime64[ns]

In [79]:
s.day

AttributeError: 'Series' object has no attribute 'day'

In [78]:
s.dt.day

0       1
1      25
2      18
3      13
4       6
       ..
148    22
149    16
150     9
151     3
152    27
Length: 153, dtype: int64

In [89]:
mask = s.dt.is_quarter_start
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

# Import Financial Dataset with pandas_datareader library

In [191]:
import pandas_datareader.data as web
stock = web.DataReader('005930', 'naver', start='2010-01-1', end='2020-12-31')
stock.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-05,16520,16580,16300,16440,559219
2010-01-06,16580,16820,16520,16820,459755
2010-01-07,16820,16820,16260,16260,443237
2010-01-08,16400,16420,16120,16420,295798


In [121]:
stock.values

array([['16060', '16180', '16000', '16180', '239271'],
       ['16520', '16580', '16300', '16440', '559219'],
       ['16580', '16820', '16520', '16820', '459755'],
       ...,
       ['79000', '80100', '78200', '78700', '40085044'],
       ['78800', '78900', '77300', '78300', '30339449'],
       ['77400', '81300', '77300', '81000', '29417421']], dtype=object)

In [122]:
stock.axes

[DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
                '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
                '2010-01-14', '2010-01-15',
                ...
                '2020-12-16', '2020-12-17', '2020-12-18', '2020-12-21',
                '2020-12-22', '2020-12-23', '2020-12-24', '2020-12-28',
                '2020-12-29', '2020-12-30'],
               dtype='datetime64[ns]', name='Date', length=2714, freq=None),
 Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')]

In [123]:
stock.columns, stock.index

(Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object'),
 DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
                '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
                '2010-01-14', '2010-01-15',
                ...
                '2020-12-16', '2020-12-17', '2020-12-18', '2020-12-21',
                '2020-12-22', '2020-12-23', '2020-12-24', '2020-12-28',
                '2020-12-29', '2020-12-30'],
               dtype='datetime64[ns]', name='Date', length=2714, freq=None))

# Selecting Rows from a Dataframe with a DatetimeIndex

In [124]:
stock.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-05,16520,16580,16300,16440,559219
2010-01-06,16580,16820,16520,16820,459755
2010-01-07,16820,16820,16260,16260,443237
2010-01-08,16400,16420,16120,16420,295798


In [125]:
stock.loc['2010-01-04']

Open       16060
High       16180
Low        16000
Close      16180
Volume    239271
Name: 2010-01-04 00:00:00, dtype: object

In [127]:
stock.loc[pd.Timestamp('2010-01-04')] #better practice

Open       16060
High       16180
Low        16000
Close      16180
Volume    239271
Name: 2010-01-04 00:00:00, dtype: object

In [128]:
stock.iloc[0]

Open       16060
High       16180
Low        16000
Close      16180
Volume    239271
Name: 2010-01-04 00:00:00, dtype: object

In [141]:
stock.loc[['2010-01-04','2010-01-05']] #not working

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-05,16520,16580,16300,16440,559219


In [140]:
stock.loc[[pd.Timestamp('2010-01-04'), pd.Timestamp('2010-01-06')]]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-06,16580,16820,16520,16820,459755


In [146]:
stock.loc['2010-1-1':'2010-1-11']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-05,16520,16580,16300,16440,559219
2010-01-06,16580,16820,16520,16820,459755
2010-01-07,16820,16820,16260,16260,443237
2010-01-08,16400,16420,16120,16420,295798
2010-01-11,16420,16460,15940,15940,398901


In [147]:
stock.truncate(before = '2010-1-1', after = '2010-1-11')

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-05,16520,16580,16300,16440,559219
2010-01-06,16580,16820,16520,16820,459755
2010-01-07,16820,16820,16260,16260,443237
2010-01-08,16400,16420,16120,16420,295798
2010-01-11,16420,16460,15940,15940,398901


In [149]:
stock.iloc[0:10] #loc is incl. iloc is exclusive for last number

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-05,16520,16580,16300,16440,559219
2010-01-06,16580,16820,16520,16820,459755
2010-01-07,16820,16820,16260,16260,443237
2010-01-08,16400,16420,16120,16420,295798
2010-01-11,16420,16460,15940,15940,398901
2010-01-12,15940,16180,15900,16180,384460
2010-01-13,16100,16119,15940,15940,267619
2010-01-14,16120,16620,16000,16540,532705
2010-01-15,16640,16860,16580,16840,398401


In [162]:
birthdays = pd.date_range(start ='1987-05-17', end = '2022-12-31', freq = 'A-MAY')
birthdays

DatetimeIndex(['1987-05-31', '1988-05-31', '1989-05-31', '1990-05-31',
               '1991-05-31', '1992-05-31', '1993-05-31', '1994-05-31',
               '1995-05-31', '1996-05-31', '1997-05-31', '1998-05-31',
               '1999-05-31', '2000-05-31', '2001-05-31', '2002-05-31',
               '2003-05-31', '2004-05-31', '2005-05-31', '2006-05-31',
               '2007-05-31', '2008-05-31', '2009-05-31', '2010-05-31',
               '2011-05-31', '2012-05-31', '2013-05-31', '2014-05-31',
               '2015-05-31', '2016-05-31', '2017-05-31', '2018-05-31',
               '2019-05-31', '2020-05-31', '2021-05-31', '2022-05-31'],
              dtype='datetime64[ns]', freq='A-MAY')

In [171]:
birthday_stock = stock.index.isin(birthdays)
birthday_stock

array([False, False, False, ..., False, False, False])

In [175]:
stock[birthday_stock]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-05-31,15440,15620,15360,15520,255822
2011-05-31,17800,18280,17720,18040,478089
2012-05-31,24260,24260,23840,24220,442818
2013-05-31,30920,31180,30740,30760,360501
2016-05-31,25600,26000,25360,25840,1250554
2017-05-31,44580,45020,44400,44700,373382
2018-05-31,50400,50800,49850,50700,63491109
2019-05-31,42600,42800,42150,42500,11365999


# Timestamp Object Attributes and Methods

In [176]:
stock.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-05,16520,16580,16300,16440,559219
2010-01-06,16580,16820,16520,16820,459755
2010-01-07,16820,16820,16260,16260,443237
2010-01-08,16400,16420,16120,16420,295798


In [181]:
someday = stock.index[100]
someday

Timestamp('2010-05-28 00:00:00')

In [184]:
someday.month, someday.week, someday.is_month_start, someday.is_quarter_start

(5, 21, False, False)

In [186]:
someday.month_name() #this is a method, not an attribute

'May'

In [187]:
someday.day_name()

'Friday'

In [188]:
stock.index.day_name()

Index(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Monday',
       'Tuesday', 'Wednesday', 'Thursday', 'Friday',
       ...
       'Wednesday', 'Thursday', 'Friday', 'Monday', 'Tuesday', 'Wednesday',
       'Thursday', 'Monday', 'Tuesday', 'Wednesday'],
      dtype='object', name='Date', length=2714)

In [192]:
stock.insert(0, 'Day of Week', stock.index.day_name())
stock

Unnamed: 0_level_0,Day of Week,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,Monday,16060,16180,16000,16180,239271
2010-01-05,Tuesday,16520,16580,16300,16440,559219
2010-01-06,Wednesday,16580,16820,16520,16820,459755
2010-01-07,Thursday,16820,16820,16260,16260,443237
2010-01-08,Friday,16400,16420,16120,16420,295798
...,...,...,...,...,...,...
2020-12-23,Wednesday,72400,74000,72300,73900,19411326
2020-12-24,Thursday,74100,78800,74000,77800,32502870
2020-12-28,Monday,79000,80100,78200,78700,40085044
2020-12-29,Tuesday,78800,78900,77300,78300,30339449


In [193]:
stock.index.is_month_start

array([False, False, False, ..., False, False, False])

In [194]:
stock.insert(1, 'Is Start of Month', stock.index.is_month_start)
stock

Unnamed: 0_level_0,Day of Week,Is Start of Month,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,False,16060,16180,16000,16180,239271
2010-01-05,Tuesday,False,16520,16580,16300,16440,559219
2010-01-06,Wednesday,False,16580,16820,16520,16820,459755
2010-01-07,Thursday,False,16820,16820,16260,16260,443237
2010-01-08,Friday,False,16400,16420,16120,16420,295798
...,...,...,...,...,...,...,...
2020-12-23,Wednesday,False,72400,74000,72300,73900,19411326
2020-12-24,Thursday,False,74100,78800,74000,77800,32502870
2020-12-28,Monday,False,79000,80100,78200,78700,40085044
2020-12-29,Tuesday,False,78800,78900,77300,78300,30339449


In [195]:
stock[stock['Is Start of Month']]

Unnamed: 0_level_0,Day of Week,Is Start of Month,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-02-01,Monday,True,15679,15700,15300,15540,501042
2010-04-01,Thursday,True,16520,16900,16480,16900,561659
2010-06-01,Tuesday,True,15520,15580,15300,15360,228117
2010-07-01,Thursday,True,15580,15600,15199,15320,444516
2010-09-01,Wednesday,True,15240,15259,15000,15040,389308
...,...,...,...,...,...,...,...
2020-04-01,Wednesday,True,47450,47900,45800,45800,27259532
2020-06-01,Monday,True,50800,51200,50600,51200,16949183
2020-07-01,Wednesday,True,53400,53600,52400,52600,16706143
2020-09-01,Tuesday,True,54100,54800,54100,54200,19363117


## The pd.DateOffset Object

In [215]:
import pandas_datareader.data as web
stock = web.DataReader('005930', 'naver', start='2010-01-1', end='2020-12-31')
stock.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,16060,16180,16000,16180,239271
2010-01-05,16520,16580,16300,16440,559219
2010-01-06,16580,16820,16520,16820,459755
2010-01-07,16820,16820,16260,16260,443237
2010-01-08,16400,16420,16120,16420,295798


In [216]:
stock.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2714 entries, 2010-01-04 to 2020-12-30
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Open    2714 non-null   object
 1   High    2714 non-null   object
 2   Low     2714 non-null   object
 3   Close   2714 non-null   object
 4   Volume  2714 non-null   object
dtypes: object(5)
memory usage: 127.2+ KB


In [217]:
stock = stock.astype('int')

In [218]:
stock.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2714 entries, 2010-01-04 to 2020-12-30
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Open    2714 non-null   int64
 1   High    2714 non-null   int64
 2   Low     2714 non-null   int64
 3   Close   2714 non-null   int64
 4   Volume  2714 non-null   int64
dtypes: int64(5)
memory usage: 127.2 KB


In [219]:
stock.High + 5

Date
2010-01-04    16185
2010-01-05    16585
2010-01-06    16825
2010-01-07    16825
2010-01-08    16425
              ...  
2020-12-23    74005
2020-12-24    78805
2020-12-28    80105
2020-12-29    78905
2020-12-30    81305
Name: High, Length: 2714, dtype: int64

In [221]:
stock.index + 5 # not possible on datetime

TypeError: Addition/subtraction of integers and integer-arrays with DatetimeArray is no longer supported.  Instead of adding/subtracting `n`, use `n * obj.freq`

In [223]:
type(stock.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [225]:
stock.index

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2020-12-16', '2020-12-17', '2020-12-18', '2020-12-21',
               '2020-12-22', '2020-12-23', '2020-12-24', '2020-12-28',
               '2020-12-29', '2020-12-30'],
              dtype='datetime64[ns]', name='Date', length=2714, freq=None)

In [232]:
stock.index + pd.DateOffset(days = 5)

DatetimeIndex(['2010-01-09', '2010-01-10', '2010-01-11', '2010-01-12',
               '2010-01-13', '2010-01-16', '2010-01-17', '2010-01-18',
               '2010-01-19', '2010-01-20',
               ...
               '2020-12-21', '2020-12-22', '2020-12-23', '2020-12-26',
               '2020-12-27', '2020-12-28', '2020-12-29', '2021-01-02',
               '2021-01-03', '2021-01-04'],
              dtype='datetime64[ns]', name='Date', length=2714, freq=None)

In [234]:
stock.index = stock.index - pd.DateOffset(days = 5)

In [235]:
stock.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-12-30,16060,16180,16000,16180,239271
2009-12-31,16520,16580,16300,16440,559219
2010-01-01,16580,16820,16520,16820,459755
2010-01-02,16820,16820,16260,16260,443237
2010-01-03,16400,16420,16120,16420,295798


In [236]:
stock.index + pd.DateOffset(weeks = 2)

DatetimeIndex(['2010-01-13', '2010-01-14', '2010-01-15', '2010-01-16',
               '2010-01-17', '2010-01-20', '2010-01-21', '2010-01-22',
               '2010-01-23', '2010-01-24',
               ...
               '2020-12-25', '2020-12-26', '2020-12-27', '2020-12-30',
               '2020-12-31', '2021-01-01', '2021-01-02', '2021-01-06',
               '2021-01-07', '2021-01-08'],
              dtype='datetime64[ns]', name='Date', length=2714, freq=None)

In [237]:
stock.index + pd.DateOffset(years = 1, months = 1, days = 1)

DatetimeIndex(['2011-01-31', '2011-02-01', '2011-02-02', '2011-02-03',
               '2011-02-04', '2011-02-07', '2011-02-08', '2011-02-09',
               '2011-02-10', '2011-02-11',
               ...
               '2022-01-12', '2022-01-13', '2022-01-14', '2022-01-17',
               '2022-01-18', '2022-01-19', '2022-01-20', '2022-01-24',
               '2022-01-25', '2022-01-26'],
              dtype='datetime64[ns]', name='Date', length=2714, freq=None)

## Timeseries Offsets
Dynamic change date

In [243]:
import pandas_datareader.data as web
stock = web.DataReader('005930', 'naver', start='2010-1-31', end='2020-12-31')
stock.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-02-01,15679,15700,15300,15540,501042
2010-02-02,15800,15800,15400,15440,393803
2010-02-03,15640,15700,15360,15540,330235
2010-02-04,15500,15660,15440,15520,239799
2010-02-05,15160,15220,14940,15000,515034


In [244]:
stock.index + pd.tseries.offsets.MonthEnd()

DatetimeIndex(['2010-02-28', '2010-02-28', '2010-02-28', '2010-02-28',
               '2010-02-28', '2010-02-28', '2010-02-28', '2010-02-28',
               '2010-02-28', '2010-02-28',
               ...
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31'],
              dtype='datetime64[ns]', name='Date', length=2694, freq=None)

In [245]:
# error occur when origional date is already end month. it will go to last day of next month
# example, 2020-1-31 changed to 2020-2-28

In [246]:
from pandas.tseries import offsets

In [247]:
stock.index - offsets.MonthEnd()

DatetimeIndex(['2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31',
               ...
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30'],
              dtype='datetime64[ns]', name='Date', length=2694, freq=None)

In [249]:
stock.index + offsets.BMonthEnd() # business monthend

DatetimeIndex(['2010-02-26', '2010-02-26', '2010-02-26', '2010-02-26',
               '2010-02-26', '2010-02-26', '2010-02-26', '2010-02-26',
               '2010-02-26', '2010-02-26',
               ...
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31'],
              dtype='datetime64[ns]', name='Date', length=2694, freq=None)

## The Timedelta Object
diff between times

In [251]:
time_a = pd.Timestamp('2020/02/23')
time_b = pd.Timestamp('2020/3/1')
time_a - time_b

Timedelta('-7 days +00:00:00')

In [252]:
pd.Timedelta(days = 3)

Timedelta('3 days 00:00:00')

In [256]:
time_a + pd.Timedelta(weeks = 2, days =3, hours = 12, minutes = 45) #years will not work

Timestamp('2020-03-11 12:45:00')

In [260]:
time_a + pd.Timedelta('5 minutes') # very flexible on format

Timestamp('2020-02-23 00:05:00')

In [261]:
time_a + pd.Timedelta('14days 6 hours 12 minutes')

Timestamp('2020-03-08 06:12:00')

## The Timedelta in Dataset

In [265]:
shipping = pd.read_csv('ecommerce.csv', index_col = 'ID', 
            parse_dates = ['order_date', 'delivery_date'])
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [269]:
shipping['delivery_time'] = shipping.delivery_date - shipping.order_date
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


In [275]:
shipping['twice_as_long'] = shipping.delivery_date + shipping.delivery_time
shipping.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
delivery_time    timedelta64[ns]
twice_as_long     datetime64[ns]
dtype: object

In [278]:
shipping.delivery_time > '365 days' #time can compare string

ID
1      False
2       True
4       True
5       True
7       True
       ...  
990     True
991     True
993     True
994    False
997     True
Length: 501, dtype: bool

In [None]:
shipping[shipping.delivery_time ]