### Intro to the Working with Dates and Times Module

In [69]:
import datetime as dt
import os

import pandas as pd
## datetime is not an external library; it is part of Python's standard library


### Review of Python's datetime Module

In [7]:
## The datetime module provides a datetime class (written datetime.datetime, or dt.datetime with the alias used here)
## that holds year, month, day, hour, minute and second.
## dt.date is the date-only counterpart: date(year, month, day).
someday = dt.date(year = 2016, month = 4, day = 12)
someday

datetime.date(2016, 4, 12)

In [12]:
## date attributes: .year, .month, .day
## Only the last bare expression in a cell is displayed, so this cell shows someday.day
someday.year
someday.month
someday.day

12

In [13]:
dt.datetime(2010, 1, 10)  ## the default of time will be midnight(default is 0)

datetime.datetime(2010, 1, 10, 0, 0)

In [20]:
str(dt.datetime(2010, 1, 10))

'2010-01-10 00:00:00'

In [17]:
dt.datetime(2010, 1, 10, 17, 13, 57)  ## 2010/01/10 5pm13mins57s

datetime.datetime(2010, 1, 10, 17, 13, 57)

In [19]:
str(dt.datetime(2010, 1, 10, 17, 13, 57))   ## more readable

'2010-01-10 17:13:57'

In [21]:
## Keep the full timestamp (2010-01-10 17:13:57) in a variable so its attributes can be inspected
sometime = dt.datetime(year = 2010, month = 1, day = 10, hour = 17, minute = 13, second = 57)

In [28]:
## datetime attributes: .year, .month, .day, .hour, .minute, .second
## Only the last bare expression in a cell is displayed, so this cell shows sometime.second
sometime.year
sometime.month
sometime.day
sometime.hour
sometime.minute
sometime.second

57

### The pandas Timestamp Object

In [29]:
pd.Timestamp('2015-03-31')
## ts_input = can be strings or Python datetime object 

Timestamp('2015-03-31 00:00:00')

In [30]:
pd.Timestamp('2015/03/31')  ## slashes work

Timestamp('2015-03-31 00:00:00')

In [31]:
pd.Timestamp('2013, 11, 04')  ## comma works

Timestamp('2013-11-04 00:00:00')

In [32]:
pd.Timestamp('1/1/2015')  ## month-day-year work

Timestamp('2015-01-01 00:00:00')

In [33]:
## '19/12/2015' can only be read as day/month/year because 19 is not a valid month.
## When the first number is 12 or less the string is read month-first instead
## (e.g. '10/04/1991' is parsed as October 4th), so prefer the unambiguous 'YYYY-MM-DD' form.
pd.Timestamp('19/12/2015')

Timestamp('2015-12-19 00:00:00')

In [35]:
pd.Timestamp('2021-03-08 08:35:15')  ## pd.Timestamp() with date and time

Timestamp('2021-03-08 08:35:15')

In [37]:
pd.Timestamp('2021-03-08 06:13:29 PM')  ## evening time add PM/AM

Timestamp('2021-03-08 18:13:29')

In [39]:
## feed in Python datetime
pd.Timestamp(dt.date(2015, 1, 1))

Timestamp('2015-01-01 00:00:00')

In [41]:
pd.Timestamp(dt.datetime(2015, 9, 23, 15, 30, 59))

Timestamp('2015-09-23 15:30:59')

### The pandas DatetimeIndex Object

In [47]:
## DateTimeIndex is a collection of pandas Timestamp object
dates = ['2016-01-02', '2016-04-12', '2009-09-07']   ## The format can be with slashes
pd.DatetimeIndex(dates)   ## First it converts the strings into pandas Timestamps and then store in new object DateTimeIndex

DatetimeIndex(['2016-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [58]:
## Mix date-only and full datetime objects; DatetimeIndex stores every entry as a datetime64 value
dates = [
    dt.date(2016, 1, 10),
    dt.date(1994, 6, 13),
    dt.datetime(2003, 12, 29, 10, 59, 59),
]
dtIndex = pd.DatetimeIndex(dates)
dtIndex  ## same content as building pd.DatetimeIndex(dates) a second time

DatetimeIndex(['2016-01-10 00:00:00', '1994-06-13 00:00:00',
               '2003-12-29 10:59:59'],
              dtype='datetime64[ns]', freq=None)

In [59]:
values = [100, 200, 300]
pd.Series(data = values, index = dtIndex)

2016-01-10 00:00:00    100
1994-06-13 00:00:00    200
2003-12-29 10:59:59    300
dtype: int64

### The pd.to_datetime() Method

In [64]:
## pd.to_datetime() is called directly on the pandas library and converts an existing object into pandas datetime objects
## Only the last call's result is displayed; the results of the first three calls are discarded
pd.to_datetime('2001-04-19 03:15:59')  ## accepts a string
pd.to_datetime(dt.date(2015, 1, 1))  ## accepts a Python date
pd.to_datetime(dt.datetime(2015, 1, 1, 14, 35, 20))  ## accepts a Python datetime
## NOTE(review): newer pandas releases may require format = 'mixed' for a list like the one below — confirm against the installed version
pd.to_datetime(['2015-01-03', '2014/02/08', '2016', 'July 4th, 1996']) ## accepts a list; the entries here use several different formats

DatetimeIndex(['2015-01-03', '2014-02-08', '2016-01-01', '1996-07-04'], dtype='datetime64[ns]', freq=None)

In [66]:
times = pd.Series(data = ['2015-01-03', '2014/02/08', '2016', 'July 4th, 1996'])  ## output a string series
times 

0        2015-01-03
1        2014/02/08
2              2016
3    July 4th, 1996
dtype: object

In [67]:
pd.to_datetime(times)  ## can also pass pandas Series into .to_datetime() function, format changes

0   2015-01-03
1   2014-02-08
2   2016-01-01
3   1996-07-04
dtype: datetime64[ns]

In [69]:
## examples of bad data/funky series
dates = pd.Series(data = ['July 4th, 1996', '10/04/1991', 'Hello', '2015-02-31'])  ## string series
dates 

0    July 4th, 1996
1        10/04/1991
2             Hello
3        2015-02-31
dtype: object

In [70]:
## With the default errors = 'raise', pd.to_datetime() raises ValueError on the unparseable value 'Hello'.
## The error is the point of this cell, so catch and show it instead of letting it abort Restart & Run All.
try:
    pd.to_datetime(dates)
except ValueError as err:
    print('ValueError:', err)

ValueError: ('Unknown string format:', 'Hello')

In [71]:
## parameter errors = 'raise' means if encountered an error, raise it to the user, 
## errors = 'coerce' means when encountered an error, the value will be shown as NaT(Not a Time), just like NaN
pd.to_datetime(dates, errors = 'coerce')

0   1996-07-04
1   1991-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

In [74]:
## pd.to_datetime() can also convert Unix timestamps (seconds elapsed since Jan 1st, 1970)
## unit = 's' tells pandas that the numbers are counted in seconds
pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit = 's' )

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

### Create Range of Dates with the pd.date_range() Method, Part 1

In [3]:
## pd.date_range() is called directly on the pandas library and builds a DatetimeIndex
## supply at least two of start = , end = , periods = ; freq = 'D' (the default) steps one calendar day at a time
times = pd.date_range('2016-01-01', '2016-01-10', freq = 'D')
times

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10'],
              dtype='datetime64[ns]', freq='D')

In [4]:
times[0]  ## times is a DatetimeIndex; taking one position from it returns a single Timestamp

Timestamp('2016-01-01 00:00:00', freq='D')

In [5]:
pd.date_range(start = '2016-01-01', end = '2016-01-10', freq = '2D')  ## freq = '2D' is the interval of two days
## freq = 'B' means Business days and will exclude weekends

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [6]:
pd.date_range(start = '2016-01-01', end = '2016-01-10', freq = 'B')

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [7]:
pd.date_range(start = '2016-01-01', end = '2016-01-15', freq = 'W')
## freq = 'W' means week, the default is going to look for each Sunday in each week, W-SUN

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

In [8]:
## freq = 'W-FRI' anchors the weekly frequency on Fridays instead of the default Sundays
## The lowercase spelling ran without error here; only the second call's result is displayed
pd.date_range(start = '2016-01-01', end = '2016-01-15', freq = 'W-fri')
pd.date_range(start = '2016-01-01', end = '2016-01-15', freq = 'W-FRI')

DatetimeIndex(['2016-01-01', '2016-01-08', '2016-01-15'], dtype='datetime64[ns]', freq='W-FRI')

In [9]:
pd.date_range(start = '2016-01-01', end = '2016-01-15', freq = 'H')
## freq = 'H' means hour from the start of start day to the start of end day

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00',
               '2016-01-01 02:00:00', '2016-01-01 03:00:00',
               '2016-01-01 04:00:00', '2016-01-01 05:00:00',
               '2016-01-01 06:00:00', '2016-01-01 07:00:00',
               '2016-01-01 08:00:00', '2016-01-01 09:00:00',
               ...
               '2016-01-14 15:00:00', '2016-01-14 16:00:00',
               '2016-01-14 17:00:00', '2016-01-14 18:00:00',
               '2016-01-14 19:00:00', '2016-01-14 20:00:00',
               '2016-01-14 21:00:00', '2016-01-14 22:00:00',
               '2016-01-14 23:00:00', '2016-01-15 00:00:00'],
              dtype='datetime64[ns]', length=337, freq='H')

In [10]:
pd.date_range(start = '2016-01-01', end = '2016-01-15', freq = '6H')  ## 6 hours interval

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 06:00:00',
               '2016-01-01 12:00:00', '2016-01-01 18:00:00',
               '2016-01-02 00:00:00', '2016-01-02 06:00:00',
               '2016-01-02 12:00:00', '2016-01-02 18:00:00',
               '2016-01-03 00:00:00', '2016-01-03 06:00:00',
               '2016-01-03 12:00:00', '2016-01-03 18:00:00',
               '2016-01-04 00:00:00', '2016-01-04 06:00:00',
               '2016-01-04 12:00:00', '2016-01-04 18:00:00',
               '2016-01-05 00:00:00', '2016-01-05 06:00:00',
               '2016-01-05 12:00:00', '2016-01-05 18:00:00',
               '2016-01-06 00:00:00', '2016-01-06 06:00:00',
               '2016-01-06 12:00:00', '2016-01-06 18:00:00',
               '2016-01-07 00:00:00', '2016-01-07 06:00:00',
               '2016-01-07 12:00:00', '2016-01-07 18:00:00',
               '2016-01-08 00:00:00', '2016-01-08 06:00:00',
               '2016-01-08 12:00:00', '2016-01-08 18:00:00',
               '2016-01-

In [11]:
pd.date_range(start = '2016-01-01', end = '2016-12-31', freq = 'M')
## freq = 'M' will give us the month's end day

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [12]:
pd.date_range(start = '2016-01-01', end = '2016-12-31', freq = 'MS')
## freq = 'MS' means month start, give the first day of each month in the range

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [13]:
pd.date_range(start = '2016-01-01', end = '2020-01-01', freq = 'A')
## freq = 'A' will show the year end within our date range, default is freq = 'A-DEC'

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31'], dtype='datetime64[ns]', freq='A-DEC')

### Create Range of Dates with the pd.date_range() Method, Part II

In [14]:
## use start =  and periods =  parameters periods =  means the number of results we want to get
pd.date_range(start = '2012-09-09', periods = 25, freq = 'D')  ## it has 25 values

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03'],
              dtype='datetime64[ns]', freq='D')

### Create Range of Dates with the pd.date_range() Method, Part III

In [15]:
## use end =  and periods =  parameters
pd.date_range(end = '1999-12-31', periods = 20, freq = 'D')  ## backward extract date

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

### The .dt Accessor

In [18]:
## The .dt accessor does for a Series of datetimes what the .str accessor does for a Series of text
## Sample data: one date every 24 days, starting 2000-01-01 and stopping before the end of 2010
bunch_of_dates = pd.date_range('2000-01-01', '2010-12-31', freq = '24D')
bunch_of_dates

DatetimeIndex(['2000-01-01', '2000-01-25', '2000-02-18', '2000-03-13',
               '2000-04-06', '2000-04-30', '2000-05-24', '2000-06-17',
               '2000-07-11', '2000-08-04',
               ...
               '2010-05-20', '2010-06-13', '2010-07-07', '2010-07-31',
               '2010-08-24', '2010-09-17', '2010-10-11', '2010-11-04',
               '2010-11-28', '2010-12-22'],
              dtype='datetime64[ns]', length=168, freq='24D')

In [19]:
s = pd.Series(bunch_of_dates)
s.head(3)

0   2000-01-01
1   2000-01-25
2   2000-02-18
dtype: datetime64[ns]

In [24]:
## We can not directly call attribute on the datetime series, we need to use .dt.attribute
s.dt.day.head(3)  ## can be used in filtration process or business logic

0     1
1    25
2    18
dtype: int64

In [25]:
s.dt.month.value_counts()

11    15
8     15
7     15
4     15
1     15
12    14
5     14
3     14
10    13
9     13
6     13
2     12
dtype: int64

In [30]:
## .dt.weekday_name was deprecated and later removed from pandas; .dt.day_name() returns the same day names
## (it is the numeric .dt.weekday attribute that counts Monday as 0)
s.dt.day_name().head(3)

0    Saturday
1     Tuesday
2      Friday
dtype: object

In [32]:
## Attributes with boolean output, such as is_quarter_start, can be used directly as a filter mask (no == True needed)
s[s.dt.is_quarter_start]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

In [33]:
## is_month_start is already a boolean Series, so it is used as the mask as-is
s[s.dt.is_month_start]

0     2000-01-01
19    2001-04-01
38    2002-07-01
104   2006-11-01
109   2007-03-01
137   2009-01-01
142   2009-05-01
dtype: datetime64[ns]

### Install pandas-datareader Library

In [36]:
from pandas_datareader import data

In [65]:
## data.DataReader parameters: name = stock symbol to pull (string), start = and end = bound the date range,
## data_source = the provider to pull from (for example quandl, yahoo or iex)
start = '2010-01-01'
end = '2017-12-31'
company = 'MSFT'
## The call with data_source = 'google' was dropped because that API is deprecated.
## The Quandl key is read from the QUANDL_API_KEY environment variable instead of being hardcoded in the notebook;
## the key that used to be written here should be treated as leaked and revoked.
stocks = data.DataReader(name = company, data_source = 'quandl', start = start, end = end,
                         access_key = os.environ.get('QUANDL_API_KEY'))
stocks.head(3)   ## data.DataReader returns a dataframe with a datetime index

SSLError: ("bad handshake: Error([('SSL routines', 'SSL23_GET_SERVER_HELLO', 'unknown protocol')],)",)

In [129]:
## Load the locally saved MSFT prices with the Date column as the index and without the 'Adj Close' column
stocks = pd.read_csv('MSFT.csv', index_col = 'Date')
stocks = stocks.drop(columns = 'Adj Close')
stocks.head(3)  ## not displayed: only the last expression of a cell is shown
## Convert the index read from the file into a DatetimeIndex
stocks.index = pd.DatetimeIndex(stocks.index)
stocks.index

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2017-12-15', '2017-12-18', '2017-12-19', '2017-12-20',
               '2017-12-21', '2017-12-22', '2017-12-26', '2017-12-27',
               '2017-12-28', '2017-12-29'],
              dtype='datetime64[ns]', name='Date', length=2013, freq=None)

In [77]:
stocks.values
stocks.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')

In [78]:
stocks.axes

[DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
                '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
                '2010-01-14', '2010-01-15',
                ...
                '2017-12-15', '2017-12-18', '2017-12-19', '2017-12-20',
                '2017-12-21', '2017-12-22', '2017-12-26', '2017-12-27',
                '2017-12-28', '2017-12-29'],
               dtype='datetime64[ns]', name='Date', length=2013, freq=None),
 Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')]

### Selecting Rows from a DataFrame with a DatetimeIndex

In [87]:
## Select rows of a DataFrame whose index is a DatetimeIndex
## Only the last expression (stocks.iloc[300]) is displayed; the .loc result is discarded
stocks.loc['2014-03-04']  ## label-based: the row for that date
## stocks.loc['2010-01-01']  the stock market is closed on the holiday, so there is no such row and this gives an error
## stocks.ix['2014-03-04']
stocks.iloc[300]  ## position-based: the row at integer position 300
## stocks.ix[300]
## NOTE(review): .ix has been removed from recent pandas releases; .loc / .iloc above are the replacements

Open      2.549000e+01
High      2.576000e+01
Low       2.535000e+01
Close     2.569000e+01
Volume    5.447340e+07
Name: 2011-03-14 00:00:00, dtype: float64

In [88]:
stocks.loc['2013-10-01' : '2013-10-07']  ## pull a sequence of dates

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-10-01,33.349998,33.610001,33.299999,33.580002,36718700
2013-10-02,33.360001,34.029999,33.290001,33.919998,46946800
2013-10-03,33.880001,34.0,33.419998,33.860001,38703800
2013-10-04,33.689999,33.990002,33.619999,33.880001,33008100
2013-10-07,33.599998,33.709999,33.200001,33.299999,35069300


In [130]:
## Using pd.DateOffset(years = 1) as the frequency steps one calendar year at a time from the start date,
## which yields April 12th of every year inside the range
birthdays = pd.date_range('1991-04-12', '2017-12-31', freq = pd.DateOffset(years = 1))
birthdays

DatetimeIndex(['1991-04-12', '1992-04-12', '1993-04-12', '1994-04-12',
               '1995-04-12', '1996-04-12', '1997-04-12', '1998-04-12',
               '1999-04-12', '2000-04-12', '2001-04-12', '2002-04-12',
               '2003-04-12', '2004-04-12', '2005-04-12', '2006-04-12',
               '2007-04-12', '2008-04-12', '2009-04-12', '2010-04-12',
               '2011-04-12', '2012-04-12', '2013-04-12', '2014-04-12',
               '2015-04-12', '2016-04-12', '2017-04-12'],
              dtype='datetime64[ns]', freq='<DateOffset: years=1>')

In [132]:
## .isin() marks the index dates that also appear in birthdays, so only the rows that fall on a birthday are kept
## Both lines select the same rows; only the second result is displayed
stocks.loc[stocks.index.isin(birthdays)]
stocks[stocks.index.isin(birthdays)]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-04-12,30.25,30.49,30.209999,30.32,37068800
2011-04-12,25.83,25.85,25.549999,25.639999,36920400
2012-04-12,30.48,31.040001,30.42,30.98,38304000
2013-04-12,28.85,29.02,28.66,28.790001,62886300
2016-04-12,54.369999,54.779999,53.759998,54.650002,24944300
2017-04-12,65.419998,65.510002,65.110001,65.230003,17108500


### Timestamp Object Attributes

In [104]:
stocks.head(2)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,30.620001,31.1,30.59,30.950001,38409100
2010-01-05,30.85,31.1,30.639999,30.959999,49749600


In [119]:
## someday is one Timestamp taken from the index; somedays is the whole DatetimeIndex
someday = stocks.index[500]
somedays = stocks.index
somedays.day
somedays.month
somedays.year
somedays.day_name()  ## replaces weekday_name, which was deprecated and later removed from pandas
somedays.is_month_end
somedays.is_month_start  ## only this last result is displayed

array([False, False, False, ..., False, False, False])

In [133]:
## Build a new column from the index: the weekday name of each date
## .day_name() replaces the .weekday_name attribute, which was deprecated and later removed from pandas
## NOTE: insert() raises if the column already exists, so stocks must be reloaded before re-running this cell
stocks.insert(loc = 0, column = 'Day of Week', value = stocks.index.day_name())

In [134]:
stocks.head(3)

Unnamed: 0_level_0,Day of Week,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,Monday,30.620001,31.1,30.59,30.950001,38409100
2010-01-05,Tuesday,30.85,31.1,30.639999,30.959999,49749600
2010-01-06,Wednesday,30.879999,31.08,30.52,30.77,58182400


In [135]:
stocks.insert(loc = 1, column = 'Is Start of Month', value = stocks.index.is_month_start)
stocks.head(3)

Unnamed: 0_level_0,Day of Week,Is Start of Month,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,False,30.620001,31.1,30.59,30.950001,38409100
2010-01-05,Tuesday,False,30.85,31.1,30.639999,30.959999,49749600
2010-01-06,Wednesday,False,30.879999,31.08,30.52,30.77,58182400


In [138]:
stocks[stocks['Is Start of Month']].head(3)

Unnamed: 0_level_0,Day of Week,Is Start of Month,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-02-01,Monday,True,28.389999,28.48,27.92,28.41,85931100
2010-03-01,Monday,True,28.77,29.049999,28.530001,29.02,43805400
2010-04-01,Thursday,True,29.35,29.540001,28.620001,29.16,74768100


### The .truncate() Method

In [147]:
## .truncate() function can be called on dataframes or series for slicing operations with a datetime index
stocks = pd.read_csv('MSFT.csv', index_col = 'Date').drop('Adj Close', axis = 'columns')
stocks.index = pd.DatetimeIndex(stocks.index)
stocks.head(1)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,30.620001,31.1,30.59,30.950001,38409100


In [148]:
## parameters: before =  after =  are the 'start date' and 'end date'
stocks.truncate(before = '2011-02-05', after = '2011-02-28')  ## extract the dataframe, no matter start or end date exist
## the same as stocks.loc['2011-02-05':'2011-02-28']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-02-07,27.799999,28.34,27.790001,28.200001,68980900
2011-02-08,28.1,28.34,28.049999,28.280001,34904200
2011-02-09,28.190001,28.26,27.91,27.969999,52905100
2011-02-10,27.93,27.940001,27.290001,27.5,76672400
2011-02-11,27.76,27.809999,27.07,27.25,83939700
2011-02-14,27.209999,27.27,26.950001,27.23,56766200
2011-02-15,27.040001,27.33,26.950001,26.959999,44116500
2011-02-16,27.049999,27.07,26.6,27.02,70817900
2011-02-17,26.969999,27.370001,26.91,27.209999,57207300
2011-02-18,27.129999,27.209999,26.99,27.059999,68667800


### pd.DateOffset Objects

In [None]:
## pd.DateOffset is used to shift existing dates and times forward or backward
## dt.datetime.now() returns the current date and time, so the requested range runs up to the present
## Fixed: the start date used the undefined name `dat`; the datetime module is imported as `dt`
## NOTE(review): the 'google' data source is believed to be discontinued — confirm a working source before running
data.DataReader(name = 'GOOG', data_source = 'google', start = dt.date(2000, 1, 1), end = dt.datetime.now())

In [152]:
stocks = pd.read_csv('GOOG.csv', index_col = 'Date').drop('Adj Close', axis = 'columns')
stocks.index = pd.DatetimeIndex(stocks.index)
stocks.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-08-19,49.813286,51.835709,47.800831,49.982655,44871300
2004-08-20,50.316402,54.336334,50.062355,53.95277,22942800
2004-08-23,55.168217,56.528118,54.321388,54.495735,18342800


In [155]:
## pd.DateOffset represents a length of time (a duration) that can be added to or subtracted from existing dates
## stocks.index + 5 ## get an error
stocks.index + pd.DateOffset(days = 5)  ## returns a new index; assign it back to stocks.index to keep the shifted dates

DatetimeIndex(['2004-08-24', '2004-08-25', '2004-08-28', '2004-08-29',
               '2004-08-30', '2004-08-31', '2004-09-01', '2004-09-04',
               '2004-09-05', '2004-09-06',
               ...
               '2019-08-05', '2019-08-06', '2019-08-07', '2019-08-10',
               '2019-08-11', '2019-08-12', '2019-08-13', '2019-08-14',
               '2019-08-17', '2019-08-18'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

In [156]:
stocks.index + pd.DateOffset(weeks = 2)

DatetimeIndex(['2004-09-02', '2004-09-03', '2004-09-06', '2004-09-07',
               '2004-09-08', '2004-09-09', '2004-09-10', '2004-09-13',
               '2004-09-14', '2004-09-15',
               ...
               '2019-08-14', '2019-08-15', '2019-08-16', '2019-08-19',
               '2019-08-20', '2019-08-21', '2019-08-22', '2019-08-23',
               '2019-08-26', '2019-08-27'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

In [158]:
stocks.index - pd.DateOffset(months = 3)
stocks.index + pd.DateOffset(years = 1)

DatetimeIndex(['2005-08-19', '2005-08-20', '2005-08-23', '2005-08-24',
               '2005-08-25', '2005-08-26', '2005-08-27', '2005-08-30',
               '2005-08-31', '2005-09-01',
               ...
               '2020-07-31', '2020-08-01', '2020-08-02', '2020-08-05',
               '2020-08-06', '2020-08-07', '2020-08-08', '2020-08-09',
               '2020-08-12', '2020-08-13'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

In [159]:
## adding/subtracting hours, the date will be converted to datetime if it is date index
stocks.index + pd.DateOffset(hours = 6)

DatetimeIndex(['2004-08-19 06:00:00', '2004-08-20 06:00:00',
               '2004-08-23 06:00:00', '2004-08-24 06:00:00',
               '2004-08-25 06:00:00', '2004-08-26 06:00:00',
               '2004-08-27 06:00:00', '2004-08-30 06:00:00',
               '2004-08-31 06:00:00', '2004-09-01 06:00:00',
               ...
               '2019-07-31 06:00:00', '2019-08-01 06:00:00',
               '2019-08-02 06:00:00', '2019-08-05 06:00:00',
               '2019-08-06 06:00:00', '2019-08-07 06:00:00',
               '2019-08-08 06:00:00', '2019-08-09 06:00:00',
               '2019-08-12 06:00:00', '2019-08-13 06:00:00'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

In [161]:
## add/subtract multiple time structures
stocks.index - pd.DateOffset(years = 1, months = 3, days = 10)   ## order does not matter

DatetimeIndex(['2003-05-09', '2003-05-10', '2003-05-13', '2003-05-14',
               '2003-05-15', '2003-05-16', '2003-05-17', '2003-05-20',
               '2003-05-21', '2003-05-22',
               ...
               '2018-04-20', '2018-04-21', '2018-04-22', '2018-04-25',
               '2018-04-26', '2018-04-27', '2018-04-28', '2018-04-29',
               '2018-05-02', '2018-05-03'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

### More Fun with pd.DateOffset Objects

In [162]:
## Previously the same fixed duration was added to every date; these offsets instead move each date to a calendar boundary
## This is useful for rounding dates to the end/beginning of a period
stocks.index + pd.tseries.offsets.MonthEnd()
## Each date moves forward to the end of its month; a date already on a month end (2004-08-31) rolls to the next one (2004-09-30)

DatetimeIndex(['2004-08-31', '2004-08-31', '2004-08-31', '2004-08-31',
               '2004-08-31', '2004-08-31', '2004-08-31', '2004-08-31',
               '2004-09-30', '2004-09-30',
               ...
               '2019-08-31', '2019-08-31', '2019-08-31', '2019-08-31',
               '2019-08-31', '2019-08-31', '2019-08-31', '2019-08-31',
               '2019-08-31', '2019-08-31'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

In [163]:
stocks.index - pd.tseries.offsets.MonthEnd()  ## all be converted to the end of last month including 2004-08-31

DatetimeIndex(['2004-07-31', '2004-07-31', '2004-07-31', '2004-07-31',
               '2004-07-31', '2004-07-31', '2004-07-31', '2004-07-31',
               '2004-07-31', '2004-08-31',
               ...
               '2019-06-30', '2019-07-31', '2019-07-31', '2019-07-31',
               '2019-07-31', '2019-07-31', '2019-07-31', '2019-07-31',
               '2019-07-31', '2019-07-31'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

In [170]:
stocks.index + pd.tseries.offsets.MonthBegin()  ## go to the next month beginning
pd.Series(stocks.index - pd.tseries.offsets.MonthBegin()).head(10)  ## here same situation as the + .MonthEnd() function

0   2004-08-01
1   2004-08-01
2   2004-08-01
3   2004-08-01
4   2004-08-01
5   2004-08-01
6   2004-08-01
7   2004-08-01
8   2004-08-01
9   2004-08-01
Name: Date, dtype: datetime64[ns]

In [171]:
## Wildcard import: brings every public name from pandas.tseries.offsets into the notebook namespace
## NOTE(review): an explicit list (MonthEnd, BMonthEnd, QuarterEnd, ...) would make the origin of each name clear
from pandas.tseries.offsets import *

In [172]:
## so that we can write code in this way:
stocks.index - MonthEnd()

DatetimeIndex(['2004-07-31', '2004-07-31', '2004-07-31', '2004-07-31',
               '2004-07-31', '2004-07-31', '2004-07-31', '2004-07-31',
               '2004-07-31', '2004-08-31',
               ...
               '2019-06-30', '2019-07-31', '2019-07-31', '2019-07-31',
               '2019-07-31', '2019-07-31', '2019-07-31', '2019-07-31',
               '2019-07-31', '2019-07-31'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

In [176]:
stocks.index + BMonthEnd()
stocks.index + QuarterEnd()
stocks.index - QuarterEnd()
stocks.index + QuarterBegin()   ## does not make total sense
stocks.index - QuarterBegin()

DatetimeIndex(['2004-06-01', '2004-06-01', '2004-06-01', '2004-06-01',
               '2004-06-01', '2004-06-01', '2004-06-01', '2004-06-01',
               '2004-06-01', '2004-06-01',
               ...
               '2019-06-01', '2019-06-01', '2019-06-01', '2019-06-01',
               '2019-06-01', '2019-06-01', '2019-06-01', '2019-06-01',
               '2019-06-01', '2019-06-01'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

In [180]:
stocks.index + YearEnd()
stocks.index - YearEnd()
stocks.index + YearBegin()
stocks.index - YearBegin()

DatetimeIndex(['2004-01-01', '2004-01-01', '2004-01-01', '2004-01-01',
               '2004-01-01', '2004-01-01', '2004-01-01', '2004-01-01',
               '2004-01-01', '2004-01-01',
               ...
               '2019-01-01', '2019-01-01', '2019-01-01', '2019-01-01',
               '2019-01-01', '2019-01-01', '2019-01-01', '2019-01-01',
               '2019-01-01', '2019-01-01'],
              dtype='datetime64[ns]', name='Date', length=3772, freq=None)

### The Timedelta Object

In [183]:
## A Timedelta is a duration: the distance between two points in time
## First way to get one: subtract one Timestamp from another
timeA, timeB = (
    pd.Timestamp(text)
    for text in ('2016-03-31 04:35:16 PM', '2016-03-20 02:16:49 AM')
)
timeA - timeB

Timedelta('11 days 14:18:27')

In [185]:
type(timeA - timeB)   ## This is a Timedelta object

pandas._libs.tslibs.timedeltas.Timedelta

In [186]:
timeB - timeA   ## negative values of duration

Timedelta('-12 days +09:41:33')

In [188]:
## Use the Timedelta() function on pandas library to create Timedelta obj, pd.Timedelta() works similar to pd.DateOffset()
pd.Timedelta(days = 3, hours = 12, minutes = 45, weeks = 8)  ## years =  does not work for pd.Timedelta()

Timedelta('59 days 12:45:00')

In [189]:
## pd.Timedelta() also can be passed with strings
pd.Timedelta('5 minutes')

Timedelta('0 days 00:05:00')

In [197]:
pd.Timedelta('14 days 12 minutes 6 hours 49 seconds')

Timedelta('14 days 06:12:49')

### Timedeltas in a Dataset

In [205]:
shipping = pd.read_csv('ecommerce.csv', index_col = 'ID', parse_dates = ['order_date', 'delivery_date'])
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [206]:
shipping['Delivery Time'] = shipping['delivery_date'] - shipping['order_date']
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days


In [210]:
## datetimes and Timedelta can do the mathematical operations
shipping['Twice As Long'] = shipping['delivery_date'] + shipping['Delivery Time']

In [211]:
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time,Twice As Long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1999-10-20
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12


In [212]:
shipping.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
Delivery Time    timedelta64[ns]
Twice As Long     datetime64[ns]
dtype: object

In [214]:
## Keep only the orders whose delivery took longer than 365 days, then preview the first three
shipping.loc[shipping['Delivery Time'] > pd.Timedelta('365 days')].head(3)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time,Twice As Long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22


In [216]:
## Aggregations also work on a column of Timedelta values
shipping['Delivery Time'].max()  ## longest delivery; not displayed because it is not the last expression
## Use the Series method rather than the builtin min(): it is vectorized and skips missing values by default
shipping['Delivery Time'].min()

Timedelta('8 days 00:00:00')