In [201]:
import pandas as pd
import datetime as dt
from dateutil import relativedelta
from pandas_datareader import data

## Review of python's datetime module

In [2]:
#A calendar date with no time-of-day component, built from named parts
someday = dt.date(year=1996, month=10, day=20)
someday

datetime.date(1996, 10, 20)

In [3]:
dt.datetime.strptime("20-10-1996", "%d-%m-%Y").date()

datetime.date(1996, 10, 20)

In [4]:
someday.day
someday.month
someday.year
someday.max
someday.min
someday.weekday()
someday.timetuple()
someday.strftime("%Y-%m-%d")
str(someday)

'1996-10-20'

In [5]:
#A date plus a time of day (08:12:16), built from named parts
sometime = dt.datetime(year=1996, month=10, day=20, hour=8, minute=12, second=16)
sometime

datetime.datetime(1996, 10, 20, 8, 12, 16)

In [6]:
#Tour of datetime attributes/methods. Every bare expression is evaluated,
#but the notebook only displays the value of the last one.
sometime.day
sometime.month
sometime.year
sometime.hour
sometime.minute
sometime.second
sometime.microsecond
sometime.date()
sometime.strftime("%Y-%m-%d - %H:%M:%S")
str(sometime)
#today() is a classmethod: it returns the current local datetime and ignores the
#value stored in `sometime`, so it is called on the class to avoid implying otherwise.
dt.datetime.today()

datetime.datetime(2020, 7, 7, 13, 48, 51, 503272)

In [7]:
#Convert from date to datetime
dt.datetime.fromordinal(someday.toordinal())
dt.datetime(someday.year, someday.month, someday.day)

datetime.datetime(1996, 10, 20, 0, 0)

## The <code> pandas Timestamp </code> object

In [8]:
#Passing strings to create a Timestamp object.
"""
NOTE:- The Timestamp constructor recognizes many string layouts. The only caveat is an ambiguous
string such as "11/3/2018": Timestamp always reads the first number as the month (mm/dd/yyyy).
If the first number cannot be a month, e.g. "19/11/2020", it is read as the day (dd/mm/yyyy).
"""

pd.Timestamp("20-10-1996")
pd.Timestamp("20/10/1996")
pd.Timestamp("10-20-1996")
pd.Timestamp("1996-10-20")
pd.Timestamp("10/8/2019")  #-> ambiguous, so 10 is taken as the month
pd.Timestamp("1996-10-20 8:12:18 AM")
pd.Timestamp("1996-10-20 8:12:18 PM")  #only this last expression is displayed (20:12:18)

Timestamp('1996-10-20 20:12:18')

In [9]:
#Creating Timestamp object from date object
pd.Timestamp(dt.date(1996, 10, 20))

Timestamp('1996-10-20 00:00:00')

In [10]:
#Creating Timestamp object from datetime object
pd.Timestamp(dt.datetime(1996, 10, 20, 8, 12, 18))

Timestamp('1996-10-20 08:12:18')

## Some attributes/methods on Pandas Timestamp
- convert timestamp back to python date object.
- convert timestamp back to python datetime object.
- check the day of month/week.

In [11]:
#Use the unambiguous ISO 8601 layout (yyyy-mm-dd). The day-first form "20-10-1996" only
#parses because 20 cannot be a month, which relies on the parser's fallback guess.
ts = pd.Timestamp("1996-10-20")
ts

Timestamp('1996-10-20 00:00:00')

In [12]:
ts.to_pydatetime()

datetime.datetime(1996, 10, 20, 0, 0)

In [13]:
ts.date()

datetime.date(1996, 10, 20)

In [14]:
ts.day

20

In [15]:
ts.day_name()

'Sunday'

In [16]:
ts.dayofyear

294

In [17]:
ts.is_month_end

False

In [18]:
ts.is_month_start

False

In [19]:
ts.weekday()

6

In [20]:
ts.year

1996

## Pandas <code> DatetimeIndex </code> Object
- collection of pandas Timestamp

In [21]:
#DatetimeIndex can be built from date strings or from python date objects.
#The two forms are kept under separate names so that one no longer silently overwrites the other.
date_strings = ["1996-10-20", "1999-8-10", "1970-5-1", "1970-1-12"]
dates = [dt.date(1996, 10, 20), dt.date(1999, 8, 10), dt.date(1970, 5, 1), dt.date(1970, 1, 12)]
pd_dt_index = pd.DatetimeIndex(dates)  #pd.DatetimeIndex(date_strings) is the string-based alternative
pd_dt_index

DatetimeIndex(['1996-10-20', '1999-08-10', '1970-05-01', '1970-01-12'], dtype='datetime64[ns]', freq=None)

In [22]:
values = [100, 200, 300, 400]
pd.Series(data=values, index=pd_dt_index)

1996-10-20    100
1999-08-10    200
1970-05-01    300
1970-01-12    400
dtype: int64

## <code> pd.to_datetime() </code> method
- convenient method to convert strings, date, and datetime objects to Timestamp.
- can also be given a list (or a Series), converting it to a DatetimeIndex (or a datetime64 Series).

In [23]:
pd.to_datetime("1996-10-20")
pd.to_datetime(dt.date(1996, 10, 20))
pd.to_datetime(dt.datetime(1996, 10, 20, 8, 12, 18))

#Let's provide different forms in list to convert it to DatetimeIndex
pd.to_datetime(["1996/10/20", "1999-8-12", "2010", "Jan 12th, 1970", dt.date(1970, 5, 1)]) 

DatetimeIndex(['1996-10-20', '1999-08-12', '2010-01-01', '1970-01-12',
               '1970-05-01'],
              dtype='datetime64[ns]', freq=None)

In [24]:
#Build a Series that mixes several date-string layouts with a python date object;
#it is converted to datetimes in the next cell.
mixed_inputs = ["1996/10/20", "1999-8-12", "2010", "Jan 12th, 1970", dt.date(1970, 5, 1)]
times = pd.Series(mixed_inputs)
times  #dtype is still "object" at this point

0        1996/10/20
1         1999-8-12
2              2010
3    Jan 12th, 1970
4        1970-05-01
dtype: object

In [25]:
pd.to_datetime(times)   #dtype --> datetime64[ns]

0   1996-10-20
1   1999-08-12
2   2010-01-01
3   1970-01-12
4   1970-05-01
dtype: datetime64[ns]

In [26]:
#Invalid input data passed to pd.to_datetime
times = pd.Series(["1996/10/20", "1999-8-12", "Hello", "2019-02-31"])
#The 'errors' parameter defaults to 'raise', so an unparseable value aborts the conversion
#with a ValueError. Catch and print it so the notebook still survives Restart & Run All.
try:
    pd.to_datetime(times)
except ValueError as err:
    print("ValueError: {}".format(err))

ValueError: (u'Unknown string format:', 'Hello')

In [27]:
#Let's ask this function to convert the correct values to Timestamps and ignore incorrect values.
pd.to_datetime(times, errors='coerce') #This will store incorrect values as NaT -> Not a Timestamp

0   1996-10-20
1   1999-08-12
2          NaT
3          NaT
dtype: datetime64[ns]

In [28]:
#Converting unix times (in seconds) to Timestamp
pd.to_datetime([1349720105, 1349780105, 1319720105, 1349720105], unit='s')

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 10:55:05',
               '2011-10-27 12:55:05', '2012-10-08 18:15:05'],
              dtype='datetime64[ns]', freq=None)

## Create Range of Dates with the <code> pd.date_range() </code> Method (Part -1)
- Supply any two of these three parameters: start, end, periods
- freq parameter: 
    <ol>
    <li> 'H' - Hourly frequency. Can be prefixed with a number. Eg:- '5H' - 5 hourly etc </li>
        <li> 'D' - Daily Frequency. Can be prefixed with a number to step by several days. Eg: '2D', '5D'.</li>
    <li> 'B' - Business days. Excludes weekends </li>
        <li> 'W' - Weekly Frequency. default:- 'W-SUN', i.e. one date per week, on Sundays. The anchor day can be changed to any other day:- say 'W-MON', 'W-FRI' </li>
        <li> 'M' - Month end frequency. </li>
        <li> 'MS' - Month start frequency. </li>
        <li> 'A' - Annually frequency. </li>
    </ol>
- Check all aliases here:- https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases    

In [29]:
pd.date_range(start = "2015-1-1", end="2015-1-10", freq = '10H') #10 hour frequency

DatetimeIndex(['2015-01-01 00:00:00', '2015-01-01 10:00:00',
               '2015-01-01 20:00:00', '2015-01-02 06:00:00',
               '2015-01-02 16:00:00', '2015-01-03 02:00:00',
               '2015-01-03 12:00:00', '2015-01-03 22:00:00',
               '2015-01-04 08:00:00', '2015-01-04 18:00:00',
               '2015-01-05 04:00:00', '2015-01-05 14:00:00',
               '2015-01-06 00:00:00', '2015-01-06 10:00:00',
               '2015-01-06 20:00:00', '2015-01-07 06:00:00',
               '2015-01-07 16:00:00', '2015-01-08 02:00:00',
               '2015-01-08 12:00:00', '2015-01-08 22:00:00',
               '2015-01-09 08:00:00', '2015-01-09 18:00:00'],
              dtype='datetime64[ns]', freq='10H')

In [30]:
pd.date_range(start = "2015-1-1", end="2015-1-10", freq = 'D')

DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04',
               '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08',
               '2015-01-09', '2015-01-10'],
              dtype='datetime64[ns]', freq='D')

In [31]:
pd.date_range(start = "2015-1-1", end="2015-1-10", freq = '2D')

DatetimeIndex(['2015-01-01', '2015-01-03', '2015-01-05', '2015-01-07',
               '2015-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [32]:
pd.date_range(start = "2015-1-1", end="2015-1-10", freq = 'B')  #3rd, 4th, 9th and 10th are weekend dates

DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-05', '2015-01-06',
               '2015-01-07', '2015-01-08', '2015-01-09'],
              dtype='datetime64[ns]', freq='B')

In [33]:
pd.date_range(start = "2015-1-1", end="2015-1-20", freq = 'W') #By - default starts from Sunday, that's why starts from 4th

DatetimeIndex(['2015-01-04', '2015-01-11', '2015-01-18'], dtype='datetime64[ns]', freq='W-SUN')

In [34]:
pd.date_range(start = "2015-1-1", end="2015-12-31", freq = 'M') #Month-end dates

DatetimeIndex(['2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30',
               '2015-05-31', '2015-06-30', '2015-07-31', '2015-08-31',
               '2015-09-30', '2015-10-31', '2015-11-30', '2015-12-31'],
              dtype='datetime64[ns]', freq='M')

In [35]:
pd.date_range(start = "2015-1-1", end="2015-12-31", freq = 'MS') #Month-start dates

DatetimeIndex(['2015-01-01', '2015-02-01', '2015-03-01', '2015-04-01',
               '2015-05-01', '2015-06-01', '2015-07-01', '2015-08-01',
               '2015-09-01', '2015-10-01', '2015-11-01', '2015-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [36]:
pd.date_range(start = "2015-1-1", end="2020-12-31", freq = 'A') #Annual-end dates

DatetimeIndex(['2015-12-31', '2016-12-31', '2017-12-31', '2018-12-31',
               '2019-12-31', '2020-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

In [37]:
pd.date_range(start = "2015-1-1", end="2020-12-31", freq = 'AS') #Annual-start dates

DatetimeIndex(['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01',
               '2019-01-01', '2020-01-01'],
              dtype='datetime64[ns]', freq='AS-JAN')

## Create Range of Dates with the <code> pd.date_range() </code> Method (Part -2)
- use of 'start' parameter with 'periods' parameter. 
- date_range will generate 'n' Timestamps starting from start date and incrementing according to 'freq' parameter.

In [38]:
pd.date_range(start = "2015-1-1", periods = 12, freq = 'D')

DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04',
               '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08',
               '2015-01-09', '2015-01-10', '2015-01-11', '2015-01-12'],
              dtype='datetime64[ns]', freq='D')

In [39]:
pd.date_range(start = "2015-1-1", periods = 12, freq = 'W')

DatetimeIndex(['2015-01-04', '2015-01-11', '2015-01-18', '2015-01-25',
               '2015-02-01', '2015-02-08', '2015-02-15', '2015-02-22',
               '2015-03-01', '2015-03-08', '2015-03-15', '2015-03-22'],
              dtype='datetime64[ns]', freq='W-SUN')

In [40]:
pd.date_range(start = "2015-1-1", periods = 12, freq = 'M')

DatetimeIndex(['2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30',
               '2015-05-31', '2015-06-30', '2015-07-31', '2015-08-31',
               '2015-09-30', '2015-10-31', '2015-11-30', '2015-12-31'],
              dtype='datetime64[ns]', freq='M')

In [41]:
pd.date_range(start = "2015-1-1", periods = 12, freq = 'B')

DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-05', '2015-01-06',
               '2015-01-07', '2015-01-08', '2015-01-09', '2015-01-12',
               '2015-01-13', '2015-01-14', '2015-01-15', '2015-01-16'],
              dtype='datetime64[ns]', freq='B')

In [42]:
pd.date_range(start = "2015-1-1", periods = 12, freq = 'A')

DatetimeIndex(['2015-12-31', '2016-12-31', '2017-12-31', '2018-12-31',
               '2019-12-31', '2020-12-31', '2021-12-31', '2022-12-31',
               '2023-12-31', '2024-12-31', '2025-12-31', '2026-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

## Create Range of Dates with the <code> pd.date_range() </code> Method (Part -3)
- use of 'end' and 'periods' parameter.
- it moves backwards.

In [43]:
pd.date_range(end = "1999-12-31", periods = 12, freq = 'D')

DatetimeIndex(['1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

In [44]:
pd.date_range(end = "1999-12-31", periods = 12, freq = 'B')

DatetimeIndex(['1999-12-16', '1999-12-17', '1999-12-20', '1999-12-21',
               '1999-12-22', '1999-12-23', '1999-12-24', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='B')

In [45]:
pd.date_range(end = "1999-12-31", periods = 12, freq = 'A')

DatetimeIndex(['1988-12-31', '1989-12-31', '1990-12-31', '1991-12-31',
               '1992-12-31', '1993-12-31', '1994-12-31', '1995-12-31',
               '1996-12-31', '1997-12-31', '1998-12-31', '1999-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

## The <code> .dt </code> Accessor
- this is very similar to <code> .str </code> accessor we used for Series. Basically to apply string methods on Series elements, we used <code> .str </code> accessor.
- Similarly, to use <code> DatetimeIndex </code> methods on <code> DatetimeIndex SERIES </code>, we use <code> .dt </code> accessor.

In [46]:
#One Timestamp per calendar day of January 2015, wrapped in a Series so that
#the .dt accessor can be demonstrated on it.
january_days = pd.date_range(start = '2015-1-1', end = '2015-1-31', freq = 'D')
ts = pd.Series(january_days)
ts.head()

0   2015-01-01
1   2015-01-02
2   2015-01-03
3   2015-01-04
4   2015-01-05
dtype: datetime64[ns]

In [47]:
#Let's find what week day does all timestamps in DatetimeIndex object falls.
ts.dt.dayofweek.head()

0    3
1    4
2    5
3    6
4    0
dtype: int64

In [48]:
#Let's find what week day (name) does all timestamps in DatetimeIndex object falls.
ts.dt.day_name().head()

0    Thursday
1      Friday
2    Saturday
3      Sunday
4      Monday
dtype: object

In [49]:
#Say you want a series of python datetime object from timestamp
ts.dt.to_pydatetime()

array([datetime.datetime(2015, 1, 1, 0, 0),
       datetime.datetime(2015, 1, 2, 0, 0),
       datetime.datetime(2015, 1, 3, 0, 0),
       datetime.datetime(2015, 1, 4, 0, 0),
       datetime.datetime(2015, 1, 5, 0, 0),
       datetime.datetime(2015, 1, 6, 0, 0),
       datetime.datetime(2015, 1, 7, 0, 0),
       datetime.datetime(2015, 1, 8, 0, 0),
       datetime.datetime(2015, 1, 9, 0, 0),
       datetime.datetime(2015, 1, 10, 0, 0),
       datetime.datetime(2015, 1, 11, 0, 0),
       datetime.datetime(2015, 1, 12, 0, 0),
       datetime.datetime(2015, 1, 13, 0, 0),
       datetime.datetime(2015, 1, 14, 0, 0),
       datetime.datetime(2015, 1, 15, 0, 0),
       datetime.datetime(2015, 1, 16, 0, 0),
       datetime.datetime(2015, 1, 17, 0, 0),
       datetime.datetime(2015, 1, 18, 0, 0),
       datetime.datetime(2015, 1, 19, 0, 0),
       datetime.datetime(2015, 1, 20, 0, 0),
       datetime.datetime(2015, 1, 21, 0, 0),
       datetime.datetime(2015, 1, 22, 0, 0),
       datetime.dat

## Import Financial Data using <code> pandas_datareader </code> Library

In [50]:
from pandas_datareader import data

In [56]:
stocks = data.DataReader(name = "MSFT", data_source = "yahoo", start = "2010-01-01", end = "2020-12-31")
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,30.99,30.48,30.98,30.48,31929700.0,23.858992
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472


In [62]:
stocks.values
stocks.index
stocks.columns
stocks.axes

[DatetimeIndex(['2009-12-31', '2010-01-04', '2010-01-05', '2010-01-06',
                '2010-01-07', '2010-01-08', '2010-01-11', '2010-01-12',
                '2010-01-13', '2010-01-14',
                ...
                '2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
                '2020-06-26', '2020-06-29', '2020-06-30', '2020-07-01',
                '2020-07-02', '2020-07-06'],
               dtype='datetime64[ns]', name=u'Date', length=2645, freq=None),
 Index([u'High', u'Low', u'Open', u'Close', u'Volume', u'Adj Close'], dtype='object')]

## Selecting Rows from a <code> DataFrame </code> with <code> DatetimeIndex </code>

In [85]:
#Accessing based on the string date
stocks.loc["2010-1-4"]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.422689e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [103]:
#Select data for entire month
stocks.loc["2010-1"].head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,24.085989


In [102]:
#Select data for entire year
stocks.loc["2010"].tail(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-12-29,28.120001,27.879999,27.940001,27.969999,19502500.0,22.350222
2010-12-30,28.0,27.780001,27.92,27.85,20786100.0,22.254335
2010-12-31,27.92,27.629999,27.799999,27.91,24752000.0,22.302277


In [86]:
#NOTE:- Always better to wrap dates into Timestamp
stocks.loc[pd.Timestamp("2010-1-4")]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.422689e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [88]:
#.loc throws a KeyError if the requested date does not exist in the index
#(the index jumps from 2009-12-31 to 2010-01-04, so there is no row for 2010-01-01).
#Catch and print the error so that the cells below still run on Restart & Run All.
try:
    stocks.loc["2010-1-1"]
except KeyError as err:
    print("KeyError: {}".format(err))

KeyError: u'the label [2010-1-1] is not in the [index]'

In [90]:
#Using iloc
stocks.iloc[0]

High         3.099000e+01
Low          3.048000e+01
Open         3.098000e+01
Close        3.048000e+01
Volume       3.192970e+07
Adj Close    2.385899e+01
Name: 2009-12-31 00:00:00, dtype: float64

In [94]:
#Querying multiple index values
#NOTE:- in the pandas version used for this notebook, a list of strings representing dates is
#not matched against a DatetimeIndex and raises a KeyError.

#Each lookup is wrapped separately: previously the first error stopped the cell, so the
#second lookup never ran.
for labels in (["2010-1-4"], ["2010-1-4", "2010-1-5"]):
    try:
        stocks.loc[labels]
    except KeyError as err:
        print("KeyError: {}".format(err))

KeyError: u"None of [['2010-1-4']] are in the [index]"

In [95]:
#NOTE:- to provide the list of dates, wrap them into Timestamp
stocks.loc[[pd.Timestamp("2010-1-4"), pd.Timestamp("2010-1-5")]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472


In [123]:
#NOTE: Always try to use .isin method if you want to provide list to search in Index.
#BENEFIT:- This will not raise error if some value is not present in index.
stocks.loc[stocks.index.isin(["2010-1-4", "2010-1-5"])]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472


In [99]:
#NOTE:- If any label in the list is missing from the index, the pandas version used here
#emits a deprecation warning and returns an all-NaN row for that label (see output below).
#The warning states that this will raise a KeyError in the future and suggests .reindex()
#as the alternative.
stocks.loc[[pd.Timestamp("2010-1-4"), pd.Timestamp("2010-1-5"), pd.Timestamp("2010-1-1")]]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  


Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-01,,,,,,


In [105]:
#Slicing
stocks.loc["2013-10-01" : "2013-10-07"]
stocks.truncate(before="2013-10-01", after="2013-10-07")

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-01,33.610001,33.299999,33.349998,33.580002,36718700.0,28.962389
2013-10-02,34.029999,33.290001,33.360001,33.919998,46946800.0,29.255632
2013-10-03,34.0,33.419998,33.880001,33.860001,38703800.0,29.203878
2013-10-04,33.990002,33.619999,33.689999,33.880001,33008100.0,29.22113
2013-10-07,33.709999,33.200001,33.599998,33.299999,35069300.0,28.720888


In [137]:
#Find stock prices on all your Birthday dates
#Ten yearly dates starting at 2010-10-20; isin() keeps only the ones that
#actually appear in the index, so dates without a row are dropped silently.
birthdays = pd.date_range(start="2010-10-20", periods = 10, freq= pd.DateOffset(years = 1))
mask = stocks.index.isin(birthdays)
stocks.loc[mask]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-10-20,25.4,25.1,25.26,25.309999,56283600.0,20.101162
2011-10-20,27.34,26.4,27.26,27.040001,76300200.0,22.015331
2014-10-20,44.139999,42.810001,43.060001,44.080002,34527900.0,39.109131
2015-10-20,47.810001,47.02,47.439999,47.77,30802200.0,43.518093
2016-10-20,57.52,56.66,57.5,57.25,49455600.0,53.584614
2017-10-20,78.970001,78.220001,78.32,78.809998,22866400.0,75.541382


## <code> Timestamp </code> Object Attributes and Methods

In [144]:
stocks = data.DataReader(name = "MSFT", data_source = "yahoo", start = "2010-01-01", end = "2020-12-31")
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,30.99,30.48,30.98,30.48,31929700.0,23.858992
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472


In [146]:
someday = stocks.index[500]
someday

Timestamp('2011-12-23 00:00:00')

In [162]:
#Attributes
someday.day
someday.month
someday.year
someday.dayofweek
someday.is_month_start
someday.is_quarter_start
someday.is_month_end

False

In [164]:
#Methods
someday.day_name()
someday.month_name()

'December'

In [180]:
#Methods on DatetimeIndex object
stocks.index  #This is a DatetimeIndex object
stocks.index.is_month_end
pd.Series(stocks.index).dt.day_name()  #For Series object use .dt accessor
stocks.index.day_name()

Index([u'Thursday', u'Monday', u'Tuesday', u'Wednesday', u'Thursday',
       u'Friday', u'Monday', u'Tuesday', u'Wednesday', u'Thursday',
       ...
       u'Monday', u'Tuesday', u'Wednesday', u'Thursday', u'Friday', u'Monday',
       u'Tuesday', u'Wednesday', u'Thursday', u'Monday'],
      dtype='object', name=u'Date', length=2645)

In [182]:
#Insert Day of Week as the first column.
#DataFrame.insert mutates `stocks` in place and raises a ValueError when the column already
#exists, so the guard keeps this cell safe to run more than once.
if "Day of Week" not in stocks.columns:
    stocks.insert(0, column="Day of Week", value = stocks.index.day_name())

In [186]:
#Find all the stock prices on Tuesdays
stocks.loc[stocks["Day of Week"] == "Tuesday"]
stocks[stocks.index.day_name() == "Tuesday"].head()

Unnamed: 0_level_0,Day of Week,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-05,Tuesday,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-12,Tuesday,30.4,29.91,30.15,30.07,65912100.0,23.538054
2010-01-19,Tuesday,31.24,30.68,30.75,31.1,46575700.0,24.344313
2010-01-26,Tuesday,29.85,29.09,29.200001,29.5,66639900.0,23.091867
2010-02-02,Tuesday,28.5,28.139999,28.370001,28.459999,54413700.0,22.277782


## The <code> pd.DateOffset </code> Object

In [187]:
stocks = data.DataReader(name = "MSFT", data_source = "yahoo", start = "2010-01-01", end = "2020-12-31")
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,30.99,30.48,30.98,30.48,31929700.0,23.858992
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472


In [196]:
stocks.index

DatetimeIndex(['2009-12-31', '2010-01-04', '2010-01-05', '2010-01-06',
               '2010-01-07', '2010-01-08', '2010-01-11', '2010-01-12',
               '2010-01-13', '2010-01-14',
               ...
               '2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
               '2020-06-26', '2020-06-29', '2020-06-30', '2020-07-01',
               '2020-07-02', '2020-07-06'],
              dtype='datetime64[ns]', name=u'Date', length=2645, freq=None)

In [202]:
#relativedelta works with python date and datetime objects
stocks.index.to_pydatetime() + relativedelta.relativedelta(years =10)

array([datetime.datetime(2019, 12, 31, 0, 0),
       datetime.datetime(2020, 1, 4, 0, 0),
       datetime.datetime(2020, 1, 5, 0, 0), ...,
       datetime.datetime(2030, 7, 1, 0, 0),
       datetime.datetime(2030, 7, 2, 0, 0),
       datetime.datetime(2030, 7, 6, 0, 0)], dtype=object)

In [210]:
#Adding days to all the values of DatetimeIndex
#Works similar to relativedelta, but with Timestamps
stocks.index[0] + pd.DateOffset(days = 10)   #Adding to single Timestamp object

#Adding/Subtracting days
stocks.index + pd.DateOffset(days = 10)
stocks.index - pd.DateOffset(days = 10)

#Adding/Subtracting years from all Timestamps
stocks.index + pd.DateOffset(years = 1)
stocks.index - pd.DateOffset(years = 1)

#Using multiple parameters
stocks.index + pd.DateOffset(years=1, months = 1, days = 10)
stocks.index - pd.DateOffset(years=1, months = 1, days = 10, hours = 13, minutes = 10, seconds = 10)

DatetimeIndex(['2008-11-19 10:49:50', '2008-11-23 10:49:50',
               '2008-11-24 10:49:50', '2008-11-25 10:49:50',
               '2008-11-26 10:49:50', '2008-11-27 10:49:50',
               '2008-11-30 10:49:50', '2008-12-01 10:49:50',
               '2008-12-02 10:49:50', '2008-12-03 10:49:50',
               ...
               '2019-05-11 10:49:50', '2019-05-12 10:49:50',
               '2019-05-13 10:49:50', '2019-05-14 10:49:50',
               '2019-05-15 10:49:50', '2019-05-18 10:49:50',
               '2019-05-19 10:49:50', '2019-05-21 10:49:50',
               '2019-05-22 10:49:50', '2019-05-26 10:49:50'],
              dtype='datetime64[ns]', name=u'Date', length=2645, freq=None)

## Timeseries Offsets
- offsets can be used to get MonthEnd, Business Month End, Year End etc dates for a Timestamp object.
- Caveat:- Say you have a date '2019-04-30' (already a month end) and are using offset MonthEnd(), then this date will be moved to the next month end, '2019-05-31'.

In [211]:
stocks = data.DataReader(name = "MSFT", data_source = "yahoo", start = "2010-01-01", end = "2020-12-31")
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,30.99,30.48,30.98,30.48,31929700.0,23.858992
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472


In [262]:
#Convert all the dates to MonthEnd dates. Here we can't use pd.DateOffset because we don't know how many days
#to add to each date in the DatetimeIndex.
#Only the result of the last expression in this cell is displayed.

#Let's use offsets
stocks.index + pd.offsets.MonthEnd()  #Roll each date forward to a month end
stocks.index + pd.offsets.MonthEnd(-1) #Roll each date back to the previous month end
#NOTE(review): for a date that is not already a month end, the first of the 12 steps is
#presumably the roll to its own month end, so this may land 11 months after that rather
#than a full year ahead -- confirm against the pandas offsets documentation.
stocks.index + pd.offsets.MonthEnd(12) #Move 12 month ends forward

#Let's get the Business Month Ends
stocks.index + pd.offsets.BMonthEnd()

#Let's get year end
stocks.index + pd.offsets.YearEnd()

#Let's get Month and Year Begin
stocks.index + pd.offsets.MonthBegin()
#In the output below this maps 2009-12-31 to 2009-01-01 and the 2010 dates to 2010-01-01.
stocks.index + pd.offsets.YearBegin(-1)

DatetimeIndex(['2009-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-01-01', '2020-01-01', '2020-01-01', '2020-01-01',
               '2020-01-01', '2020-01-01', '2020-01-01', '2020-01-01',
               '2020-01-01', '2020-01-01'],
              dtype='datetime64[ns]', name=u'Date', length=2645, freq=None)

## The <code> Timedelta </code> Object
- represents a passage of time
- can be given following arguments: weeks, days, hours, minutes, seconds, milliseconds <b>(can't pass years)</b>
- can be added to or subtracted from a Timestamp object
- different from pd.DateOffset, which does not represent a fixed span of time; it is used to shift dates by calendar units such as years, months, days etc. 

In [277]:
#Two reference Timestamps (start of 2010 and end of 2011) used in the subtraction cells below
time_a, time_b = map(pd.Timestamp, ("2010-1-1", "2011-12-31"))

In [278]:
time_b -time_a

Timedelta('729 days 00:00:00')

In [279]:
time_a - time_b

Timedelta('-729 days +00:00:00')

In [285]:
#Create a Timedelta object
time_a + pd.Timedelta(weeks = 10, days = 10, hours = 21, minutes= 10, seconds = 20)

Timestamp('2010-03-22 21:10:20')

In [284]:
time_a + pd.Timedelta(weeks = 10, days = 10, hours = 21, minutes= 10, seconds = 20)

Timestamp('2010-03-22 21:10:20')

## <code>Timedeltas</code> in a Dataset

In [289]:
ecomm = pd.read_csv('ecommerce.csv', index_col="ID", parse_dates=["order_date", "delivery_date"])
ecomm.head(3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [290]:
ecomm["Time Taken to Deliver"] = ecomm['delivery_date'] - ecomm['order_date'] 

In [291]:
ecomm.head()

Unnamed: 0_level_0,order_date,delivery_date,Time Taken to Deliver
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


In [296]:
ecomm["Time Taken to Deliver"].head()

ID
1    257 days
2   2144 days
4    563 days
5   1948 days
7   1742 days
Name: Time Taken to Deliver, dtype: timedelta64[ns]

In [297]:
ecomm["Twice as long"] = ecomm['delivery_date'] + ecomm["Time Taken to Deliver"]

In [298]:
ecomm.head()

Unnamed: 0_level_0,order_date,delivery_date,Time Taken to Deliver,Twice as long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1999-10-20
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18


In [301]:
#Find all orders that took more than a year to deliver.
mask = ecomm["Time Taken to Deliver"] > pd.Timedelta(days = 365)

#or 
#mask = ecomm["Time Taken to Deliver"] > "365 days"

In [303]:
ecomm[mask].head()

Unnamed: 0_level_0,order_date,delivery_date,Time Taken to Deliver,Twice as long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18
9,1990-01-25,1994-10-02,1711 days,1999-06-09


In [304]:
ecomm["Time Taken to Deliver"].min()

Timedelta('8 days 00:00:00')

In [307]:
max_time = ecomm["Time Taken to Deliver"].max()

In [313]:
#NOTE(review): max_time is a duration, but Timestamp(..., unit='s') reads the number as
#seconds since the Unix epoch. The value shown is therefore 1970-01-01 plus the longest
#delivery time, not a date taken from the dataset -- confirm this is the intent.
pd.Timestamp(max_time.total_seconds(), unit='s')

Timestamp('1979-10-24 00:00:00')