In [1]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

# # Timestamp Object.
#### A pandas Timestamp object is the pandas counterpart of Python's datetime object.
#### pd.Timestamp() accepts a date string and displays the result as YYYY-MM-DD hh:mm:ss in 24-hour format.

In [51]:
# Creating a Timestamp of the current time.
# dt.datetime.now() is called without a timezone, so the result carries no timezone info.

pd.Timestamp(dt.datetime.now())

Timestamp('2020-06-30 00:31:08.608000')

In [10]:
# Creating a Timestamp from a date string in YYYY-MM-DD format.
# No time of day is given, so the result is midnight (00:00:00) of that date.

pd.Timestamp('2015-03-31')

Timestamp('2015-03-31 00:00:00')

In [11]:
# Pitfall: without quotes, 2015/3/30 is not a date. Python evaluates it as the
# arithmetic expression 2015 / 3 / 30 (about 22.39), and pd.Timestamp reads that
# number as nanoseconds since 1970-01-01, which is why the output is 22 ns past the epoch.
pd.Timestamp(2015/3/30)     # a non-string argument like this is not right.

Timestamp('1970-01-01 00:00:00.000000022')

In [12]:
# Creating a Timestamp from a year-first date string that uses '/' as the separator.

pd.Timestamp('2015/3/30')

Timestamp('2015-03-30 00:00:00')

In [13]:
# Making a Timestamp of 6th May 2015 from a comma-separated 'year, month, day' string.

pd.Timestamp('2015, 5, 6')

Timestamp('2015-05-06 00:00:00')

#### pd.Timestamp() works most reliably with dates in YYYY-MM-DD format. Other formats may not raise an error but can be silently misinterpreted (for example, day and month swapped), as shown below.

In [14]:
# Trying to make a Timestamp of 4th March 2020.
# Pitfall: '4/3/2020' is read month-first, so the result is 3rd April 2020, not 4th March.

pd.Timestamp('4/3/2020')

Timestamp('2020-04-03 00:00:00')

In [16]:
# Trying to make a Timestamp of 4th March 2020, 6:13:29 in the evening.
# The 'PM' suffix is converted to 24-hour time (18:13:29), but the date is again
# read month-first, so the result is 3rd April 2020.

pd.Timestamp('4/3/2020 6:13:29 PM')

Timestamp('2020-04-03 18:13:29')

# # DatetimeIndex Object.

#### pd.DatetimeIndex() is a container object that holds a sequence of datetime values.

In [17]:
# Creating a list of date strings (two year-first, one with the year last).
dates = ['2015/11/3', '2077/05/10', '10/05/2020']

# Creating a DatetimeIndex from the list of date strings.
# Note: '10/05/2020' is read month-first, i.e. as 5th October 2020.
pd.DatetimeIndex(dates)

DatetimeIndex(['2015-11-03', '2077-05-10', '2020-10-05'], dtype='datetime64[ns]', freq=None)

In [19]:
# Build a list of datetime.date objects from (year, month, day) triples.
dates_dt = [
    dt.date(*ymd)
    for ymd in ((2016, 1, 10), (2014, 11, 10), (2016, 12, 14))
]

# Wrap the date objects in a DatetimeIndex (kept in the order given, not sorted).
dtIndex = pd.DatetimeIndex(data=dates_dt)

In [21]:
# Creating a list of values, one per date in dtIndex.
values = [100, 200, 300]

# Creating a Series from the list, using the DatetimeIndex as its row labels
# instead of the default 0, 1, 2.
pd.Series(values, index=dtIndex)

2016-01-10    100
2014-11-10    200
2016-12-14    300
dtype: int64

# # pd.to_datetime() method.

#### The pd.to_datetime() function converts a datetime-like input (a datetime object, a date string, or a UNIX timestamp) into the appropriate pandas object, such as a Timestamp or a DatetimeIndex.

In [22]:
# Converting a single YYYY-MM-DD date string to a Timestamp.

pd.to_datetime('2001-04-19')

Timestamp('2001-04-19 00:00:00')

In [24]:
# Converting a year-first date string that uses '/' separators to a Timestamp.

pd.to_datetime('2020/11/25')

Timestamp('2020-11-25 00:00:00')

In [25]:
# Converting a date string written with an ordinal day and a month name to a Timestamp.

pd.to_datetime('10th May, 1997')

Timestamp('1997-05-10 00:00:00')

In [26]:
# Converting a list of date strings to a DatetimeIndex (a list in, an index out).
# NOTE(review): the three strings use three different formats. Newer pandas
# releases appear to infer a single format from the first element and may reject
# the others unless format='mixed' is passed; confirm against the installed version.

pd.to_datetime(['2001-04-19', '2020/11/25', '10th May, 1997'])

DatetimeIndex(['2001-04-19', '2020-11-25', '1997-05-10'], dtype='datetime64[ns]', freq=None)

#### If a Series contains strings that are not valid dates, set the "errors" parameter to 'coerce'.
#### With errors='coerce', invalid date strings are converted to NaT (Not a Time) instead of raising an error.

In [29]:
# A Series that includes invalid date strings: 'Banana' is not a date at all and
# '2020-02-31' is not a real calendar day.
# Note: this rebinds the name `dates`, which an earlier cell used for a plain list.
dates = pd.Series(['2001-04-19', '2020/11/25', 'Banana', '2020-02-31'])

# errors='coerce' turns the unparseable entries into NaT instead of raising.
# NOTE(review): the valid entries also use two different formats; newer pandas
# releases may coerce '2020/11/25' to NaT as well. Confirm against the installed version.
pd.to_datetime(dates, errors='coerce')

0   2001-04-19
1   2020-11-25
2          NaT
3          NaT
dtype: datetime64[ns]

#### When converting UNIX timestamps to pandas Timestamps, set the "unit" parameter to 's' if the values are in seconds.

In [30]:
# Converting UNIX timestamps (seconds since 1970-01-01) to pandas Timestamps.
# unit='s' tells pandas the integers are seconds.

pd.to_datetime([1593452167, 863271000, 3403555200], unit='s')

DatetimeIndex(['2020-06-29 17:36:07', '1997-05-10 13:30:00',
               '2077-11-08 00:00:00'],
              dtype='datetime64[ns]', freq=None)

# # pd.date_range() method.

#### pd.date_range() creates a DatetimeIndex covering a range of time.
#### Use the "start" and "end" parameters to specify where the range begins and ends.
#### Use the "freq" parameter to specify the interval between consecutive timestamps.

In [32]:
# date_range from 1st Jan 2016 to 10th Jan 2016; both endpoints appear in the output.
# freq='D' for a 1-day interval.

pd.date_range(start='2016-01-01', end='2016-01-10', freq='D')

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10'],
              dtype='datetime64[ns]', freq='D')

In [33]:
# freq='2D' for a 2-day interval. Steps are counted from the start date, so the
# last value is 9th Jan; the end date itself (10th Jan) is not on the 2-day grid.

pd.date_range(start='2016-01-01', end='2016-01-10', freq='2D')

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [35]:
# A notebook cell displays only its last expression, so the output shown for this
# cell belongs to the 'MS' call; the result of the 'M' call is computed and discarded.

# freq='M' for a 1-month interval.
pd.date_range(start='2016-01-01', end='2016-11-10', freq='M')

# freq='MS' for a 1-month interval anchored on the first day of each month.
pd.date_range(start='2016-01-01', end='2016-11-10', freq='MS')

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01'],
              dtype='datetime64[ns]', freq='MS')

In [37]:
# A notebook cell displays only its last expression, so the output shown for this
# cell belongs to the '5H' call; the result of the 'H' call is computed and discarded.

# freq='H' for a 1-hour interval.
pd.date_range(start='2016-01-01', end='2016-1-5', freq='H')

# freq='5H' for a 5-hour interval.
pd.date_range(start='2016-01-01', end='2016-1-5', freq='5H')

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 05:00:00',
               '2016-01-01 10:00:00', '2016-01-01 15:00:00',
               '2016-01-01 20:00:00', '2016-01-02 01:00:00',
               '2016-01-02 06:00:00', '2016-01-02 11:00:00',
               '2016-01-02 16:00:00', '2016-01-02 21:00:00',
               '2016-01-03 02:00:00', '2016-01-03 07:00:00',
               '2016-01-03 12:00:00', '2016-01-03 17:00:00',
               '2016-01-03 22:00:00', '2016-01-04 03:00:00',
               '2016-01-04 08:00:00', '2016-01-04 13:00:00',
               '2016-01-04 18:00:00', '2016-01-04 23:00:00'],
              dtype='datetime64[ns]', freq='5H')

In [38]:
# Getting all the business days between 1st and 31st Jan 2016.
# freq='B' keeps business days only; weekend dates are absent from the output.

pd.date_range(start='2016-01-01', end='2016-1-31', freq='B')

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08', '2016-01-11', '2016-01-12',
               '2016-01-13', '2016-01-14', '2016-01-15', '2016-01-18',
               '2016-01-19', '2016-01-20', '2016-01-21', '2016-01-22',
               '2016-01-25', '2016-01-26', '2016-01-27', '2016-01-28',
               '2016-01-29'],
              dtype='datetime64[ns]', freq='B')

In [44]:
# A notebook cell displays only its last expression, so the output shown for this
# cell belongs to the 'W-FRI' call; the result of the 'W-SUN' call is discarded.

# freq='W-SUN' for a weekly interval anchored on Sundays.
pd.date_range(start='2016-01-01', end='2016-1-31', freq='W-SUN')

# freq='W-FRI' for a weekly interval anchored on Fridays.
# The alias is written in its canonical upper-case form, matching 'W-SUN' above
# and the freq that pandas reports in the output.
pd.date_range(start='2016-01-01', end='2016-1-31', freq='W-FRI')

DatetimeIndex(['2016-01-01', '2016-01-08', '2016-01-15', '2016-01-22',
               '2016-01-29'],
              dtype='datetime64[ns]', freq='W-FRI')

In [45]:
# freq='A' for year-end dates; the output reports it as 'A-DEC' (31st Dec of each year).
# The range ends on 2030-01-31, so the last value returned is 2029-12-31.
# NOTE(review): newer pandas releases appear to rename some frequency aliases
# (e.g. 'A' -> 'YE'); confirm against the installed version.

pd.date_range(start='2016-01-01', end='2030-1-31', freq='A')

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31', '2025-12-31', '2026-12-31', '2027-12-31',
               '2028-12-31', '2029-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

### Using the "periods" parameter.

#### You can pass either the start or the end parameter together with the periods parameter to say how many Timestamps you need counting forward from the start time or backward from the end time.

In [47]:
# 10 consecutive days starting on 9-9-2012 (the start date is the first of the 10).

pd.date_range(start='2012-9-9', periods=10, freq='D')

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18'],
              dtype='datetime64[ns]', freq='D')

In [48]:
# 10 business days counting from 9-9-2012.
# The start date itself is not in the output, which begins on 2012-09-10
# (2012-09-09 is presumably a non-business day).

pd.date_range(start='2012-9-9', periods=10, freq='B')

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21'],
              dtype='datetime64[ns]', freq='B')

In [49]:
# The 7 days of the week starting on 28-6-2020 (start date included).

pd.date_range(start='2020-6-28', periods=7, freq='D')

DatetimeIndex(['2020-06-28', '2020-06-29', '2020-06-30', '2020-07-01',
               '2020-07-02', '2020-07-03', '2020-07-04'],
              dtype='datetime64[ns]', freq='D')

#### Using the 'end' parameter along with periods gives that many timestamps ending at the end time, listed in ascending order.

In [50]:
# The 20 days ending on 31-12-1999 (the end date is the last of the 20).

pd.date_range(end='1999-12-31', periods=20, freq='D')

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

# # .dt accessor.

In [4]:
# Timestamps spaced 30 days apart, from the start of 2000 up to the end of 2010.
decade = pd.date_range('2000/01/01', '2010/12/31', freq='30D')

# Wrap the DatetimeIndex in a Series so the .dt accessor can be used on it.
s = pd.Series(data=decade)
s

0     2000-01-01
1     2000-01-31
2     2000-03-01
3     2000-03-31
4     2000-04-30
         ...    
129   2010-08-06
130   2010-09-05
131   2010-10-05
132   2010-11-04
133   2010-12-04
Length: 134, dtype: datetime64[ns]

In [6]:
# Extracting the day-of-month value of every Timestamp in the Series.

s.dt.day

0       1
1      31
2       1
3      31
4      30
       ..
129     6
130     5
131     5
132     4
133     4
Length: 134, dtype: int64

In [8]:
# Extracting the weekday name (e.g. 'Saturday') of every Timestamp in the Series.
# day_name is a method, so it needs the parentheses.

s.dt.day_name()

0       Saturday
1         Monday
2      Wednesday
3         Friday
4         Sunday
         ...    
129       Friday
130       Sunday
131      Tuesday
132     Thursday
133     Saturday
Length: 134, dtype: object

In [9]:
# Extracting the day of the week of every Timestamp in the Series as an integer
# (Monday=0, ..., Sunday=6).

s.dt.dayofweek

0      5
1      0
2      2
3      4
4      6
      ..
129    4
130    6
131    1
132    3
133    5
Length: 134, dtype: int64

In [15]:
# Extracting the weekday name of every Timestamp in the Series.
# The old Series.dt.weekday_name attribute was deprecated and then removed from
# pandas; Series.dt.day_name() is its replacement and returns the same names.

s.dt.day_name()

0       Saturday
1         Monday
2      Wednesday
3         Friday
4         Sunday
         ...    
129       Friday
130       Sunday
131      Tuesday
132     Thursday
133     Saturday
Length: 134, dtype: object

In [10]:
# Extracting the month number (1-12) of every Timestamp in the Series.


s.dt.month

0       1
1       1
2       3
3       3
4       4
       ..
129     8
130     9
131    10
132    11
133    12
Length: 134, dtype: int64

In [12]:
# Extracting the month name (e.g. 'January') of every Timestamp in the Series.
# month_name is a method, so it needs the parentheses.

s.dt.month_name()

0        January
1        January
2          March
3          March
4          April
         ...    
129       August
130    September
131      October
132     November
133     December
Length: 134, dtype: object

### The .dt.is_* attributes (is_month_start, is_quarter_start, is_quarter_end, ...) return a boolean Series.

In [16]:
# Checking, for each Timestamp, whether its date is the first day of a month.

s.dt.is_month_start

0       True
1      False
2       True
3      False
4      False
       ...  
129    False
130    False
131    False
132    False
133    False
Length: 134, dtype: bool

In [17]:
# Checking, for each Timestamp, whether its date is the first day of a quarter.

s.dt.is_quarter_start

0       True
1      False
2      False
3      False
4      False
       ...  
129    False
130    False
131    False
132    False
133    False
Length: 134, dtype: bool

In [18]:
# Checking, for each Timestamp, whether its date is the last day of a quarter
# (e.g. 2000-03-31 at position 3 in the output).

s.dt.is_quarter_end

0      False
1      False
2      False
3       True
4      False
       ...  
129    False
130    False
131    False
132    False
133    False
Length: 134, dtype: bool

# # pandas_datareader

pandas_datareader is a package that can be used to fetch data from several online data sources.
Paid sources require an API key.
It returns the data as a DataFrame.

In [33]:
# Getting daily OHLCV (open, high, low, close, volume) data for Nvidia ('NVDA')
# from 1/1/2018 onwards. No end date is passed.
# This call goes over the network to the 'yahoo' data source.

stocks = data.DataReader('NVDA', data_source='yahoo', start='2018/01/01')
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,199.5,194.5,195.779999,199.350006,8890400.0,197.813309
2018-01-03,213.699997,203.75,204.100006,212.470001,22867600.0,210.832153
2018-01-04,218.050003,212.690002,215.759995,213.589996,14581700.0,211.943512


In [34]:
# Returns the values of the DataFrame as a 2-D array: one inner array per row,
# without the row or column labels.

stocks.values

array([[1.99500000e+02, 1.94500000e+02, 1.95779999e+02, 1.99350006e+02,
        8.89040000e+06, 1.97813309e+02],
       [2.13699997e+02, 2.03750000e+02, 2.04100006e+02, 2.12470001e+02,
        2.28676000e+07, 2.10832153e+02],
       [2.18050003e+02, 2.12690002e+02, 2.15759995e+02, 2.13589996e+02,
        1.45817000e+07, 2.11943512e+02],
       ...,
       [3.80000000e+02, 3.65000000e+02, 3.80000000e+02, 3.66200012e+02,
        1.48021000e+07, 3.66200012e+02],
       [3.68179993e+02, 3.56000000e+02, 3.66790009e+02, 3.68000000e+02,
        8.52650000e+06, 3.68000000e+02],
       [3.81049988e+02, 3.70700012e+02, 3.72559998e+02, 3.77299988e+02,
        5.29195400e+06, 3.77299988e+02]])

In [35]:
# Returns the row labels of the DataFrame; here that is a DatetimeIndex named 'Date'.

stocks.index

DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
               '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
               '2018-01-12', '2018-01-16',
               ...
               '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-22',
               '2020-06-23', '2020-06-24', '2020-06-25', '2020-06-26',
               '2020-06-29', '2020-06-30'],
              dtype='datetime64[ns]', name='Date', length=628, freq=None)

In [36]:
# Returns the column labels of the DataFrame as an Index.

stocks.columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')

In [37]:
# Returns both sets of labels as a list: the row index first, then the column index.

stocks.axes

[DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
                '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
                '2018-01-12', '2018-01-16',
                ...
                '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-22',
                '2020-06-23', '2020-06-24', '2020-06-25', '2020-06-26',
                '2020-06-29', '2020-06-30'],
               dtype='datetime64[ns]', name='Date', length=628, freq=None),
 Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')]

In [2]:
# Getting Apple Inc. ('AAPL') daily OHLCV data.
# No start or end date is passed; in the recorded run the data begins on 2015-07-06.
# This call goes over the network to the 'yahoo' data source.

aaple = data.DataReader('AAPL', data_source='yahoo')
aaple.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-07-06,126.230003,124.849998,124.940002,126.0,28060400.0,116.122704
2015-07-07,126.150002,123.769997,125.889999,125.690002,46946800.0,115.837006
2015-07-08,124.639999,122.540001,124.480003,122.57,60761600.0,112.961594


### Accessing rows of a time-series DataFrame.

In [51]:
# Extracting the OHLCV row for 1-6-20 by passing the date as a string label.

aaple.loc['2020-06-01']

High         3.223500e+02
Low          3.172100e+02
Open         3.177500e+02
Close        3.218500e+02
Volume       2.019780e+07
Adj Close    3.218500e+02
Name: 2020-06-01 00:00:00, dtype: float64

In [53]:
# Extracting the OHLCV row for 1-6-20 by passing a Timestamp label; this returns
# the same row as the string label '2020-06-01'.

aaple.loc[pd.Timestamp('2020-06-01')]

High         3.223500e+02
Low          3.172100e+02
Open         3.177500e+02
Close        3.218500e+02
Volume       2.019780e+07
Adj Close    3.218500e+02
Name: 2020-06-01 00:00:00, dtype: float64

In [58]:
# Extracting the OHLCV rows for 1-6-20 and 1-5-20 by passing a list of Timestamps.
# Rows come back in the order the labels are listed, not in date order.

aaple.loc[[pd.Timestamp('2020-06-01'), pd.Timestamp('2020-05-01')]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-06-01,322.350006,317.209991,317.75,321.850006,20197800.0,321.850006
2020-05-01,299.0,285.850006,286.25,289.070007,60154200.0,288.289612


In [60]:
# Extracting the OHLCV rows from 1-5-20 to 1-6-20 with a label slice.
# Unlike positional slices, .loc label slices include the end label.

aaple.loc[pd.Timestamp('2020-05-01'): pd.Timestamp('2020-06-01')]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-05-01,299.0,285.850006,286.25,289.070007,60154200.0,288.289612
2020-05-04,293.690002,286.320007,289.170013,293.160004,33392000.0,292.368561
2020-05-05,301.0,294.459991,295.059998,297.559998,36937800.0,296.756683
2020-05-06,303.23999,298.869995,300.459991,300.630005,35583400.0,299.81839
2020-05-07,305.170013,301.970001,303.220001,303.73999,28803800.0,302.919983
2020-05-08,310.350006,304.290009,305.640015,310.130005,33512000.0,310.130005
2020-05-11,317.049988,307.23999,308.100006,315.01001,36405900.0,315.01001
2020-05-12,319.690002,310.910004,317.829987,311.410004,40575300.0,311.410004
2020-05-13,315.950012,303.209991,312.149994,307.649994,50155600.0,307.649994
2020-05-14,309.790009,301.529999,304.51001,309.540009,39732300.0,309.540009


In [54]:
# Extracting the OHLCV of the 24th row. iloc positions are zero-based, so the
# 24th row is position 23.

aaple.iloc[23]

High         1.174400e+02
Low          1.121000e+02
Open         1.129500e+02
Close        1.154000e+02
Volume       9.931260e+07
Adj Close    1.063537e+02
Name: 2015-08-05 00:00:00, dtype: float64

In [61]:
# Extracting the OHLCV of the last row; a negative position counts from the end.

aaple.iloc[-1]

High         3.658800e+02
Low          3.600000e+02
Open         3.600800e+02
Close        3.624300e+02
Volume       2.100596e+07
Adj Close    3.624300e+02
Name: 2020-06-30 00:00:00, dtype: float64

In [55]:
# Extracting the OHLCV from the 24th to the 43rd row.
# The slice end is exclusive: positions 23 through 42 are returned (20 rows).

aaple.iloc[23: 43]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-05,117.440002,112.099998,112.949997,115.400002,99312600.0,106.353653
2015-08-06,116.5,114.120003,115.970001,115.129997,52903000.0,106.585083
2015-08-07,116.25,114.5,114.580002,115.519997,38670400.0,106.946136
2015-08-10,119.989998,116.529999,116.529999,119.720001,54951600.0,110.834412
2015-08-11,118.18,113.330002,117.809998,113.489998,97082800.0,105.066818
2015-08-12,115.419998,109.629997,112.529999,115.239998,101217500.0,106.686913
2015-08-13,116.400002,114.540001,116.040001,115.150002,48535800.0,106.6036
2015-08-14,116.309998,114.010002,114.32,115.959999,42929500.0,107.353477
2015-08-17,117.650002,115.5,116.040001,117.160004,40884700.0,108.464417
2015-08-18,117.440002,116.010002,116.43,116.5,34560700.0,107.853416


In [56]:
# Extracting the OHLCV of the 24th, 46th and 47th rows by passing a list of
# zero-based positions.

aaple.iloc[[23, 45, 46]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-05,117.440002,112.099998,112.949997,115.400002,99312600.0,106.353653
2015-09-04,110.449997,108.510002,108.970001,109.269997,49996300.0,101.160019
2015-09-08,112.559998,110.32,111.75,112.309998,54843600.0,103.974388


### The df.truncate() method drops the rows before the `before` argument and the rows after the `after` argument.

In [62]:
# Getting the OHLCV from 1-1-20 to 31-5-20: rows dated before `before` and rows
# dated after `after` are dropped.

aaple.truncate(before='2020-01-01', after='2020-05-31')

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,300.600006,295.190002,296.239990,300.350006,33870100.0,298.829956
2020-01-03,300.579987,296.500000,297.149994,297.429993,36580700.0,295.924713
2020-01-06,299.959991,292.750000,293.790009,299.799988,29596800.0,298.282715
2020-01-07,300.899994,297.480011,299.839996,298.390015,27218000.0,296.879883
2020-01-08,304.440002,297.160004,297.160004,303.190002,33019800.0,301.655548
...,...,...,...,...,...,...
2020-05-22,319.230011,315.350006,315.769989,318.890015,20450800.0,318.890015
2020-05-26,324.239990,316.500000,323.500000,316.730011,31380500.0,316.730011
2020-05-27,318.709991,313.089996,316.140015,318.109985,28236300.0,318.109985
2020-05-28,323.440002,315.630005,316.769989,318.250000,33390200.0,318.250000


# # pd.DateOffset object

#### Adding time to a given timestamp is not always straightforward; e.g. adding 3 days to 28th Feb gives a different date in a leap year than in a normal year. The DateOffset object takes care of calendar details like these.

#### keyword args of pd.DateOffset().

    - years
    - months
    - weeks
    - days
    - hours
    - minutes
    - seconds
    - microseconds
    - nanoseconds

In [5]:
# Trying to add 5 days to every date in the DatetimeIndex by adding a bare integer.
# This is expected to fail because an integer carries no time unit. The error is
# caught and printed so that the notebook still survives "Restart & Run All".

try:
    aaple.index + 5
except (TypeError, ValueError) as err:
    # NullFrequencyError (seen in the recorded run) is a ValueError subclass;
    # newer pandas releases raise TypeError for integer addition instead.
    print("{}: {}".format(type(err).__name__, err))

NullFrequencyError: Cannot shift with no freq

In [6]:
# Adding 5 calendar days to each date in the DatetimeIndex with a DateOffset.
# A new index is returned; aaple itself is not modified.
aaple.index + pd.DateOffset(days=5)

DatetimeIndex(['2015-07-11', '2015-07-12', '2015-07-13', '2015-07-14',
               '2015-07-15', '2015-07-18', '2015-07-19', '2015-07-20',
               '2015-07-21', '2015-07-22',
               ...
               '2020-06-24', '2020-06-27', '2020-06-28', '2020-06-29',
               '2020-06-30', '2020-07-01', '2020-07-04', '2020-07-05',
               '2020-07-06', '2020-07-07'],
              dtype='datetime64[ns]', name='Date', length=1259, freq=None)

In [7]:
# Subtracting 5 calendar days from each date in the DatetimeIndex.

# Alternative spelling: aaple.index - pd.DateOffset(days=5)

# Adding a negative offset moves every date backwards.
aaple.index + pd.DateOffset(days=-5)

DatetimeIndex(['2015-07-01', '2015-07-02', '2015-07-03', '2015-07-04',
               '2015-07-05', '2015-07-08', '2015-07-09', '2015-07-10',
               '2015-07-11', '2015-07-12',
               ...
               '2020-06-14', '2020-06-17', '2020-06-18', '2020-06-19',
               '2020-06-20', '2020-06-21', '2020-06-24', '2020-06-25',
               '2020-06-26', '2020-06-27'],
              dtype='datetime64[ns]', name='Date', length=1259, freq=None)

In [None]:
# More DateOffset examples. Each expression returns a new index; a notebook cell
# displays only the last one, so split these into separate cells to see each result.
aaple.index + pd.DateOffset(weeks=1)     # Adding 1 week.
aaple.index - pd.DateOffset(weeks=1)     # Subtracting 1 week.

aaple.index + pd.DateOffset(months=1)    # Adding 1 month.
aaple.index - pd.DateOffset(months=1)    # Subtracting 1 month.

# # pd.tseries.offsets offset classes.

In [12]:
# Offsetting all the timestamps in the index by 3 business days.
# NOTE(review): the recorded output below starts at 2015-07-13, which is 5 business
# days after 2015-07-06, not 3. It looks stale (from a BDay(5) run); re-run to confirm.

aaple.index + pd.tseries.offsets.BDay(3)

DatetimeIndex(['2015-07-13', '2015-07-14', '2015-07-15', '2015-07-16',
               '2015-07-17', '2015-07-20', '2015-07-21', '2015-07-22',
               '2015-07-23', '2015-07-24',
               ...
               '2020-06-26', '2020-06-29', '2020-06-30', '2020-07-01',
               '2020-07-02', '2020-07-03', '2020-07-06', '2020-07-07',
               '2020-07-08', '2020-07-09'],
              dtype='datetime64[ns]', name='Date', length=1259, freq=None)

In [14]:
# Custom business days: shift every date forward by one business day, treating
# the listed holidays as non-working days as well.
# Note: 2020-05-10 appears to fall on a weekend (the data has rows for 05-08 and
# 05-11 but none between), so this holiday would not change the result.

aaple.index + pd.tseries.offsets.CDay(holidays=[pd.Timestamp('2020/05/10')])



DatetimeIndex(['2015-07-07', '2015-07-08', '2015-07-09', '2015-07-10',
               '2015-07-13', '2015-07-14', '2015-07-15', '2015-07-16',
               '2015-07-17', '2015-07-20',
               ...
               '2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
               '2020-06-26', '2020-06-29', '2020-06-30', '2020-07-01',
               '2020-07-02', '2020-07-03'],
              dtype='datetime64[ns]', name='Date', length=1259, freq=None)

In [19]:
# Other offsets to try (left commented out; uncomment one at a time):
# aaple.index + pd.tseries.offsets.BMonthBegin()
# aaple.index + pd.tseries.offsets.BMonthEnd()

# BusinessDay() with no argument shifts each date forward by one business day.
aaple.index + pd.tseries.offsets.BusinessDay()

DatetimeIndex(['2015-07-07', '2015-07-08', '2015-07-09', '2015-07-10',
               '2015-07-13', '2015-07-14', '2015-07-15', '2015-07-16',
               '2015-07-17', '2015-07-20',
               ...
               '2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
               '2020-06-26', '2020-06-29', '2020-06-30', '2020-07-01',
               '2020-07-02', '2020-07-03'],
              dtype='datetime64[ns]', name='Date', length=1259, freq=None)

# # pd.Timedelta() Object

#### pd.Timedelta() represents a duration, i.e. the difference between two points in time.

#### Valid keywords for directly creating a Timedelta object are [weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds].

In [20]:
# Two Timestamps eleven days apart.
time_a, time_b = (pd.Timestamp(day) for day in ('2020-03-31', '2020-03-20'))

# Subtracting one Timestamp from another yields a Timedelta.
time_a - time_b

Timedelta('11 days 00:00:00')

In [21]:
# Subtracting the later Timestamp from the earlier one gives a negative Timedelta.
time_b - time_a

Timedelta('-11 days +00:00:00')

In [29]:
# Creating Timedeltas directly from keyword arguments.
# A notebook cell displays only its last expression, so only the final Timedelta
# (1 week + 3 h 40 min 40 s = 7 days 03:40:40) appears in the output.

pd.Timedelta(days=3)
pd.Timedelta(hours=3, minutes=40)
pd.Timedelta(weeks=1, hours=3, minutes=40)
pd.Timedelta(weeks=1, hours=3, minutes=40, seconds=40)

Timedelta('7 days 03:40:40')

In [39]:
# Creating Timedeltas from strings of the form '<number> <unit>'.
# A notebook cell displays only its last expression.
# NOTE(review): the recorded output below (1 millisecond) matches the '1 milli'
# line rather than the final '1 nano' line; it looks stale, so re-run to confirm.

pd.Timedelta('1 minute')
pd.Timedelta('1 sec')
pd.Timedelta('1 milli')
pd.Timedelta('1 nano')

Timedelta('0 days 00:00:00.001000')

## Working with Timedelta

In [42]:
# Load the e-commerce orders, using the ID column as the row index and parsing
# both date columns into datetimes so they can be subtracted later.
ecom = pd.read_csv(
    'Datasets/ecommerce.csv',
    index_col='ID',
    parse_dates=['order_date', 'delivery_date'],
)
ecom.head(3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [43]:
# Subtracting two datetime columns row by row gives a Series of Timedeltas:
# the time each order took to be delivered.
ecom['delivery_date'] - ecom['order_date']

ID
1      257 days
2     2144 days
4      563 days
5     1948 days
7     1742 days
         ...   
990   1684 days
991   2394 days
993   2719 days
994     10 days
997    637 days
Length: 501, dtype: timedelta64[ns]

In [44]:
# Add the delivery duration as a new column after the two existing date columns.
# Plain column assignment is used instead of DataFrame.insert() because insert()
# raises ValueError when the column already exists, so re-running this cell would
# fail; assignment simply overwrites the column with the same values.
ecom['delivery Time'] = ecom['delivery_date'] - ecom['order_date']

In [45]:
# Show the first three rows to confirm the new 'delivery Time' column is present.
ecom.head(3)

Unnamed: 0_level_0,order_date,delivery_date,delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days


In [47]:
# Boolean Series: True where the delivery took longer than 365 days.
ecom['delivery Time'] > pd.Timedelta('365 days')

ID
1      False
2       True
4       True
5       True
7       True
       ...  
990     True
991     True
993     True
994    False
997     True
Name: delivery Time, Length: 501, dtype: bool

In [46]:
# The same comparison written with a plain string; the recorded output is
# identical to the pd.Timedelta('365 days') version.
ecom['delivery Time'] > '365 days'

ID
1      False
2       True
4       True
5       True
7       True
       ...  
990     True
991     True
993     True
994    False
997     True
Name: delivery Time, Length: 501, dtype: bool