# Working with Dates and Times

In [2]:
import pandas as pd
import datetime as dt

## The Timestamp and DatetimeIndex Objects
- Pandas ships with several classes related to datetimes.
- The **Timestamp** class is similar to Python's `datetime` but offers additional functionality
- **DatetimeIndex** is an index of **Timestamp** objects
- The **Timestamp** constructor accepts datetime components (year, month, day, ...), a `date`/`datetime` object, or a date string

In [3]:
# Build a Timestamp from explicit date components; the time defaults to midnight.
pd.Timestamp(year=2027, month=3, day=12)

Timestamp('2027-03-12 00:00:00')

In [4]:
# Date and time components can be supplied together.
pd.Timestamp(year=2027, month=3, day=12, hour=18, minute=23, second=49)

Timestamp('2027-03-12 18:23:49')

In [5]:
# A standard-library date object is converted to a Timestamp at midnight.
pd.Timestamp(dt.date(year=2028, month=10, day=23))

Timestamp('2028-10-23 00:00:00')

In [6]:
# A date string is parsed directly by the Timestamp constructor.
pd.Timestamp("2025-01-01")

Timestamp('2025-01-01 00:00:00')

In [7]:
# A Series built from Timestamp objects is stored with a datetime64 dtype.
pd.Series(data=[pd.Timestamp("2021-03-08 08:35:15")])

0   2021-03-08 08:35:15
dtype: datetime64[ns]

In [8]:
pd.Series([pd.Timestamp("2021-03-08 08:35:15")]).iloc[0] # the Series reports a datetime64 dtype, but the element at a position comes back as a Timestamp

Timestamp('2021-03-08 08:35:15')

In [9]:
# Date strings are parsed into the entries of a DatetimeIndex.
pd.DatetimeIndex(data=["2025-01-01", "2025-02-01"])

DatetimeIndex(['2025-01-01', '2025-02-01'], dtype='datetime64[ns]', freq=None)

In [10]:
# Each element pulled out of a DatetimeIndex is a Timestamp.
pd.DatetimeIndex(data=["2025-01-01", "2025-02-01"])[0]

Timestamp('2025-01-01 00:00:00')

## Create Range of Dates with pd.date_range Function
- The `date_range` func returns a **DatetimeIndex** holding a sequence of dates
- The func requires 2 of 3 params: `start`, `end` and `periods`
- with `start` and `end` pandas will assume a daily interval/period
- Every elem in **DatetimeIndex** is a **Timestamp**

In [11]:
# With only start and end given, the range steps one day at a time.
pd.date_range(start="2021-03-01", end="2021-03-25")

DatetimeIndex(['2021-03-01', '2021-03-02', '2021-03-03', '2021-03-04',
               '2021-03-05', '2021-03-06', '2021-03-07', '2021-03-08',
               '2021-03-09', '2021-03-10', '2021-03-11', '2021-03-12',
               '2021-03-13', '2021-03-14', '2021-03-15', '2021-03-16',
               '2021-03-17', '2021-03-18', '2021-03-19', '2021-03-20',
               '2021-03-21', '2021-03-22', '2021-03-23', '2021-03-24',
               '2021-03-25'],
              dtype='datetime64[ns]', freq='D')

In [16]:
# freq="2D" keeps every second day between the two endpoints.
pd.date_range("2021-03-01", "2021-03-25", freq="2D")

DatetimeIndex(['2021-03-01', '2021-03-03', '2021-03-05', '2021-03-07',
               '2021-03-09', '2021-03-11', '2021-03-13', '2021-03-15',
               '2021-03-17', '2021-03-19', '2021-03-21', '2021-03-23',
               '2021-03-25'],
              dtype='datetime64[ns]', freq='2D')

In [17]:
# freq="B" restricts the range to business days, so weekend dates are skipped.
pd.date_range(start="2021-03-01", end="2021-03-25", freq="B")

DatetimeIndex(['2021-03-01', '2021-03-02', '2021-03-03', '2021-03-04',
               '2021-03-05', '2021-03-08', '2021-03-09', '2021-03-10',
               '2021-03-11', '2021-03-12', '2021-03-15', '2021-03-16',
               '2021-03-17', '2021-03-18', '2021-03-19', '2021-03-22',
               '2021-03-23', '2021-03-24', '2021-03-25'],
              dtype='datetime64[ns]', freq='B')

In [18]:
# Sub-daily frequencies work too: one timestamp every two hours within a single day.
pd.date_range(start="2021-03-01 00:00", end="2021-03-01 23:00:00", freq="2h")

DatetimeIndex(['2021-03-01 00:00:00', '2021-03-01 02:00:00',
               '2021-03-01 04:00:00', '2021-03-01 06:00:00',
               '2021-03-01 08:00:00', '2021-03-01 10:00:00',
               '2021-03-01 12:00:00', '2021-03-01 14:00:00',
               '2021-03-01 16:00:00', '2021-03-01 18:00:00',
               '2021-03-01 20:00:00', '2021-03-01 22:00:00'],
              dtype='datetime64[ns]', freq='2h')

In [20]:
# freq="W-FRI" produces one date per week, anchored on Friday.
pd.date_range(start="2021-03-01", end="2021-04-25", freq="W-FRI")

DatetimeIndex(['2021-03-05', '2021-03-12', '2021-03-19', '2021-03-26',
               '2021-04-02', '2021-04-09', '2021-04-16', '2021-04-23'],
              dtype='datetime64[ns]', freq='W-FRI')

In [21]:
pd.date_range("2021-01-01", "2022-01-01", freq="ME")

DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31'],
              dtype='datetime64[ns]', freq='ME')

In [23]:
pd.date_range("2021-01-01", "2050-01-01", freq="YE")

DatetimeIndex(['2021-12-31', '2022-12-31', '2023-12-31', '2024-12-31',
               '2025-12-31', '2026-12-31', '2027-12-31', '2028-12-31',
               '2029-12-31', '2030-12-31', '2031-12-31', '2032-12-31',
               '2033-12-31', '2034-12-31', '2035-12-31', '2036-12-31',
               '2037-12-31', '2038-12-31', '2039-12-31', '2040-12-31',
               '2041-12-31', '2042-12-31', '2043-12-31', '2044-12-31',
               '2045-12-31', '2046-12-31', '2047-12-31', '2048-12-31',
               '2049-12-31'],
              dtype='datetime64[ns]', freq='YE-DEC')

In [24]:
# Instead of an end date, ask for a fixed number of daily periods from the start.
pd.date_range(start="2012-09-09", periods=25, freq="D")

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03'],
              dtype='datetime64[ns]', freq='D')

## The dt attribute
- The `dt` accessor of a datetime **Series** is a DatetimeProperties object with attributes/methods for working with datetimes
- The DatetimeProperties object has attributes like `day`, `month` and `year` that reveal info about each date in the series
- `day_name()` returns the written day of the week
- `is_month_start`, `is_month_end` and `is_quarter_start` return boolean series

In [27]:
# Timestamps spaced 24 days and 3 hours apart across 2000-2020, wrapped in a Series.
bunch_dates = pd.Series(
    pd.date_range("2000-01-01", "2020-12-31", freq="24D 3h")
)

In [30]:
# Weekday as an integer: Monday is 0 and Sunday is 6 (2000-01-01, a Saturday, gives 5).
bunch_dates.dt.day_of_week

0      5
1      1
2      4
3      0
4      3
      ..
313    3
314    6
315    2
316    5
317    1
Length: 318, dtype: int32

In [31]:
# Day-of-month number for every timestamp in the Series.
bunch_dates.dt.day

0       1
1      25
2      18
3      13
4       6
       ..
313     3
314    27
315    21
316    14
317     8
Length: 318, dtype: int32

In [33]:
# day_name is a method (note the parentheses) returning the weekday name as text.
bunch_dates.dt.day_name()

0       Saturday
1        Tuesday
2         Friday
3         Monday
4       Thursday
         ...    
313     Thursday
314       Sunday
315    Wednesday
316     Saturday
317      Tuesday
Length: 318, dtype: object

In [35]:
# is_month_start is a boolean Series; value_counts tallies how many dates fall on the 1st.
bunch_dates.dt.is_month_start.value_counts()

False    308
True      10
Name: count, dtype: int64

In [36]:
# Use the boolean is_quarter_start Series as a mask to keep only quarter-opening dates.
bunch_dates.loc[bunch_dates.dt.is_quarter_start]

0     2000-01-01 00:00:00
106   2007-01-01 06:00:00
212   2014-01-01 12:00:00
299   2019-10-01 09:00:00
dtype: datetime64[ns]

## Selecting Rows from a DataFrame with a DatetimeIndex
- The `iloc` accessor is available for index position-based extraction
- `loc` accepts date strings (or **Timestamp** objects) to select rows by label/value; `dt.date` objects will not work
- Slicing between two dates can be used

In [43]:
# Load the IBM price history with the Date column parsed and used as the index,
# then sort the rows chronologically.
stocks_df = (
    pd.read_csv("ibm.csv", index_col="Date", parse_dates=["Date"])
    .sort_index()
)
# Work on a copy so the loaded frame stays available for later sections.
stocks = stocks_df.copy()
stocks

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02,5.04610,5.04610,4.98716,4.98716,5.935630e+05
1962-01-03,4.98716,5.03292,4.98716,5.03292,4.451750e+05
1962-01-04,5.03292,5.03292,4.98052,4.98052,3.995136e+05
1962-01-05,4.97389,4.97389,4.87511,4.88166,5.593215e+05
1962-01-08,4.88166,4.88166,4.75059,4.78972,8.332738e+05
...,...,...,...,...,...
2023-10-05,140.90000,141.70000,140.19000,141.52000,3.223910e+06
2023-10-06,141.40000,142.94000,140.11000,142.03000,3.511347e+06
2023-10-09,142.30000,142.40000,140.68000,142.20000,2.354396e+06
2023-10-10,142.60000,143.41500,141.72000,142.11000,3.015784e+06


In [44]:
# Positional lookup: the row at integer position 300, whatever its date label is.
stocks.iloc[300]

Open           3.561240
High           3.574410
Low            3.554500
Close          3.561240
Volume    536491.781438
Name: 1963-03-12 00:00:00, dtype: float64

In [45]:
# Label lookup: a date string is matched against the DatetimeIndex.
stocks.loc["2023-01-05"]

Open          142.440
High          142.498
Low           140.010
Close         141.110
Volume    2866648.000
Name: 2023-01-05 00:00:00, dtype: float64

In [46]:
# Slice by date strings; the endpoints do not have to exist in the index
# (1962-01-01 is absent, so the result starts at 1962-01-02).
stocks.loc["1962-01-01": "1980-01-01"]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02,5.04610,5.04610,4.98716,4.98716,5.935630e+05
1962-01-03,4.98716,5.03292,4.98716,5.03292,4.451750e+05
1962-01-04,5.03292,5.03292,4.98052,4.98052,3.995136e+05
1962-01-05,4.97389,4.97389,4.87511,4.88166,5.593215e+05
1962-01-08,4.88166,4.88166,4.75059,4.78972,8.332738e+05
...,...,...,...,...,...
1979-12-24,10.46770,10.61020,10.46770,10.61020,1.073289e+06
1979-12-26,10.61020,10.65220,10.53110,10.61020,9.941494e+05
1979-12-27,10.61020,10.62880,10.51170,10.59260,1.545095e+06
1979-12-28,10.57220,10.57220,10.46770,10.51170,1.699724e+06


In [47]:
# A Timestamp object is also accepted as a label by loc.
stocks.loc[pd.Timestamp(year=2014, month=3, day=4)]

Open      1.288700e+02
High      1.298270e+02
Low       1.288020e+02
Close     1.293290e+02
Volume    6.825202e+06
Name: 2014-03-04 00:00:00, dtype: float64

In [49]:
# Demonstration: a plain dt.date label is not accepted by loc on this DatetimeIndex;
# use a date string or a pd.Timestamp instead (both work in the cells above).
# The expected KeyError is caught and printed so that "Restart & Run All" can
# continue past this cell instead of stopping on the traceback.
try:
    stocks.loc[dt.date(2014, 3, 4)]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: datetime.date(2014, 3, 4)

In [51]:
# truncate is an alternative to slicing: it drops rows before/after the given dates.
stocks.truncate(before="2014-03-03", after="2014-06-01")

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-03-03,127.201,128.070,126.801,127.807,5.698452e+06
2014-03-04,128.870,129.827,128.802,129.329,6.825202e+06
2014-03-05,129.407,130.344,129.319,129.807,5.027617e+06
2014-03-06,129.963,130.676,129.631,130.159,5.503611e+06
2014-03-07,130.676,131.047,129.837,130.198,5.936539e+06
...,...,...,...,...,...
2014-05-23,129.651,129.866,129.309,129.719,3.673929e+06
2014-05-27,128.909,129.525,128.324,128.900,6.871714e+06
2014-05-28,128.724,128.978,127.612,127.719,5.336975e+06
2014-05-29,128.128,128.206,127.221,128.188,3.955876e+06


## The DateOffset Object

- A **DateOffset** object adds time to a **Timestamp** to arrive at a new **Timestamp**.
- The **DateOffset** constructor accepts `days`, `weeks`, `months`, `years` parameters, and more.
- We can pass a **DateOffset** object to the `freq` parameter of the `pd.date_range` function.

In [53]:
# Start this section from a fresh, independent copy of the loaded price data.
stocks = stocks_df.copy(deep=True)
stocks

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02,5.04610,5.04610,4.98716,4.98716,5.935630e+05
1962-01-03,4.98716,5.03292,4.98716,5.03292,4.451750e+05
1962-01-04,5.03292,5.03292,4.98052,4.98052,3.995136e+05
1962-01-05,4.97389,4.97389,4.87511,4.88166,5.593215e+05
1962-01-08,4.88166,4.88166,4.75059,4.78972,8.332738e+05
...,...,...,...,...,...
2023-10-05,140.90000,141.70000,140.19000,141.52000,3.223910e+06
2023-10-06,141.40000,142.94000,140.11000,142.03000,3.511347e+06
2023-10-09,142.30000,142.40000,140.68000,142.20000,2.354396e+06
2023-10-10,142.60000,143.41500,141.72000,142.11000,3.015784e+06


In [54]:
# The index is a DatetimeIndex because the Date column was parsed and set as index_col on load.
stocks.index

DatetimeIndex(['1962-01-02', '1962-01-03', '1962-01-04', '1962-01-05',
               '1962-01-08', '1962-01-09', '1962-01-10', '1962-01-11',
               '1962-01-12', '1962-01-15',
               ...
               '2023-09-28', '2023-09-29', '2023-10-02', '2023-10-03',
               '2023-10-04', '2023-10-05', '2023-10-06', '2023-10-09',
               '2023-10-10', '2023-10-11'],
              dtype='datetime64[ns]', name='Date', length=15546, freq=None)

In [57]:
stocks.index + pd.DateOffset(days=5) # adds five days to every date in the index

DatetimeIndex(['1962-01-07', '1962-01-08', '1962-01-09', '1962-01-10',
               '1962-01-13', '1962-01-14', '1962-01-15', '1962-01-16',
               '1962-01-17', '1962-01-20',
               ...
               '2023-10-03', '2023-10-04', '2023-10-07', '2023-10-08',
               '2023-10-09', '2023-10-10', '2023-10-11', '2023-10-14',
               '2023-10-15', '2023-10-16'],
              dtype='datetime64[ns]', name='Date', length=15546, freq=None)

In [61]:
# Build every April 12th from 1991 through 2023 by stepping one calendar year at a time;
# these dates are used to look up the stock price on each April 12th.
pd.date_range("1991-04-12", "2023-04-12", freq=pd.DateOffset(years=1))

DatetimeIndex(['1991-04-12', '1992-04-12', '1993-04-12', '1994-04-12',
               '1995-04-12', '1996-04-12', '1997-04-12', '1998-04-12',
               '1999-04-12', '2000-04-12', '2001-04-12', '2002-04-12',
               '2003-04-12', '2004-04-12', '2005-04-12', '2006-04-12',
               '2007-04-12', '2008-04-12', '2009-04-12', '2010-04-12',
               '2011-04-12', '2012-04-12', '2013-04-12', '2014-04-12',
               '2015-04-12', '2016-04-12', '2017-04-12', '2018-04-12',
               '2019-04-12', '2020-04-12', '2021-04-12', '2022-04-12',
               '2023-04-12'],
              dtype='datetime64[ns]', freq='<DateOffset: years=1>')

In [62]:
# Keep only the rows whose date is one of the April 12ths between 1991 and 2023
# (years where that day has no row in the data simply do not appear).
stocks.loc[
    stocks.index.isin(
        pd.date_range("1991-04-12", "2023-04-12", freq=pd.DateOffset(years=1))
    )
]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1991-04-12,18.1678,18.1678,17.6954,17.752,21645360.0
1993-04-12,8.305,8.48788,8.28665,8.34598,8518730.0
1994-04-12,8.79793,8.81911,8.59455,8.65281,11495080.0
1995-04-12,14.1607,14.3431,14.0933,14.2562,11586400.0
1996-04-12,18.8636,18.9241,18.0233,18.2517,47247830.0
1999-04-12,60.0758,60.1099,59.2716,60.0543,13666930.0
2000-04-12,78.3089,78.3089,73.1365,74.4931,13522950.0
2001-04-12,63.3393,64.0624,62.5165,63.2075,14290480.0
2002-04-12,57.5306,57.7941,55.4548,56.2414,24056150.0
2004-04-12,61.4342,61.9984,61.3659,61.5933,4736957.0


## Specialized Date Offsets

- Pandas nests more specialized date offsets in `pd.tseries.offsets`.
- We can add a different amount of time to each date (for example, month end, quarter end, year begin).

In [63]:
# Start this section from a fresh, independent copy of the loaded price data.
stocks = stocks_df.copy(deep=True)
stocks

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02,5.04610,5.04610,4.98716,4.98716,5.935630e+05
1962-01-03,4.98716,5.03292,4.98716,5.03292,4.451750e+05
1962-01-04,5.03292,5.03292,4.98052,4.98052,3.995136e+05
1962-01-05,4.97389,4.97389,4.87511,4.88166,5.593215e+05
1962-01-08,4.88166,4.88166,4.75059,4.78972,8.332738e+05
...,...,...,...,...,...
2023-10-05,140.90000,141.70000,140.19000,141.52000,3.223910e+06
2023-10-06,141.40000,142.94000,140.11000,142.03000,3.511347e+06
2023-10-09,142.30000,142.40000,140.68000,142.20000,2.354396e+06
2023-10-10,142.60000,143.41500,141.72000,142.11000,3.015784e+06


In [66]:
# Roll every date in the index forward to a month end.
stocks.index + pd.tseries.offsets.MonthEnd(n=1)

DatetimeIndex(['1962-01-31', '1962-01-31', '1962-01-31', '1962-01-31',
               '1962-01-31', '1962-01-31', '1962-01-31', '1962-01-31',
               '1962-01-31', '1962-01-31',
               ...
               '2023-09-30', '2023-09-30', '2023-10-31', '2023-10-31',
               '2023-10-31', '2023-10-31', '2023-10-31', '2023-10-31',
               '2023-10-31', '2023-10-31'],
              dtype='datetime64[ns]', name='Date', length=15546, freq=None)

In [67]:
# Move every date forward to the next quarter start, with quarters anchored on January.
stocks.index + pd.tseries.offsets.QuarterBegin(n=1, startingMonth=1)

DatetimeIndex(['1962-04-01', '1962-04-01', '1962-04-01', '1962-04-01',
               '1962-04-01', '1962-04-01', '1962-04-01', '1962-04-01',
               '1962-04-01', '1962-04-01',
               ...
               '2023-10-01', '2023-10-01', '2024-01-01', '2024-01-01',
               '2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01',
               '2024-01-01', '2024-01-01'],
              dtype='datetime64[ns]', name='Date', length=15546, freq=None)

In [69]:
# Subtracting the offset rolls every date back to a January 1st.
stocks.index - pd.tseries.offsets.YearBegin(n=1)

DatetimeIndex(['1962-01-01', '1962-01-01', '1962-01-01', '1962-01-01',
               '1962-01-01', '1962-01-01', '1962-01-01', '1962-01-01',
               '1962-01-01', '1962-01-01',
               ...
               '2023-01-01', '2023-01-01', '2023-01-01', '2023-01-01',
               '2023-01-01', '2023-01-01', '2023-01-01', '2023-01-01',
               '2023-01-01', '2023-01-01'],
              dtype='datetime64[ns]', name='Date', length=15546, freq=None)

## Timedeltas

- A **Timedelta** is a pandas object that represents a duration (an amount of time).
- Subtracting two **Timestamp** objects will yield a **Timedelta** object (this applies to subtracting a **Series** from another **Series**).
- The **Timedelta** constructor accepts parameters for time as well as string descriptions.


In [70]:
# Start this section from a fresh, independent copy of the loaded price data.
stocks = stocks_df.copy(deep=True)
stocks

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02,5.04610,5.04610,4.98716,4.98716,5.935630e+05
1962-01-03,4.98716,5.03292,4.98716,5.03292,4.451750e+05
1962-01-04,5.03292,5.03292,4.98052,4.98052,3.995136e+05
1962-01-05,4.97389,4.97389,4.87511,4.88166,5.593215e+05
1962-01-08,4.88166,4.88166,4.75059,4.78972,8.332738e+05
...,...,...,...,...,...
2023-10-05,140.90000,141.70000,140.19000,141.52000,3.223910e+06
2023-10-06,141.40000,142.94000,140.11000,142.03000,3.511347e+06
2023-10-09,142.30000,142.40000,140.68000,142.20000,2.354396e+06
2023-10-10,142.60000,143.41500,141.72000,142.11000,3.015784e+06


In [72]:
# Later minus earlier: the difference of two Timestamps is a positive Timedelta.
(
    pd.Timestamp("2023-03-31 12:30:48")
    - pd.Timestamp("2023-03-20 19:25:59")
)

Timedelta('10 days 17:04:49')

In [73]:
# Earlier minus later: the same duration comes back as a negative Timedelta.
(
    pd.Timestamp("2023-03-20 19:25:59")
    - pd.Timestamp("2023-03-31 12:30:48")
)

Timedelta('-11 days +06:55:11')

In [74]:
# Adding a Timedelta to a Timestamp yields a new Timestamp.
pd.Timedelta(days=3) + pd.Timestamp("2023-03-20")

Timestamp('2023-03-23 00:00:00')

In [75]:
# The same Timedelta is added to every entry of the DatetimeIndex.
stocks.index + pd.Timedelta(days=3)

DatetimeIndex(['1962-01-05', '1962-01-06', '1962-01-07', '1962-01-08',
               '1962-01-11', '1962-01-12', '1962-01-13', '1962-01-14',
               '1962-01-15', '1962-01-18',
               ...
               '2023-10-01', '2023-10-02', '2023-10-05', '2023-10-06',
               '2023-10-07', '2023-10-08', '2023-10-09', '2023-10-12',
               '2023-10-13', '2023-10-14'],
              dtype='datetime64[ns]', name='Date', length=15546, freq=None)

In [77]:
pd.Timedelta("3 days 2 hours 5 minutes") # the constructor also parses human-readable duration strings

Timedelta('3 days 02:05:00')

In [81]:
# Load the orders, parsing both date columns with the explicit
# month/day/two-digit-year format and using ID as the index.
ecommerce_df = pd.read_csv(
    "ecommerce.csv",
    index_col="ID",
    parse_dates=["order_date", "delivery_date"],
    date_format="%m/%d/%y",
)
# Work on a copy so the loaded frame itself is left unchanged.
ecommerce = ecommerce_df.copy()
ecommerce

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10
...,...,...
990,1991-06-24,1996-02-02
991,1991-09-09,1998-03-30
993,1990-11-16,1998-04-27
994,1993-06-03,1993-06-13


In [83]:
# Subtracting one datetime column from another gives a column of durations.
ecommerce["delivery_time"] = ecommerce["delivery_date"].sub(ecommerce["order_date"])

In [84]:
# Display the frame, which now includes the derived delivery_time column.
ecommerce

Unnamed: 0_level_0,order_date,delivery_date,delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days
...,...,...,...
990,1991-06-24,1996-02-02,1684 days
991,1991-09-09,1998-03-30,2394 days
993,1990-11-16,1998-04-27,2719 days
994,1993-06-03,1993-06-13,10 days
