In [196]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

In [197]:
# A fixed, naive (no tzinfo) point in time: 20 Nov 2022, 16:13:15.
datetime_obj = datetime(2022, 11, 20, 16, 13, 15)

# print() shows the readable str() form instead of the constructor-style repr.
print(datetime_obj)

2022-11-20 16:13:15


In [198]:
# timedelta works in fixed spans, so this adds exactly 365 days
# (which lands on the same calendar date one year later here).
one_year_on = datetime_obj + timedelta(days=365)
print(one_year_on)

2023-11-20 16:13:15


In [199]:
# Parse an ISO date string into a NumPy datetime64 scalar.
np.datetime64("2023-07-23")

numpy.datetime64('2023-07-23')

In [200]:
# Mixing a date string with a datetime object: NumPy coerces both entries to a
# single datetime64 dtype (microsecond resolution in the recorded output).
# datetime.now() makes this cell's output differ on every run.
np.array(["2023-06-21", datetime.now()], dtype=np.datetime64)

array(['2023-06-21T00:00:00.000000', '2023-08-01T07:13:12.248661'],
      dtype='datetime64[us]')

In [201]:
# Parse an ordinal-style date string by spelling its layout out explicitly:
# %d = day of month, then the literal "th of ", %B = full month name, %Y = year.
given_string = "4th of July 1776"
ordinal_date_format = "%dth of %B %Y"
pd.to_datetime(given_string, format=ordinal_date_format)

Timestamp('1776-07-04 00:00:00')

In [202]:
# The Timestamp constructor works out the layout on its own -- no format string needed.
pd.Timestamp(given_string)

Timestamp('1776-07-04 00:00:00')

In [203]:
# Day-first layout made explicit: "4/7" is read as 4 July, not 7 April.
pd.to_datetime("4/7/1776", format="%d/%m/%Y")

Timestamp('1776-07-04 00:00:00')

In [204]:
# Pull the oil price CSV directly over HTTP (needs network access), then display it.
OIL_PRICES_URL = "https://andybek.com/pandas-oil"
df = pd.read_csv(OIL_PRICES_URL)
df

Unnamed: 0,Date,Price
0,04-Jan-00,23.95
1,05-Jan-00,23.72
2,06-Jan-00,23.55
3,07-Jan-00,23.35
4,10-Jan-00,22.77
...,...,...
5011,24-Sep-19,64.13
5012,25-Sep-19,62.41
5013,26-Sep-19,62.08
5014,27-Sep-19,62.48


In [205]:
# Baseline before any conversion: Date is a plain object column here.
# memory_usage="deep" asks pandas to measure the objects themselves for an accurate total.
df.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5016 entries, 0 to 5015
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    5016 non-null   object 
 1   Price   5016 non-null   float64
dtypes: float64(1), object(1)
memory usage: 362.6 KB


In [206]:
# Turn the "04-Jan-00"-style strings into real datetimes:
# %d = day, %b = abbreviated month name, %y = two-digit year.
parsed_dates = pd.to_datetime(df["Date"], format="%d-%b-%y")
df["Date"] = parsed_dates
df

Unnamed: 0,Date,Price
0,2000-01-04,23.95
1,2000-01-05,23.72
2,2000-01-06,23.55
3,2000-01-07,23.35
4,2000-01-10,22.77
...,...,...
5011,2019-09-24,64.13
5012,2019-09-25,62.41
5013,2019-09-26,62.08
5014,2019-09-27,62.48


In [207]:
# Same report after the conversion: Date is now datetime64[ns] and the
# reported footprint is much smaller than with object strings.
df.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5016 entries, 0 to 5015
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    5016 non-null   datetime64[ns]
 1   Price   5016 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 78.5 KB


In [208]:
# Promote Date to the row index so rows can be filtered by date components.
# Rebinding df (rather than inplace=True) keeps the cell a plain assignment.
# Either way this cell runs only once per kernel: afterwards no Date column is left.
df = df.set_index(keys="Date")

In [209]:
# Date now serves as the row index; Price is the only remaining column.
df

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2000-01-04,23.95
2000-01-05,23.72
2000-01-06,23.55
2000-01-07,23.35
2000-01-10,22.77
...,...
2019-09-24,64.13
2019-09-25,62.41
2019-09-26,62.08
2019-09-27,62.48


In [210]:
# Confirm the index is a DatetimeIndex (dtype datetime64[ns]) rather than plain labels.
df.index

DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
               '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13',
               '2000-01-14', '2000-01-17',
               ...
               '2019-09-17', '2019-09-18', '2019-09-19', '2019-09-20',
               '2019-09-23', '2019-09-24', '2019-09-25', '2019-09-26',
               '2019-09-27', '2019-09-30'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

In [211]:
# Mean Brent price over the February rows of leap years only.
# Leap-year rule written out by hand (Gregorian): divisible by 4, except
# century years, which must also be divisible by 400.
years = df.index.year
is_leap = (years % 4 == 0) & ((years % 100 != 0) | (years % 400 == 0))
df.loc[is_leap & (df.index.month == 2)].mean()

Price    60.696634
dtype: float64

In [212]:
# Same February-of-leap-years mean, using the index's built-in leap-year flag.
feb_in_leap_year = df.index.is_leap_year & (df.index.month == 2)
df.loc[feb_in_leap_year].mean()

Price    60.696634
dtype: float64

In [213]:
# "M" is month-end frequency: starting from 2020-01-20, the first stamp is 2020-01-31.
# NOTE(review): newer pandas releases spell this alias "ME" -- confirm against the installed version.
pd.date_range(start="2020-01-20", periods=10, freq="M")

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31'],
              dtype='datetime64[ns]', freq='M')

In [214]:
# "MS" is month-start frequency: 2020-01-20 is past 1 January, so the range begins on 2020-02-01.
pd.date_range(start="2020-01-20", periods=10, freq="MS")

DatetimeIndex(['2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01',
               '2020-06-01', '2020-07-01', '2020-08-01', '2020-09-01',
               '2020-10-01', '2020-11-01'],
              dtype='datetime64[ns]', freq='MS')

In [215]:
# "SM" is semi-month frequency: stamps fall on the 15th and on the last day of each month.
pd.date_range(start="2020-01-20", periods=10, freq="SM")

DatetimeIndex(['2020-01-31', '2020-02-15', '2020-02-29', '2020-03-15',
               '2020-03-31', '2020-04-15', '2020-04-30', '2020-05-15',
               '2020-05-31', '2020-06-15'],
              dtype='datetime64[ns]', freq='SM-15')

In [216]:
# "W" is weekly frequency; the result reports freq='W-SUN', i.e. every stamp is a Sunday.
pd.date_range(start="2020-01-20", periods=10, freq="W")

DatetimeIndex(['2020-01-26', '2020-02-02', '2020-02-09', '2020-02-16',
               '2020-02-23', '2020-03-01', '2020-03-08', '2020-03-15',
               '2020-03-22', '2020-03-29'],
              dtype='datetime64[ns]', freq='W-SUN')

In [217]:
# Anchor the range on its end instead of its start: the 20 month-ends
# leading up to "2021 Jan" (the last one is 2020-12-31).
pd.date_range(end="2021 Jan", periods=20, freq="M")

DatetimeIndex(['2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='M')

In [218]:
# Writing the end as "2021-01" yields the same 20 month-ends as the "2021 Jan" spelling.
pd.date_range(end="2021-01", periods=20, freq="M")

DatetimeIndex(['2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='M')

In [219]:
# Date of birth for a random pigeon, stored as a single pandas Timestamp
# (midnight, no time zone) for the offset arithmetic examples.
dob = pd.Timestamp("2020-03-09")
dob

Timestamp('2020-03-09 00:00:00')

In [220]:
# Subtracting a bare integer is ambiguous (18 of what?) and pandas rejects it.
# The failure is the point of this cell, so catch it and show the message
# instead of letting the exception halt a Restart-and-Run-All.
try:
    dob - 18
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Addition/subtraction of integers and integer-arrays with Timestamp is no longer supported.  Instead of adding/subtracting `n`, use `n * obj.freq`

In [None]:
# State the unit explicitly: an 18-day DateOffset moves the date back to 2020-02-20.
dob - pd.DateOffset(days=18)

Timestamp('2020-02-20 00:00:00')

In [None]:
# The offset is a standalone object; its repr shows the stored days=18.
pd.DateOffset(days=18)

<DateOffset: days=18>

In [None]:
# Arithmetic applies to the whole index at once: every stamp moves forward by 3 days 4 hours.
df.index + pd.Timedelta(days=3, hours=4)

DatetimeIndex(['2000-01-07 04:00:00', '2000-01-08 04:00:00',
               '2000-01-09 04:00:00', '2000-01-10 04:00:00',
               '2000-01-13 04:00:00', '2000-01-14 04:00:00',
               '2000-01-15 04:00:00', '2000-01-16 04:00:00',
               '2000-01-17 04:00:00', '2000-01-20 04:00:00',
               ...
               '2019-09-20 04:00:00', '2019-09-21 04:00:00',
               '2019-09-22 04:00:00', '2019-09-23 04:00:00',
               '2019-09-26 04:00:00', '2019-09-27 04:00:00',
               '2019-09-28 04:00:00', '2019-09-29 04:00:00',
               '2019-09-30 04:00:00', '2019-10-03 04:00:00'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

In [None]:
# A DateOffset with the same components gives the same stamps on this time-zone-naive index.
df.index + pd.DateOffset(days=3, hours=4)

DatetimeIndex(['2000-01-07 04:00:00', '2000-01-08 04:00:00',
               '2000-01-09 04:00:00', '2000-01-10 04:00:00',
               '2000-01-13 04:00:00', '2000-01-14 04:00:00',
               '2000-01-15 04:00:00', '2000-01-16 04:00:00',
               '2000-01-17 04:00:00', '2000-01-20 04:00:00',
               ...
               '2019-09-20 04:00:00', '2019-09-21 04:00:00',
               '2019-09-22 04:00:00', '2019-09-23 04:00:00',
               '2019-09-26 04:00:00', '2019-09-27 04:00:00',
               '2019-09-28 04:00:00', '2019-09-29 04:00:00',
               '2019-09-30 04:00:00', '2019-10-03 04:00:00'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

In [None]:
# DateOffset -> calendar-aware, e.g. it respects daylight saving time shifts
# Timedelta  -> absolute elapsed time, regardless of the calendar

In [None]:
# Attach a time zone to the naive stamps. The result is a new index (not assigned here):
# January rows show UTC-05:00 and September rows UTC-04:00.
df.index.tz_localize("US/Eastern")

DatetimeIndex(['2000-01-04 00:00:00-05:00', '2000-01-05 00:00:00-05:00',
               '2000-01-06 00:00:00-05:00', '2000-01-07 00:00:00-05:00',
               '2000-01-10 00:00:00-05:00', '2000-01-11 00:00:00-05:00',
               '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',
               '2000-01-14 00:00:00-05:00', '2000-01-17 00:00:00-05:00',
               ...
               '2019-09-17 00:00:00-04:00', '2019-09-18 00:00:00-04:00',
               '2019-09-19 00:00:00-04:00', '2019-09-20 00:00:00-04:00',
               '2019-09-23 00:00:00-04:00', '2019-09-24 00:00:00-04:00',
               '2019-09-25 00:00:00-04:00', '2019-09-26 00:00:00-04:00',
               '2019-09-27 00:00:00-04:00', '2019-09-30 00:00:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', name='Date', length=5016, freq=None)

In [None]:
# The localized copy carries a tzinfo object describing US/Eastern.
eastern_index = df.index.tz_localize("US/Eastern")
eastern_index.tz

<DstTzInfo 'US/Eastern' LMT-1 day, 19:04:00 STD>

In [None]:
# The original index is still naive: the earlier tz_localize calls were never assigned back.
# Identity comparison (`is None`) is the idiomatic None check.
df.index.tz is None

True

In [None]:
# Naive midnight on 14 March 2021 -- chosen because the US/Eastern offset changes
# from -05:00 to -04:00 between this date and the next (see the results further down).
a = pd.Timestamp("2021-03-14")

In [None]:
# Make the timestamp zone-aware and inspect the attached tzinfo.
# NOTE(review): rebinding `a` to its own localized value means this cell is meant to run
# once -- tz_localize is expected to reject an already-aware Timestamp; confirm before re-running.
a = a.tz_localize("US/Eastern")
a.tz

<DstTzInfo 'US/Eastern' EST-1 day, 19:00:00 STD>

In [None]:
# Midnight on the 14th is still on standard time (UTC-05:00).
a

Timestamp('2021-03-14 00:00:00-0500', tz='US/Eastern')

In [None]:
# Calendar-aware: lands on the next midnight, which is only 23 elapsed hours away
# because the clocks jump forward for daylight saving time in between.
a + pd.DateOffset(days=1)

Timestamp('2021-03-15 00:00:00-0400', tz='US/Eastern')

In [None]:
# Absolute: exactly 24 elapsed hours, so the wall clock reads 01:00 after the
# daylight saving jump -- Timedelta is not calendar-aware.
a + pd.Timedelta(days=1)

Timestamp('2021-03-15 01:00:00-0400', tz='US/Eastern')