In [294]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

In [295]:
# Build a naive (timezone-unaware) datetime from its components, given in
# year, month, day, hour, minute, second order.
datetime_obj = datetime(2022, 11, 20, 16, 13, 15)

# print() renders the value through str(), i.e. "YYYY-MM-DD HH:MM:SS".
print(datetime_obj)

2022-11-20 16:13:15


In [296]:
# Shift by a fixed span of 365 days (timedelta's first positional argument is days).
# This is an absolute duration, not a calendar year.
print(datetime_obj + timedelta(365))

2023-11-20 16:13:15


In [297]:
# Parse an ISO-formatted date string into a NumPy datetime64 scalar.
np.datetime64("2023-07-23")

numpy.datetime64('2023-07-23')

In [298]:
# Mixed input (a date string and a datetime object) is coerced into a single
# datetime64 array. The second element comes from the wall clock, so the
# displayed value changes on every run.
np.array(["2023-06-21", datetime.now()], dtype=np.datetime64)

array(['2023-06-21T00:00:00.000000', '2023-08-02T18:54:17.986212'],
      dtype='datetime64[us]')

In [299]:
# Convert a free-form date string to a pandas Timestamp by spelling out its layout.
# The literal "th" in the format only matches day ordinals written with a "th" suffix.
given_string = "4th of July 1776"
pd.to_datetime(arg=given_string, format="%dth of %B %Y")

Timestamp('1776-07-04 00:00:00')

In [300]:
# The Timestamp constructor parses the same string without an explicit format.
pd.Timestamp(given_string)

Timestamp('1776-07-04 00:00:00')

In [301]:
# "4/7" alone is ambiguous (4 July vs. 7 April); the explicit day/month/year
# format pins it to 4 July.
pd.to_datetime("4/7/1776", format="%d/%m/%Y")

Timestamp('1776-07-04 00:00:00')

In [302]:
# Load the oil price CSV over HTTP (requires network access), then display it.
# The Date column is read as plain strings at this point.
df = pd.read_csv("https://andybek.com/pandas-oil")
df

Unnamed: 0,Date,Price
0,04-Jan-00,23.95
1,05-Jan-00,23.72
2,06-Jan-00,23.55
3,07-Jan-00,23.35
4,10-Jan-00,22.77
...,...,...
5011,24-Sep-19,64.13
5012,25-Sep-19,62.41
5013,26-Sep-19,62.08
5014,27-Sep-19,62.48


In [303]:
# Baseline dtypes and true memory footprint ("deep" also measures the string
# objects held by the Date column).
df.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5016 entries, 0 to 5015
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    5016 non-null   object 
 1   Price   5016 non-null   float64
dtypes: float64(1), object(1)
memory usage: 362.6 KB


In [304]:
# Replace the string dates (e.g. "04-Jan-00") with real datetime64 values;
# the format spells out day, abbreviated month name, and two-digit year.
df = df.assign(Date=pd.to_datetime(df["Date"], format="%d-%b-%y"))
df

Unnamed: 0,Date,Price
0,2000-01-04,23.95
1,2000-01-05,23.72
2,2000-01-06,23.55
3,2000-01-07,23.35
4,2000-01-10,22.77
...,...,...
5011,2019-09-24,64.13
5012,2019-09-25,62.41
5013,2019-09-26,62.08
5014,2019-09-27,62.48


In [305]:
# Re-check after the conversion: Date is now datetime64[ns] and the frame's
# memory usage is much lower than with string objects.
df.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5016 entries, 0 to 5015
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    5016 non-null   datetime64[ns]
 1   Price   5016 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 78.5 KB


In [306]:
# Promote Date to the index so rows can be selected and grouped by date.
# The result is rebound instead of using inplace=True, which keeps the step
# chainable and avoids mutating a frame that earlier cells already displayed.
df = df.set_index(keys="Date")

In [307]:
# Date is now the index, leaving Price as the only column.
df

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2000-01-04,23.95
2000-01-05,23.72
2000-01-06,23.55
2000-01-07,23.35
2000-01-10,22.77
...,...
2019-09-24,64.13
2019-09-25,62.41
2019-09-26,62.08
2019-09-27,62.48


In [308]:
# The index is a DatetimeIndex; no regular frequency is attached (freq=None).
df.index

DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
               '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13',
               '2000-01-14', '2000-01-17',
               ...
               '2019-09-17', '2019-09-18', '2019-09-19', '2019-09-20',
               '2019-09-23', '2019-09-24', '2019-09-25', '2019-09-26',
               '2019-09-27', '2019-09-30'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

In [309]:
# Label every row with its weekday name, average the price per weekday, and
# report which weekday has the lowest average.
df["day_name"] = df.index.day_name()
df.groupby(by="day_name").agg("mean").idxmin()

Price    Monday
dtype: object

In [310]:
# Same question, reduced to a plain string: pick the Price column first, then
# average per weekday and take the label of the minimum.
df.groupby("day_name")["Price"].mean().idxmin()

'Monday'

In [311]:
# Remove the helper column in place so the frame is back to Price only.
del df["day_name"]
df

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2000-01-04,23.95
2000-01-05,23.72
2000-01-06,23.55
2000-01-07,23.35
2000-01-10,22.77
...,...
2019-09-24,64.13
2019-09-25,62.41
2019-09-26,62.08
2019-09-27,62.48


In [312]:
# Mean Brent price over the February rows of leap years in the dataset.
# `is_leap_year` applies the full Gregorian rule; `year % 4 == 0` on its own
# would misclassify century years such as 1900 or 2100.
df.loc[df.index.is_leap_year & (df.index.month == 2)].mean()

Price    60.696634
dtype: float64

In [313]:
# Same leap-year February filter, this time matching the month by its name.
df[df.index.is_leap_year & (df.index.month_name() == "February")].mean()

Price    60.696634
dtype: float64

In [314]:
# Same computation with Date as an ordinary column, using the Series `.dt` accessor.
# Rows and the Price column are selected in a single `.loc` call; chaining
# `.loc[mask]["Price"]` indexes twice and builds a throwaway intermediate frame.
a = df.reset_index()
a.loc[a["Date"].dt.is_leap_year & (a["Date"].dt.month == 2), "Price"].mean()

60.696633663366335

In [315]:
# Ten month-end stamps; the first one is the month end on or after `start`.
# NOTE(review): newer pandas releases deprecate the "M" alias in favour of "ME" —
# confirm against the pandas version this notebook is pinned to.
pd.date_range(start="2020-01-20", periods=10, freq="M")

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31'],
              dtype='datetime64[ns]', freq='M')

In [316]:
# Ten stamps spaced three seconds apart, starting at midnight of the start date.
pd.date_range(start="2020-01-20", periods=10, freq="3S")

DatetimeIndex(['2020-01-20 00:00:00', '2020-01-20 00:00:03',
               '2020-01-20 00:00:06', '2020-01-20 00:00:09',
               '2020-01-20 00:00:12', '2020-01-20 00:00:15',
               '2020-01-20 00:00:18', '2020-01-20 00:00:21',
               '2020-01-20 00:00:24', '2020-01-20 00:00:27'],
              dtype='datetime64[ns]', freq='3S')

In [317]:
# Semi-month frequency (15th and month end). With `end` given instead of
# `start`, the ten periods are counted backwards from the end date.
pd.date_range(end="2020-01-20", periods=10, freq="SM")

DatetimeIndex(['2019-08-31', '2019-09-15', '2019-09-30', '2019-10-15',
               '2019-10-31', '2019-11-15', '2019-11-30', '2019-12-15',
               '2019-12-31', '2020-01-15'],
              dtype='datetime64[ns]', freq='SM-15')

In [318]:
# Weekly frequency; "W" resolves to Sunday-anchored weeks (W-SUN).
pd.date_range(start="2020-01-20", periods=10, freq="W")

DatetimeIndex(['2020-01-26', '2020-02-02', '2020-02-09', '2020-02-16',
               '2020-02-23', '2020-03-01', '2020-03-08', '2020-03-15',
               '2020-03-22', '2020-03-29'],
              dtype='datetime64[ns]', freq='W-SUN')

In [319]:
# Twenty month ends finishing before January 2021; the end date is given as
# free text ("2021 Jan").
pd.date_range(end="2021 Jan", periods=20, freq="M")

DatetimeIndex(['2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='M')

In [320]:
# The ISO-style spelling "2021-01" of the same end date yields the same index.
pd.date_range(end="2021-01", periods=20, freq="M")

DatetimeIndex(['2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='M')

In [321]:
# Date of birth for a random pigeon, built from explicit components
# (midnight on 9 March 2020).
dob = pd.Timestamp(year=2020, month=3, day=9)
dob

Timestamp('2020-03-09 00:00:00')

In [322]:
# Step back 18 days using an absolute duration.
dob - pd.Timedelta(days=18)

Timestamp('2020-02-20 00:00:00')

In [323]:
# Step back 18 days using a calendar-based offset; same result for this naive timestamp.
dob - pd.DateOffset(days=18)

Timestamp('2020-02-20 00:00:00')

In [324]:
# Plural keyword: a relative offset of 18 years.
pd.DateOffset(years=18)

<DateOffset: years=18>

In [325]:
# Singular keyword: a different parameter from `years=`.
# NOTE(review): per the pandas DateOffset docs the singular form replaces the
# year component rather than shifting by 18 years — confirm before relying on it.
pd.DateOffset(year=18)

<DateOffset: year=18>

In [326]:
# Arithmetic broadcasts over the whole index: every stamp moves forward 18 hours.
df.index + pd.Timedelta(hours=18)

DatetimeIndex(['2000-01-04 18:00:00', '2000-01-05 18:00:00',
               '2000-01-06 18:00:00', '2000-01-07 18:00:00',
               '2000-01-10 18:00:00', '2000-01-11 18:00:00',
               '2000-01-12 18:00:00', '2000-01-13 18:00:00',
               '2000-01-14 18:00:00', '2000-01-17 18:00:00',
               ...
               '2019-09-17 18:00:00', '2019-09-18 18:00:00',
               '2019-09-19 18:00:00', '2019-09-20 18:00:00',
               '2019-09-23 18:00:00', '2019-09-24 18:00:00',
               '2019-09-25 18:00:00', '2019-09-26 18:00:00',
               '2019-09-27 18:00:00', '2019-09-30 18:00:00'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

In [None]:
# A DateOffset can combine several units; every stamp moves 3 days and 4 hours forward.
df.index + pd.DateOffset(days=3, hours=4)

DatetimeIndex(['2000-01-07 04:00:00', '2000-01-08 04:00:00',
               '2000-01-09 04:00:00', '2000-01-10 04:00:00',
               '2000-01-13 04:00:00', '2000-01-14 04:00:00',
               '2000-01-15 04:00:00', '2000-01-16 04:00:00',
               '2000-01-17 04:00:00', '2000-01-20 04:00:00',
               ...
               '2019-09-20 04:00:00', '2019-09-21 04:00:00',
               '2019-09-22 04:00:00', '2019-09-23 04:00:00',
               '2019-09-26 04:00:00', '2019-09-27 04:00:00',
               '2019-09-28 04:00:00', '2019-09-29 04:00:00',
               '2019-09-30 04:00:00', '2019-10-03 04:00:00'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

In [None]:
# DateOffset -> calendar aware e.g. daylight saving
# Timedelta -> absolute time

In [None]:
# Attach a timezone to the naive index; wall-clock times are kept and each stamp
# gets the UTC offset in force on that date (-05:00 in winter, -04:00 in summer).
# The result is only displayed; df.index itself is not modified.
df.index.tz_localize("US/Eastern")

DatetimeIndex(['2000-01-04 00:00:00-05:00', '2000-01-05 00:00:00-05:00',
               '2000-01-06 00:00:00-05:00', '2000-01-07 00:00:00-05:00',
               '2000-01-10 00:00:00-05:00', '2000-01-11 00:00:00-05:00',
               '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',
               '2000-01-14 00:00:00-05:00', '2000-01-17 00:00:00-05:00',
               ...
               '2019-09-17 00:00:00-04:00', '2019-09-18 00:00:00-04:00',
               '2019-09-19 00:00:00-04:00', '2019-09-20 00:00:00-04:00',
               '2019-09-23 00:00:00-04:00', '2019-09-24 00:00:00-04:00',
               '2019-09-25 00:00:00-04:00', '2019-09-26 00:00:00-04:00',
               '2019-09-27 00:00:00-04:00', '2019-09-30 00:00:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', name='Date', length=5016, freq=None)

In [None]:
# The timezone object carried by the localized index.
df.index.tz_localize("US/Eastern").tz

<DstTzInfo 'US/Eastern' LMT-1 day, 19:04:00 STD>

In [None]:
# The original index is still timezone-naive because tz_localize returned a new index.
# `is None` is the idiomatic identity check; `== None` depends on how the
# left-hand object implements equality.
df.index.tz is None

True

In [None]:
# Midnight on 14 March 2021; the cells below show the UTC offset changing from
# -05:00 to -04:00 across this date in US/Eastern.
a = pd.Timestamp("2021-03-14")

In [None]:
# Make the naive timestamp timezone-aware and inspect the attached tz.
# NOTE(review): this rebinding is self-referential; re-running the cell on the
# already-localized value is expected to fail — re-run the cell that creates `a` first.
a = a.tz_localize("US/Eastern")
a.tz

<DstTzInfo 'US/Eastern' EST-1 day, 19:00:00 STD>

In [None]:
# The localized value: same wall-clock time, now carrying a -05:00 offset.
a

Timestamp('2021-03-14 00:00:00-0500', tz='US/Eastern')

In [None]:
# Calendar-aware: lands on the same wall-clock time one day later. Only 23 hours
# elapse because the UTC offset changes from -05:00 to -04:00 in between.
a + pd.DateOffset(days=1)

Timestamp('2021-03-15 00:00:00-0400', tz='US/Eastern')

In [None]:
# Absolute: moves forward exactly 24 hours, so the wall clock ends up at 01:00
# after the offset change — Timedelta is not calendar aware.
a + pd.Timedelta(days=1)

Timestamp('2021-03-15 01:00:00-0400', tz='US/Eastern')

In [327]:
# Difference between two NumPy datetime64 values.

# The result is a timedelta64 in seconds here because both operands are written
# with second resolution; the unit follows the operands rather than a fixed default.
np.datetime64("2022-12-11 12:00:00") - np.datetime64("2022-12-11 00:00:00")

numpy.timedelta64(43200,'s')

In [329]:
# Dividing by a one-hour timedelta64 expresses the difference as a float number of hours.
(np.datetime64("2022-12-11 12:00:00") - np.datetime64("2022-12-11 00:00:00")) / np.timedelta64(1, "h")

12.0

In [331]:
# Same difference expressed in days (half a day).
(np.datetime64("2022-12-11 12:00:00") - np.datetime64("2022-12-11 00:00:00")) / np.timedelta64(1, "D")

0.5

In [332]:
# Same difference expressed in minutes.
(np.datetime64("2022-12-11 12:00:00") - np.datetime64("2022-12-11 00:00:00")) / np.timedelta64(1, "m")

720.0

In [333]:
# A year-only datetime64 plus a day-based timedelta64: the sum is 20 days after
# 1 January 2022, at day resolution.
np.datetime64("2022") + np.timedelta64(20, "D")

numpy.datetime64('2022-01-21')

In [334]:
# 9 December 2022 at 15:00, built from named components.
ts = pd.Timestamp(year=2022, month=12, day=9, hour=15)
ts

Timestamp('2022-12-09 15:00:00')

In [335]:
# Weekday name of the timestamp.
ts.day_name()

'Friday'

In [336]:
# Five business days later: the weekend is skipped and the time of day is kept.
ts + pd.offsets.BusinessDay(n=5)

Timestamp('2022-12-16 15:00:00')

In [337]:
# Five business days after a Friday is the following Friday.
(ts + pd.offsets.BusinessDay(n=5)).day_name()

'Friday'

In [349]:
# Integer arguments are read as nanoseconds since the Unix epoch; subtracting
# two Timestamps yields a Timedelta.
(pd.Timestamp(1234567891011121314) - pd.Timestamp(12345678910111213))

Timedelta('14146 days 02:10:12.101010101')

In [350]:
# Dividing by a one-week timedelta64 gives the difference as a fractional number of weeks.
(pd.Timestamp(1234567891011121314) - pd.Timestamp(12345678910111213)) / np.timedelta64(1, "W")

2020.8700596908236

In [353]:
# Whole weeks between the two instants. A scalar Timedelta has no `.dt` accessor
# (that belongs to Series), so take its whole-day count and floor-divide by 7.
(pd.to_datetime(1234567891011121314) - pd.to_datetime(12345678910111213)).days // 7

2020

In [355]:
# Two-element datetime Series holding the same instant twice (integer input is
# read as nanoseconds since the Unix epoch).
ser = pd.Series(data=[pd.to_datetime(1234567891011121314)] * 2)
ser

0   2009-02-13 23:31:31.011121314
1   2009-02-13 23:31:31.011121314
dtype: datetime64[ns]

In [365]:
(ser - pd.to_datetime(12345678910111213)).dt.year

AttributeError: 'TimedeltaProperties' object has no attribute 'year'