# Creating a DatetimeIndex

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import pytz

In [203]:
pd.__version__

'1.3.2'

### Python's `Datetime`, pandas `Timestamp`, and `panda.to_datetime()`

In [4]:
dt1 = dt.datetime(2021,1,1)
dt2 = pd.Timestamp(2021,1,1)
dt3 = pd.to_datetime('2021-1-1')

In [5]:
print(dt1)
print(dt2)
print(dt3)

2021-01-01 00:00:00
2021-01-01 00:00:00
2021-01-01 00:00:00


In [6]:
print(type(dt1))
print(type(dt2))
print(type(dt3))

<class 'datetime.datetime'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [7]:
dt1 == dt2 == dt3

True

In [8]:
dt.datetime(2021,1,1) == pd.to_datetime('2021-1-1')

True

### `pandas.to_datetime()`

In [9]:
dates = ['2021-1-1', '2021-1-2']
pd_dates = pd.to_datetime(dates)
print(pd_dates)
print(type(pd_dates))

DatetimeIndex(['2021-01-01', '2021-01-02'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [10]:
print(pd_dates[0])
print(type(pd_dates[0]))

2021-01-01 00:00:00
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [15]:
dates = ['2021-01-01', # date str format %Y-%m-%d
         '2/1/2021', # date str format %m/%d/%Y
         '03-01-2021', # date  str format %m-%d-%Y
         'April 1, 2021', # date  str format %B %d, %Y
         '20210501', # date str format %Y%m%d
          np.datetime64('2021-07-01'), # numpy datetime64
          dt.datetime(2021, 8, 1), # python datetime
          pd.Timestamp(2021,9,1) # pandas Timestamp
          ]

In [16]:
parsed_dates = pd.to_datetime(
                 dates,
                 infer_datetime_format=True,
                 errors='coerce'
                 )

print(parsed_dates)

DatetimeIndex(['2021-01-01', '2021-02-01', '2021-03-01', '2021-04-01',
               '2021-05-01', '2021-07-01', '2021-08-01', '2021-09-01'],
              dtype='datetime64[ns]', freq=None)


In [17]:
pd.DatetimeIndex(dates) == parsed_dates

array([ True,  True,  True,  True,  True,  True,  True,  True])

In [18]:
parsed_dates = pd.DatetimeIndex(dates)
parsed_dates

DatetimeIndex(['2021-01-01', '2021-02-01', '2021-03-01', '2021-04-01',
               '2021-05-01', '2021-07-01', '2021-08-01', '2021-09-01'],
              dtype='datetime64[ns]', freq=None)

In [19]:
print(f'Name of Day : {parsed_dates.day_name()}')
print(f'Month : {parsed_dates.month}')
print(f'Year : {parsed_dates.year}')
print(f'Days in Month : {parsed_dates.days_in_month}')
print(f'Quarter {parsed_dates.quarter}')
print(f'Quarter Start : {parsed_dates.is_quarter_start}')
print(f'Leap Year : {parsed_dates.is_leap_year}')
print(f'Month Start : {parsed_dates.is_month_start}')
print(f'Month End : {parsed_dates.is_month_end}')
print(f'Year Start : {parsed_dates.is_year_start}')


Name of Day : Index(['Friday', 'Monday', 'Monday', 'Thursday', 'Saturday', 'Thursday',
       'Sunday', 'Wednesday'],
      dtype='object')
Month : Int64Index([1, 2, 3, 4, 5, 7, 8, 9], dtype='int64')
Year : Int64Index([2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021], dtype='int64')
Days in Month : Int64Index([31, 28, 31, 30, 31, 31, 31, 30], dtype='int64')
Quarter Int64Index([1, 1, 1, 2, 2, 3, 3, 3], dtype='int64')
Quarter Start : [ True False False  True False  True False False]
Leap Year : [False False False False False False False False]
Month Start : [ True  True  True  True  True  True  True  True]
Month End : [False False False False False False False False]
Year Start : [ True False False False False False False False]


In [20]:
pd.date_range(start='2021-01-01', periods=3, freq='D')

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq='D')

In [21]:
pd.date_range(start='2021-01-01',
               end='2021-01-03',
               freq='D')


DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq='D')

# Using Format Arguemtns to DateTime

In [22]:
import pandas as pd
import datetime as dt

Using Python `datetime.strptime()`

In [23]:
dt.datetime.strptime('1/1/2022', '%m/%d/%Y').date()

datetime.date(2022, 1, 1)

In [24]:
dt.datetime.strptime('1 January, 2022', '%d %B, %Y').date()

datetime.date(2022, 1, 1)

In [25]:
dt.datetime.strptime('1-Jan-2022', '%d-%b-%Y').date()

datetime.date(2022, 1, 1)

In [26]:
dt.datetime.strptime('Saturday, January 1, 2022', '%A, %B %d, %Y').date()

datetime.date(2022, 1, 1)

In [27]:
dt_1 = dt.datetime.strptime('1/1/2022', '%m/%d/%Y')
dt_1.__str__()
str(dt_1)

'2022-01-01 00:00:00'

In [28]:
print(dt_1)

2022-01-01 00:00:00


Using `pandas.to_datetime()`

In [29]:
pd.to_datetime('1/1/2022', format='%m/%d/%Y')

Timestamp('2022-01-01 00:00:00')

In [30]:
pd.to_datetime('1 January, 2022', format='%d %B, %Y')

Timestamp('2022-01-01 00:00:00')

In [31]:
pd.to_datetime('1-Jan-2022', format='%d-%b-%Y')

Timestamp('2022-01-01 00:00:00')

In [32]:
pd.to_datetime('Saturday, January 1, 2022', format='%A, %B %d, %Y')

Timestamp('2022-01-01 00:00:00')

In [33]:
dt_2 = pd.to_datetime('1/1/2022', format='%m/%d/%Y')
print(dt_2)

2022-01-01 00:00:00


In [34]:
pd.to_datetime('1-Jan-2022')

Timestamp('2022-01-01 00:00:00')

In [35]:
pd.to_datetime('Saturday, January 1, 2022')

Timestamp('2022-01-01 00:00:00')

In [36]:
str(dt_2)
dt_2.__str__()

'2022-01-01 00:00:00'

In [37]:
dt_1 == dt_2

True

In [38]:
dt_1.date()

datetime.date(2022, 1, 1)

In [39]:
dt_2.date()

datetime.date(2022, 1, 1)

In [40]:
type(dt_1)
type(dt_2)

pandas._libs.tslibs.timestamps.Timestamp

In [41]:
isinstance(pd.DatetimeIndex, pd.Timestamp)

False

In [42]:
isinstance(dt_1, dt.datetime)

True

In [43]:
isinstance(dt_2, dt.datetime)

True

In [44]:
isinstance(dt_1, pd.Timestamp)

False

In [45]:
isinstance(dt_2, pd.Timestamp)

True

In [46]:
issubclass(pd.Timestamp, dt.datetime)

True

### Tranforming a pandas DataFrame to a time series DataFrame

In [47]:
df = pd.DataFrame(
        {'Date': ['January 1, 2022', 'January 2, 2022', 'January 3, 2022'],
         'Sales': [23000, 19020, 21000]}
            )
df

Unnamed: 0,Date,Sales
0,"January 1, 2022",23000
1,"January 2, 2022",19020
2,"January 3, 2022",21000


In [48]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    3 non-null      object
 1   Sales   3 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 176.0+ bytes


In [49]:
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3 entries, 2022-01-01 to 2022-01-03
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Sales   3 non-null      int64
dtypes: int64(1)
memory usage: 48.0 bytes


# Working with Unix EPOCH timestamps

In [72]:
import time
epoch_time = time.time()
print(epoch_time)
print(type(epoch_time))

1635220203.582277
<class 'float'>


In [184]:
import pandas as pd
t = pd.to_datetime(1635220133.855169, unit='s')
print(t)

2021-10-26 03:48:53.855169024


In [106]:
t = pd.to_datetime(1635220133.855169, unit='s', )

In [112]:
t.tz_localize('UTC').tz_convert('US/Pacific')

Timestamp('2021-10-25 20:48:53.855169024-0700', tz='US/Pacific')

In [168]:
df = pd.DataFrame(
        {'unix_epoch': [1641110340,  1641196740, 1641283140, 1641369540],
                'Sales': [23000, 19020, 21000, 17030]}
                )
df

Unnamed: 0,unix_epoch,Sales
0,1641110340,23000
1,1641196740,19020
2,1641283140,21000
3,1641369540,17030


In [181]:
df['Date'] = pd.to_datetime(df['unix_epoch'], unit='s')
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert('US/Pacific')
# df.set_index('Date', inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4 entries, 2022-01-01 23:59:00-08:00 to 2022-01-04 23:59:00-08:00
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype                     
---  ------      --------------  -----                     
 0   unix_epoch  4 non-null      int64                     
 1   Sales       4 non-null      int64                     
 2   Date        4 non-null      datetime64[ns, US/Pacific]
dtypes: datetime64[ns, US/Pacific](1), int64(2)
memory usage: 128.0 bytes


In [180]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4 entries, 2022-01-01 23:59:00-08:00 to 2022-01-04 23:59:00-08:00
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   unix_epoch  4 non-null      int64
 1   Sales       4 non-null      int64
dtypes: int64(2)
memory usage: 96.0 bytes


In [178]:
df.index = df.index.date
df

Unnamed: 0,unix_epoch,Sales
2022-01-01,1641110340,23000
2022-01-02,1641196740,19020
2022-01-03,1641283140,21000
2022-01-04,1641369540,17030


In [187]:
t = pd.to_datetime(1635220133.855169, unit='s', origin='unix')

In [195]:
df = pd.DataFrame(
        {'Date': pd.date_range('01-01-2022', periods=5),
        'order' : range(5)}
                 )
df

Unnamed: 0,Date,order
0,2022-01-01,0
1,2022-01-02,1
2,2022-01-03,2
3,2022-01-04,3
4,2022-01-05,4


In [197]:
(df['Date'] -  pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")

0    1640995200
1    1641081600
2    1641168000
3    1641254400
4    1641340800
Name: Date, dtype: int64

# Working with Date Offsets and Time Deltas

In [201]:
df = pd.DataFrame(
        {       
                'sales': [300, 400, 500, 600, 700, 800],
                'date': pd.date_range('2021-01-01', periods=6, freq='D')
        }
)
df.set_index('date', inplace=True)
df

Unnamed: 0_level_0,sales
date,Unnamed: 1_level_1
2021-01-01,300
2021-01-02,400
2021-01-03,500
2021-01-04,600
2021-01-05,700
2021-01-06,800


In [208]:
df['td_1'] = df.index + pd.Timedelta(days=1)
df

Unnamed: 0_level_0,sales,td_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-03,300,2021-01-04
2021-01-04,400,2021-01-05
2021-01-05,500,2021-01-06
2021-01-06,600,2021-01-07
2021-01-07,700,2021-01-08
2021-01-08,800,2021-01-09


In [210]:
df['do_1'] =  df.index + pd.DateOffset(days=1)
df

Unnamed: 0_level_0,sales,td_1,do_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-03,300,2021-01-04,2021-01-04
2021-01-04,400,2021-01-05,2021-01-05
2021-01-05,500,2021-01-06,2021-01-06
2021-01-06,600,2021-01-07,2021-01-07
2021-01-07,700,2021-01-08,2021-01-08
2021-01-08,800,2021-01-09,2021-01-09


In [303]:
df = pd.DataFrame(
        {       
                'sales': [300, 400, 500, 600, 700, 800, 900, 1000, 800, 700],
                'date': pd.date_range('2021-01-01 00:00:00', periods=10, freq='D', tz='US/Pacific')
        }
)
#df.set_index('date', inplace=True)
df

Unnamed: 0,sales,date
0,300,2021-01-01 00:00:00-08:00
1,400,2021-01-02 00:00:00-08:00
2,500,2021-01-03 00:00:00-08:00
3,600,2021-01-04 00:00:00-08:00
4,700,2021-01-05 00:00:00-08:00
5,800,2021-01-06 00:00:00-08:00
6,900,2021-01-07 00:00:00-08:00
7,1000,2021-01-08 00:00:00-08:00
8,800,2021-01-09 00:00:00-08:00
9,700,2021-01-10 00:00:00-08:00


In [271]:
df['date'] - pd.Timedelta('1 day 5 hours 22 seconds')

0   2020-12-30 18:59:38-08:00
1   2020-12-31 18:59:38-08:00
2   2021-01-01 18:59:38-08:00
3   2021-01-02 18:59:38-08:00
4   2021-01-03 18:59:38-08:00
5   2021-01-04 18:59:38-08:00
6   2021-01-05 18:59:38-08:00
7   2021-01-06 18:59:38-08:00
8   2021-01-07 18:59:38-08:00
9   2021-01-08 18:59:38-08:00
Name: date, dtype: datetime64[ns, US/Pacific]

In [278]:
df['date'] - pd.DateOffset(days=1, hours=5, seconds=22)

0   2020-12-30 18:59:38-08:00
1   2020-12-31 18:59:38-08:00
2   2021-01-01 18:59:38-08:00
3   2021-01-02 18:59:38-08:00
4   2021-01-03 18:59:38-08:00
5   2021-01-04 18:59:38-08:00
6   2021-01-05 18:59:38-08:00
7   2021-01-06 18:59:38-08:00
8   2021-01-07 18:59:38-08:00
9   2021-01-08 18:59:38-08:00
Name: date, dtype: datetime64[ns, US/Pacific]

In [304]:
df['day_name'] = df['date'].dt.day_name() 
df['business_d'] = df['date'] + pd.offsets.BDay(0)
df['day_name_b'] = df['business_d'].dt.day_name()
df

Unnamed: 0,sales,date,day_name,business_d,day_name_b
0,300,2021-01-01 00:00:00-08:00,Friday,2021-01-01 00:00:00-08:00,Friday
1,400,2021-01-02 00:00:00-08:00,Saturday,2021-01-04 00:00:00-08:00,Monday
2,500,2021-01-03 00:00:00-08:00,Sunday,2021-01-04 00:00:00-08:00,Monday
3,600,2021-01-04 00:00:00-08:00,Monday,2021-01-04 00:00:00-08:00,Monday
4,700,2021-01-05 00:00:00-08:00,Tuesday,2021-01-05 00:00:00-08:00,Tuesday
5,800,2021-01-06 00:00:00-08:00,Wednesday,2021-01-06 00:00:00-08:00,Wednesday
6,900,2021-01-07 00:00:00-08:00,Thursday,2021-01-07 00:00:00-08:00,Thursday
7,1000,2021-01-08 00:00:00-08:00,Friday,2021-01-08 00:00:00-08:00,Friday
8,800,2021-01-09 00:00:00-08:00,Saturday,2021-01-11 00:00:00-08:00,Monday
9,700,2021-01-10 00:00:00-08:00,Sunday,2021-01-11 00:00:00-08:00,Monday


In [330]:
from pandas.tseries.holiday import USFederalHolidayCalendar as usholiday

In [331]:
bday_us = pd.offsets.CustomBusinessDay(n=1, calendar=usholiday())

In [333]:
df['hol_offset'] = (df['date'] + bday_us)
df['day_name_h'] =  df['hol_offset'].dt.day_name()
df



Unnamed: 0,sales,date,day_name,business_d,day_name_b,hol_offset,day_name_h
0,300,2021-01-01 00:00:00-08:00,Friday,2021-01-01 00:00:00-08:00,Friday,2021-01-04 00:00:00-08:00,Monday
1,400,2021-01-02 00:00:00-08:00,Saturday,2021-01-04 00:00:00-08:00,Monday,2021-01-04 00:00:00-08:00,Monday
2,500,2021-01-03 00:00:00-08:00,Sunday,2021-01-04 00:00:00-08:00,Monday,2021-01-04 00:00:00-08:00,Monday
3,600,2021-01-04 00:00:00-08:00,Monday,2021-01-04 00:00:00-08:00,Monday,2021-01-05 00:00:00-08:00,Tuesday
4,700,2021-01-05 00:00:00-08:00,Tuesday,2021-01-05 00:00:00-08:00,Tuesday,2021-01-06 00:00:00-08:00,Wednesday
5,800,2021-01-06 00:00:00-08:00,Wednesday,2021-01-06 00:00:00-08:00,Wednesday,2021-01-07 00:00:00-08:00,Thursday
6,900,2021-01-07 00:00:00-08:00,Thursday,2021-01-07 00:00:00-08:00,Thursday,2021-01-08 00:00:00-08:00,Friday
7,1000,2021-01-08 00:00:00-08:00,Friday,2021-01-08 00:00:00-08:00,Friday,2021-01-11 00:00:00-08:00,Monday
8,800,2021-01-09 00:00:00-08:00,Saturday,2021-01-11 00:00:00-08:00,Monday,2021-01-11 00:00:00-08:00,Monday
9,700,2021-01-10 00:00:00-08:00,Sunday,2021-01-11 00:00:00-08:00,Monday,2021-01-11 00:00:00-08:00,Monday
