# Creating a DatetimeIndex

In [9]:
import pandas as pd
import numpy as np
import datetime as dt
import pytz

In [51]:
# Show the pandas version this notebook was executed with.
pd.__version__

'1.3.2'

### Python's `datetime`, pandas `Timestamp`, and `pandas.to_datetime()`

In [66]:
# Three ways to build the same moment (midnight, 1 January 2021):
# the standard library, the pandas constructor, and pandas string parsing.
dt1 = dt.datetime(year=2021, month=1, day=1)
dt2 = pd.Timestamp(year=2021, month=1, day=1)
dt3 = pd.to_datetime('2021-1-1')

In [89]:
# Print the three objects one per line to compare their string forms.
print(dt1, dt2, dt3, sep='\n')

2021-01-01 00:00:00
2021-01-01 00:00:00
2021-01-01 00:00:00


In [68]:
# Print the concrete class of each object, one per line.
print(type(dt1), type(dt2), type(dt3), sep='\n')

<class 'datetime.datetime'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [74]:
# Chained equality: True only when dt1 equals dt2 and dt2 equals dt3.
dt1 == dt2 == dt3

True

In [61]:
# Same comparison without intermediate variables: a stdlib datetime against a parsed Timestamp.
dt.datetime(2021,1,1) == pd.to_datetime('2021-1-1')

True

### `pandas.to_datetime()`

In [93]:
# Passing a list of strings to to_datetime produces a DatetimeIndex
# rather than a single Timestamp.
dates = ['2021-1-1', '2021-1-2']
pd_dates = pd.to_datetime(dates)
print(pd_dates, type(pd_dates), sep='\n')

DatetimeIndex(['2021-01-01', '2021-01-02'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [94]:
# Each element of the DatetimeIndex is itself a Timestamp.
print(pd_dates[0], type(pd_dates[0]), sep='\n')

2021-01-01 00:00:00
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [101]:
# A deliberately mixed bag of date representations (strings in several
# layouts plus numpy, stdlib and pandas objects) to feed to pandas below.
dates = ['2021-01-01',                 # date str format %Y-%m-%d
         '2/1/2021',                   # date str format %m/%d/%Y
         '03-01-2021',                 # date str format %m-%d-%Y
         'April 1, 2021',              # date str format %B %d, %Y
         '20210501',                   # date str format %Y%m%d
         np.datetime64('2021-07-01'),  # numpy datetime64
         # The module is imported as `dt`; the bare name `datetime` is never
         # bound in this notebook and raises NameError on a fresh kernel.
         dt.datetime(2021, 8, 1),      # python datetime
         pd.Timestamp(2021, 9, 1),     # pandas Timestamp
         ]

In [104]:
# Normalize every entry of `dates` into a single DatetimeIndex.
# errors='coerce' asks pandas to yield NaT for an unparseable entry instead of raising.
parsed_dates = pd.to_datetime(dates, errors='coerce', infer_datetime_format=True)

print(parsed_dates)

DatetimeIndex(['2021-01-01', '2021-02-01', '2021-03-01', '2021-04-01',
               '2021-05-01', '2021-07-01', '2021-08-01', '2021-09-01'],
              dtype='datetime64[ns]', freq=None)


In [105]:
# Element-wise check that the DatetimeIndex constructor yields the same values as to_datetime.
pd.DatetimeIndex(dates) == parsed_dates

array([ True,  True,  True,  True,  True,  True,  True,  True])

In [15]:
# Rebind parsed_dates to the index built directly by the DatetimeIndex constructor and display it.
parsed_dates = pd.DatetimeIndex(dates)
parsed_dates

DatetimeIndex(['2021-01-01', '2021-02-01', '2021-03-01', '2021-04-01',
               '2021-05-01', '2021-07-01', '2021-08-01', '2021-09-01'],
              dtype='datetime64[ns]', freq=None)

In [121]:
# Inspect the calendar attributes exposed by the DatetimeIndex; each one is
# reported per element of parsed_dates.
print(f'Name of Day : {parsed_dates.day_name()}')
print(f'Month : {parsed_dates.month}')
print(f'Year : {parsed_dates.year}')
print(f'Days in Month : {parsed_dates.days_in_month}')
# NOTE(review): this label lacks the ' : ' separator every other line uses.
print(f'Quarter {parsed_dates.quarter}')
print(f'Quarter Start : {parsed_dates.is_quarter_start}')
print(f'Leap Year : {parsed_dates.is_leap_year}')
print(f'Month Start : {parsed_dates.is_month_start}')
print(f'Month End : {parsed_dates.is_month_end}')
print(f'Year Start : {parsed_dates.is_year_start}')


Name of Day : Index(['Friday', 'Monday', 'Monday', 'Thursday', 'Saturday', 'Thursday',
       'Sunday', 'Wednesday'],
      dtype='object')
Month : Int64Index([1, 2, 3, 4, 5, 7, 8, 9], dtype='int64')
Year : Int64Index([2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021], dtype='int64')
Days in Month : Int64Index([31, 28, 31, 30, 31, 31, 31, 30], dtype='int64')
Quarter Int64Index([1, 1, 1, 2, 2, 3, 3, 3], dtype='int64')
Quarter Start : [ True False False  True False  True False False]
Leap Year : [False False False False False False False False]
Month Start : [ True  True  True  True  True  True  True  True]
Month End : [False False False False False False False False]
Year Start : [ True False False False False False False False]


In [19]:
# Generate a daily DatetimeIndex from a start date and a number of periods.
pd.date_range(start='2021-01-01', periods=3, freq='D')

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq='D')

In [20]:
# Same daily range, this time bounded by explicit start and end dates.
pd.date_range(
    start='2021-01-01',
    end='2021-01-03',
    freq='D',
)


DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq='D')

# Using Format Arguments to DateTime

In [14]:
import pandas as pd
import datetime as dt

Using Python `datetime.strptime()`

In [45]:
# Parse a month/day/year string; .date() keeps only the calendar date.
dt.datetime.strptime('1/1/2022', '%m/%d/%Y').date()

datetime.date(2022, 1, 1)

In [51]:
# %B matches the full month name ('January').
dt.datetime.strptime('1 January, 2022', '%d %B, %Y').date()

datetime.date(2022, 1, 1)

In [52]:
# %b matches the abbreviated month name ('Jan').
dt.datetime.strptime('1-Jan-2022', '%d-%b-%Y').date()

datetime.date(2022, 1, 1)

In [39]:
# %A matches the full weekday name that precedes the date.
dt.datetime.strptime('Saturday, January 1, 2022', '%A, %B %d, %Y').date()

datetime.date(2022, 1, 1)

In [61]:
# Keep the parsed datetime (without calling .date()) for the comparisons below.
dt_1 = dt.datetime.strptime('1/1/2022', '%m/%d/%Y')
# str() and __str__() give the same text. A cell only echoes its last
# expression, so a bare `dt_1.__str__()` statement is evaluated and thrown
# away; asserting the equivalence keeps the demonstration meaningful.
assert dt_1.__str__() == str(dt_1)
str(dt_1)

'2022-01-01 00:00:00'

In [55]:
# print() shows the same text that str(dt_1) returns.
print(dt_1)

2022-01-01 00:00:00


Using `pandas.to_datetime()`

In [36]:
# pandas counterpart of strptime: the same format codes, but the result is a Timestamp.
pd.to_datetime('1/1/2022', format='%m/%d/%Y')

Timestamp('2022-01-01 00:00:00')

In [37]:
# Full month name via %B.
pd.to_datetime('1 January, 2022', format='%d %B, %Y')

Timestamp('2022-01-01 00:00:00')

In [38]:
# Abbreviated month name via %b.
pd.to_datetime('1-Jan-2022', format='%d-%b-%Y')

Timestamp('2022-01-01 00:00:00')

In [40]:
# Leading full weekday name via %A.
pd.to_datetime('Saturday, January 1, 2022', format='%A, %B %d, %Y')

Timestamp('2022-01-01 00:00:00')

In [62]:
# Keep the parsed Timestamp so it can be compared with dt_1 in later cells.
dt_2 = pd.to_datetime('1/1/2022', format='%m/%d/%Y')
print(dt_2)

2022-01-01 00:00:00


In [102]:
# No format given: pandas works out the layout from the string itself.
pd.to_datetime('1-Jan-2022')

Timestamp('2022-01-01 00:00:00')

In [103]:
# Inference also copes with a leading weekday name.
pd.to_datetime('Saturday, January 1, 2022')

Timestamp('2022-01-01 00:00:00')

In [66]:
# str() and __str__() give the same text for a Timestamp as well. A cell only
# echoes its last expression, so a bare `str(dt_2)` statement is evaluated and
# thrown away; asserting the equivalence keeps the demonstration meaningful.
assert str(dt_2) == dt_2.__str__()
dt_2.__str__()

'2022-01-01 00:00:00'

In [67]:
# The stdlib datetime and the pandas Timestamp for the same moment compare equal.
dt_1 == dt_2

True

In [87]:
# .date() on the stdlib datetime returns a datetime.date.
dt_1.date()

datetime.date(2022, 1, 1)

In [88]:
# .date() on the pandas Timestamp returns a datetime.date as well.
dt_2.date()

datetime.date(2022, 1, 1)

In [86]:
# A cell only echoes its last expression, so a bare `type(dt_1)` statement is
# evaluated and thrown away. Assert dt_1's type instead so both types are
# documented; dt_1 comes from datetime.strptime, dt_2 from pd.to_datetime.
assert type(dt_1) is dt.datetime
type(dt_2)

pandas._libs.tslibs.timestamps.Timestamp

In [75]:
# Relationships between two classes are tested with issubclass(). isinstance()
# expects an object as its first argument, so handing it the DatetimeIndex
# class itself only asked whether that class object is a Timestamp instance.
# DatetimeIndex is not a Timestamp subclass, so the result is still False.
issubclass(pd.DatetimeIndex, pd.Timestamp)

False

In [81]:
# dt_1 was produced by datetime.strptime, so it is a stdlib datetime.
isinstance(dt_1, dt.datetime)

True

In [82]:
# The pandas Timestamp also passes the stdlib datetime instance check.
isinstance(dt_2, dt.datetime)

True

In [79]:
# The reverse does not hold: a plain datetime is not a pandas Timestamp.
isinstance(dt_1, pd.Timestamp)

False

In [90]:
# dt_2 was produced by pd.to_datetime, so it is a Timestamp.
isinstance(dt_2, pd.Timestamp)

True

In [100]:
# Timestamp is a subclass of datetime, which is why a Timestamp passes the datetime instance check.
issubclass(pd.Timestamp, dt.datetime)

True

### Transforming a pandas Series and DataFrame

In [121]:
# Transform a pandas Series of date strings to datetimes using to_datetime.
# to_datetime accepts the whole Series and converts it in one vectorized call;
# Series.apply(pd.to_datetime) runs the parser once per element for the same result.
pd.to_datetime(pd.Series(['1-JAN-2022', '2-JAN-2022', '2-JAN-2022']))

0   2022-01-01
1   2022-01-02
2   2022-01-02
dtype: datetime64[ns]

In [122]:
# Convert the string dates in a DataFrame by applying to_datetime to it.
pd.DataFrame(
    {'Date': ['January 1, 2022', 'January 2, 2022', 'January 3, 2022']}
).apply(pd.to_datetime)

Unnamed: 0,Date
0,2022-01-01
1,2022-01-02
2,2022-01-03
