# Creating a DatetimeIndex

In [50]:
import pandas as pd
import numpy as np
import datetime as dt
import pytz

In [51]:
# Display the installed pandas version.
pd.__version__

'1.3.2'

### Python's `datetime`, pandas `Timestamp`, and `pandas.to_datetime()`

In [66]:
# The same instant (midnight, 1 January 2021) built three different ways.
dt1 = dt.datetime(year=2021, month=1, day=1)   # standard-library datetime
dt2 = pd.Timestamp(year=2021, month=1, day=1)  # pandas Timestamp from components
dt3 = pd.to_datetime('2021-1-1')               # pandas parsing a date string

In [89]:
# One value per line; all three print as the same date-time text.
print(dt1, dt2, dt3, sep='\n')

2021-01-01 00:00:00
2021-01-01 00:00:00
2021-01-01 00:00:00


In [68]:
# Show the concrete class behind each of the three objects, one per line.
print(type(dt1), type(dt2), type(dt3), sep='\n')

<class 'datetime.datetime'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [74]:
# Chained comparison: True only when dt1 == dt2 and dt2 == dt3.
dt1 == dt2 == dt3

True

In [61]:
# A standard-library datetime compared directly with the value returned by pd.to_datetime.
dt.datetime(2021,1,1) == pd.to_datetime('2021-1-1')

True

### `pandas.to_datetime()`

In [93]:
# Passing a list of date strings to pd.to_datetime converts them all at once.
dates = ['2021-1-1', '2021-1-2']
pd_dates = pd.to_datetime(dates)

# Print the converted object and then its class, one per line.
print(pd_dates, type(pd_dates), sep='\n')

DatetimeIndex(['2021-01-01', '2021-01-02'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [94]:
# Print the first element of the index, then its class.
print(pd_dates[0], type(pd_dates[0]), sep='\n')

2021-01-01 00:00:00
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [101]:
# One list mixing several date-string layouts with numpy, standard-library and
# pandas date objects.
dates = [
    '2021-01-01',                 # str, format %Y-%m-%d
    '2/1/2021',                   # str, format %m/%d/%Y
    '03-01-2021',                 # str, format %m-%d-%Y
    'April 1, 2021',              # str, format %B %d, %Y
    '20210501',                   # str, format %Y%m%d
    np.datetime64('2021-07-01'),  # numpy datetime64
    # The module is imported as `dt` (`import datetime as dt`), so the bare
    # name `datetime` is undefined on a fresh kernel; go through the alias.
    dt.datetime(2021, 8, 1),      # Python datetime
    pd.Timestamp(2021, 9, 1),     # pandas Timestamp
]

In [104]:
# Convert the mixed list in a single call. infer_datetime_format=True asks pandas
# to guess the string layout; errors='coerce' turns unparseable entries into NaT
# instead of raising.
parsed_dates = pd.to_datetime(dates, infer_datetime_format=True, errors='coerce')
print(parsed_dates)

DatetimeIndex(['2021-01-01', '2021-02-01', '2021-03-01', '2021-04-01',
               '2021-05-01', '2021-07-01', '2021-08-01', '2021-09-01'],
              dtype='datetime64[ns]', freq=None)


In [105]:
# Element-wise comparison: the DatetimeIndex constructor applied to the same list
# versus the index stored in parsed_dates.
pd.DatetimeIndex(dates) == parsed_dates

array([ True,  True,  True,  True,  True,  True,  True,  True])

In [15]:
# Build the index through the DatetimeIndex constructor and display it.
parsed_dates = pd.DatetimeIndex(data=dates)
parsed_dates

DatetimeIndex(['2021-01-01', '2021-02-01', '2021-03-01', '2021-04-01',
               '2021-05-01', '2021-07-01', '2021-08-01', '2021-09-01'],
              dtype='datetime64[ns]', freq=None)

In [121]:
# Label / value pairs for the calendar attributes exposed by the index.
# The 'Quarter' label deliberately has no colon so the printed text is unchanged.
date_properties = [
    ('Name of Day :', parsed_dates.day_name()),
    ('Month :', parsed_dates.month),
    ('Year :', parsed_dates.year),
    ('Days in Month :', parsed_dates.days_in_month),
    ('Quarter', parsed_dates.quarter),
    ('Quarter Start :', parsed_dates.is_quarter_start),
    ('Leap Year :', parsed_dates.is_leap_year),
    ('Month Start :', parsed_dates.is_month_start),
    ('Month End :', parsed_dates.is_month_end),
    ('Year Start :', parsed_dates.is_year_start),
]

for label, values in date_properties:
    print(f'{label} {values}')


Name of Day : Index(['Friday', 'Monday', 'Monday', 'Thursday', 'Saturday', 'Thursday',
       'Sunday', 'Wednesday'],
      dtype='object')
Month : Int64Index([1, 2, 3, 4, 5, 7, 8, 9], dtype='int64')
Year : Int64Index([2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021], dtype='int64')
Days in Month : Int64Index([31, 28, 31, 30, 31, 31, 31, 30], dtype='int64')
Quarter Int64Index([1, 1, 1, 2, 2, 3, 3, 3], dtype='int64')
Quarter Start : [ True False False  True False  True False False]
Leap Year : [False False False False False False False False]
Month Start : [ True  True  True  True  True  True  True  True]
Month End : [False False False False False False False False]
Year Start : [ True False False False False False False False]


In [19]:
# Daily range defined by a start date and a number of periods.
pd.date_range(start='2021-01-01', periods=3, freq='D')

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq='D')

In [20]:
# Daily range bounded by explicit start and end dates.
pd.date_range(start='2021-01-01', end='2021-01-03', freq='D')


DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq='D')

# Using Format Arguments to DateTime

In [27]:
# Eight consecutive timestamps beginning 1 January 2000; displayed as the cell result.
dates = pd.date_range(start='1/1/2000', periods=8)
dates

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')

In [21]:
# Parse a single string with an explicit year/month/day format.
pd.to_datetime("2010/11/12", format="%Y/%m/%d")

Timestamp('2010-11-12 00:00:00')

In [37]:
# Format every timestamp in the index as a '%Y-%m-%d' string.
dates.strftime('%Y-%m-%d')

Index(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04', '2000-01-05',
       '2000-01-06', '2000-01-07', '2000-01-08'],
      dtype='object')

In [35]:
# Seed the generator so re-running the notebook reproduces the same values;
# unseeded random draws differ on every run.
rng = np.random.default_rng(seed=0)

# 5 rows x 3 columns of standard-normal samples, indexed by five consecutive
# days starting 1 January 2000.
df = pd.DataFrame(
    rng.standard_normal((5, 3)),
    index=pd.date_range('1/1/2000', periods=5),
    columns=list('ABC'),
)
df

Unnamed: 0,A,B,C
2000-01-01,-0.361949,-1.423005,1.330813
2000-01-02,1.018177,-0.742165,0.138619
2000-01-03,-0.953045,-0.273332,1.656213
2000-01-04,-0.426666,-2.463929,-0.342579
2000-01-05,0.766352,1.063507,0.93402


In [33]:
# df.index is already a DatetimeIndex, so there is nothing to parse. The previous
# format="%m/%Y/%d" argument was not applied (the recorded result is the index
# unchanged, freq='D' included) and did not describe the data, so it is dropped.
# `format` is meant for parsing non-datetime input such as strings.
pd.to_datetime(df.index)

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05'],
              dtype='datetime64[ns]', freq='D')

In [46]:
# One column per datetime component; each row describes a single timestamp.
df = pd.DataFrame({
    "year": [2015, 2016],
    "month": [2, 3],
    "day": [4, 5],
    "hour": [2, 3],
})
df

Unnamed: 0,year,month,day,hour
0,2015,2,4,2
1,2016,3,5,3


In [47]:
# Assemble one timestamp per row from the year / month / day / hour columns.
pd.to_datetime(df)

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]