# Pandas Time Series Analysis Part 4: to_datetime
## Inspired by Dhruv Patel's Code Basics YouTube Channel

In [1]:
from IPython.display import YouTubeVideo

# Embed the YouTube video referenced by this notebook (looked up by its video id)
video_id = 'igWjq3jtLYI'
YouTubeVideo(video_id)

In [3]:
import pandas as pd 

In [7]:
# Convert dates written in several different formats into one uniform DatetimeIndex.
# Each string is parsed on its own: pandas >= 2.0 infers a single format from the
# first element of a list and raises ValueError when later elements do not match it.
# (On pandas >= 2.0, pd.to_datetime(dates, format='mixed') is the built-in equivalent.)
dates = ['2017-01-05', 'Jan 5, 2017', '01/05/2017', '2017.01.05', '2017/01/05', '20170105']
uniform_dates = pd.DatetimeIndex([pd.to_datetime(date_text) for date_text in dates])
uniform_dates

DatetimeIndex(['2017-01-05', '2017-01-05', '2017-01-05', '2017-01-05',
               '2017-01-05', '2017-01-05'],
              dtype='datetime64[ns]', freq=None)

In [8]:
# to_datetime understands time-of-day components too (12-hour "PM" and 24-hour clocks).
# The strings are parsed one at a time because pandas >= 2.0 applies the format
# inferred from the first element to the whole list and rejects entries that differ.
dt = ['2017-01-05 2:30:00 PM', 'Jan 5, 2017 14:30:00', '01/05/2016', '2017.01.05', '2017/01/05', '20170105']
dates_with_times = pd.DatetimeIndex([pd.to_datetime(text) for text in dt])
dates_with_times

DatetimeIndex(['2017-01-05 14:30:00', '2017-01-05 14:30:00',
               '2016-01-05 00:00:00', '2017-01-05 00:00:00',
               '2017-01-05 00:00:00', '2017-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)

### In the US, the date format is MM/DD/YYYY
### In Europe, the date format is DD/MM/YYYY

In [11]:
# Month-first (US) reading is the default: '5/1/2017' is taken as May 1st, 2017
us_style_text = '5/1/2017'
pd.to_datetime(us_style_text)

Timestamp('2017-05-01 00:00:00')

In [12]:
# Day-first (European) reading: dayfirst=True makes '5/1/2017' mean 5 January 2017
european_style_text = '5/1/2017'
pd.to_datetime(european_style_text, dayfirst=True)

Timestamp('2017-01-05 00:00:00')

In [13]:
# Parse a string with a non-standard separator by spelling out its layout.
# The explicit format already says the day comes first (%d before %m), so the
# redundant dayfirst=True flag from the earlier version is dropped.
custom_parsed = pd.to_datetime('5*1*2017', format='%d*%m*%Y')
custom_parsed

Timestamp('2017-01-05 00:00:00')

In [14]:
# A value that cannot be parsed ('abc') makes to_datetime raise a ValueError.
# The error is caught and printed so this demonstration does not abort
# "Restart Kernel -> Run All". Strings are parsed one at a time so the failure
# is reported for the garbage value itself, regardless of the pandas version.
dates = ['2017-01-05', 'Jan 5, 2017', '01/05/2017', '2017.01.05', '2017/01/05','20170105','abc']
conversion_error = None
try:
    pd.DatetimeIndex([pd.to_datetime(date_text) for date_text in dates])
except ValueError as exc:
    conversion_error = exc  # keep a reference: `exc` is unbound once the except block ends
    print(f"{type(exc).__name__}: {exc}")

ValueError: ('Unknown string format:', 'abc')

In [15]:
# To keep the original strings untouched when any value cannot be parsed, catch the
# ValueError and fall back to the unconverted input. This is what errors='ignore'
# used to do; that option is deprecated since pandas 2.2, where the recommended
# replacement is to catch the exception explicitly.
dates = ['2017-01-05', 'Jan 5, 2017', '01/05/2017', '2017.01.05', '2017/01/05','20170105','abc']
try:
    maybe_converted = pd.DatetimeIndex([pd.to_datetime(date_text) for date_text in dates])
except ValueError:
    # No conversion is performed: return the strings as a plain object Index
    maybe_converted = pd.Index(dates, dtype=object)
maybe_converted

Index(['2017-01-05', 'Jan 5, 2017', '01/05/2017', '2017.01.05', '2017/01/05',
       '20170105', 'abc'],
      dtype='object')

In [16]:
# errors='coerce' stamps every unparseable value as NaT (Not a Time) instead of raising.
# The coercion is applied per string: on pandas >= 2.0 a list-level call would also
# turn valid dates into NaT whenever they do not match the format inferred from the
# first element.
dates = ['2017-01-05', 'Jan 5, 2017', '01/05/2017', '2017.01.05', '2017/01/05','20170105','abc']
coerced_dates = pd.DatetimeIndex([pd.to_datetime(date_text, errors='coerce') for date_text in dates])
coerced_dates

DatetimeIndex(['2017-01-05', '2017-01-05', '2017-01-05', '2017-01-05',
               '2017-01-05', '2017-01-05',        'NaT'],
              dtype='datetime64[ns]', freq=None)

In [17]:
# Unix epoch input: an integer count of seconds elapsed since 01/01/1970.
# unit='s' tells to_datetime to read the number as seconds; the resulting
# Timestamp carries no time zone and is to be read as GMT.
ts = 1_577_642_200
pd.to_datetime(ts, unit='s')

Timestamp('2019-12-29 17:56:40')

In [19]:
# Wrapping the epoch value in a list yields a DatetimeIndex rather than a single
# Timestamp; the values are again to be read as GMT.
ts = 1_577_642_200
epoch_seconds = [ts]
dt = pd.to_datetime(epoch_seconds, unit='s')
dt

DatetimeIndex(['2019-12-29 17:56:40'], dtype='datetime64[ns]', freq=None)

In [20]:
# Going back from datetimes to epoch integers: reinterpret the index values as
# 64-bit integers. For a datetime64[ns] index these are nanoseconds since
# 01/01/1970, i.e. 10**9 times the epoch value expressed in seconds.
epoch_values = dt.view('int64')
epoch_values

array([1577642200000000000], dtype=int64)