A common problem in data analysis is a lack of uniformity in the structure of the input data.

In [8]:
# We write the same date, January 5th, 2017, in several different formats.
# pd.to_datetime converts all of them to a single datetime type.
# Europe date format: dd/mm/yyyy     US date format: mm/dd/yyyy    pandas (ISO 8601) display format: yyyy-mm-dd

import pandas as pd

# Seven different spellings of the same day; the first two also carry a time (2:30 PM).
dates = [
    '2017-01-05 2:30:00 PM',
    'Jan 5, 2017 14:30:00',
    '01/05/2017',
    '2017.01.05',
    '2017/01/05',
    '20170105',
    '01-05-2017',
]

# Parse every string on its own. Since pandas 2.0, to_datetime infers ONE
# format from the first element and applies it to the whole list, so passing
# this mixed-format list in a single call raises ValueError. Element-wise
# parsing behaves the same on old and new pandas versions.
parsed_dates = pd.DatetimeIndex([pd.to_datetime(date_string) for date_string in dates])
parsed_dates

DatetimeIndex(['2017-01-05 14:30:00', '2017-01-05 14:30:00',
               '2017-01-05 00:00:00', '2017-01-05 00:00:00',
               '2017-01-05 00:00:00', '2017-01-05 00:00:00',
               '2017-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)

In [9]:
# By default to_datetime raises an error when a value cannot be parsed.
# With errors='coerce' the unparseable value becomes NaT (Not a Time) instead.
# Each string is parsed on its own: since pandas 2.0 a single call infers one
# format from the first element, and with errors='coerce' every entry written
# in another format would silently turn into NaT.

pd.DatetimeIndex([pd.to_datetime(date_string, errors='coerce') for date_string in dates])

DatetimeIndex(['2017-01-05 14:30:00', '2017-01-05 14:30:00',
               '2017-01-05 00:00:00', '2017-01-05 00:00:00',
               '2017-01-05 00:00:00', '2017-01-05 00:00:00',
               '2017-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)

In [10]:
# Passing dayfirst=True makes the parser read the first number as the day,
# so '5/1/2017' is 5 January rather than May 1st.

pd.to_datetime(
    arg='5/1/2017',
    dayfirst=True,
)

Timestamp('2017-01-05 00:00:00')

In [8]:
# An explicit strptime-style format string lets us parse any delimiter, here '$'.

pd.to_datetime(
    arg='5$1$2017',
    format='%d$%m$%Y',
)

Timestamp('2017-01-05 00:00:00')

Unix time (epoch time) is the number of seconds that have elapsed since January 1, 1970 00:00:00 UTC.

In [12]:
# to_datetime also accepts a Unix timestamp; unit='s' says the number counts seconds.

t = 1501356749
pd.to_datetime(arg=t, unit='s')

Timestamp('2017-07-29 19:32:29')

In [13]:
# Wrapping the epoch value in a list yields a DatetimeIndex rather than a single Timestamp.
dt = pd.to_datetime(arg=[t], unit='s')
dt

DatetimeIndex(['2017-07-29 19:32:29'], dtype='datetime64[ns]', freq=None)

In [14]:
# Convert back to an integer epoch value.
# NOTE: viewing the index as int64 exposes the stored count, which for the
# datetime64[ns] index above is nanoseconds since the epoch (not seconds, as
# the original t was); integer-divide by 10**9 to recover seconds.

dt.view('int64')

array([1501356749000000000], dtype=int64)