[Reference](https://medium.com/@ethan.duong1120/working-with-date-and-time-in-pandas-data-science-journey-fc6d599ea90a)

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

# 1. Convert string to datetime and handle missing values

In [2]:
# Sample frame: one (timestamp text, reading) pair per row. The timestamps are
# deliberately left as non-zero-padded strings so the next cells have
# something to parse.
df = pd.DataFrame(
    [
        ('2016-6-10 20:30:0', 2),
        ('2016-7-1 19:45:30', 3),
        ('2013-10-12 4:5:1', 4),
    ],
    columns=['date', 'value'],
)
df

Unnamed: 0,date,value
0,2016-6-10 20:30:0,2
1,2016-7-1 19:45:30,3
2,2013-10-12 4:5:1,4


In [3]:
# Let pandas infer the layout of the text timestamps. dayfirst=False asks for
# an ambiguous day/month pair to be read month-first.
df['date'] = df['date'].pipe(pd.to_datetime, dayfirst=False)

In [4]:
# Parse with an explicit format instead of relying on inference.
# The sample strings are year-month-day ('2016-6-10' is 10 June, matching the
# dayfirst=False call in the previous cell), so the format must be
# %Y-%m-%d. A %Y-%d-%m format would silently swap day and month on the raw
# strings, because every sample value is also a valid date when read that way.
df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d %H:%M:%S")

In [5]:
# Coerce it: a value that cannot be parsed becomes NaT (a null) instead of
# raising. errors='ignore' is not used here: it is deprecated in recent pandas
# and hands the input back unchanged when parsing fails, so bad values would
# stay as non-null strings that the dropna() below cannot remove.
df['date'] = pd.to_datetime(df['date'], errors='coerce')

# Eliminate it: drop every row whose date is null (missing or unparseable).
df = df.dropna(subset=['date'])
# To drop nulls in several columns at once, list more column names in
# `subset` (besides 'date').

# 2. Assemble datetime from multiple columns.

In [6]:
# Second sample frame: every field is text, with the calendar date and the
# time of day held in separate columns.
df = pd.DataFrame(
    [
        ('1', 'Ethan', '2022-01-01', '12:00:00'),
        ('2', 'Alison', '2022-01-02', '13:00:00'),
        ('3', 'Jolie', '2022-01-03', '14:00:00'),
        ('4', 'nick', '2022-01-04', '15:00:00'),
    ],
    columns=['id', 'name', 'date', 'time'],
)

In [7]:
# Join the date text and the time text with a single space, then parse the
# combined string into one datetime column.
df['datetime'] = pd.to_datetime(df['date'].str.cat(df['time'], sep=' '))

# 3. Extract year, month, and day from a date column.

In [8]:
# Pull each calendar component out of the datetime column through the .dt
# accessor and store it in a column of the same name.
df['year'], df['month'], df['day'] = (
    getattr(df['datetime'].dt, part) for part in ('year', 'month', 'day')
)

# 4. Select data between two dates

In [9]:
# Bounds of the window, both inclusive.
start_date = '2022-01-02'
end_date = '2022-01-04'

# Gotcha: a date-only string compares as midnight of that day, so a row that
# falls later on end_date (2022-01-04 15:00:00 here) lies outside the window.
mask = df['datetime'].between(start_date, end_date)
result = df[mask]

print(result)

  id    name        date      time            datetime  year  month  day
1  2  Alison  2022-01-02  13:00:00 2022-01-02 13:00:00  2022      1    2
2  3   Jolie  2022-01-03  14:00:00 2022-01-03 14:00:00  2022      1    3


# 5. Calculate the duration between two dates.

In [10]:
# Add a reference timestamp column: the single parsed value is assigned as a
# scalar, so every row receives the same moment.
df['second_datetime'] = pd.to_datetime('2022-01-06 12:00:00')

In [11]:
# Elapsed time from each row's datetime up to the reference timestamp.
df['duration'] = df['second_datetime'].sub(df['datetime'])

In [12]:
# Keep only the days component of each duration; the hours/minutes/seconds
# remainder is not included in this column.
df['duration_days'] = df['duration'].dt.days

# 6. Select data with a specific year and perform aggregations.

In [15]:
# Change candy to numeric and change birth_day to datetime.
# NOTE(review): this code is left disabled; the df built earlier in this
# notebook has no 'candy' or 'birth_day' column, so it presumably targets a
# different dataset. Confirm before enabling.
# df['candy'] = pd.to_numeric(df['candy'])
# df['birth_day'] = pd.to_datetime(df['birth_day'])

In [16]:
# Get the year from birth_day.
# year_born = df['birth_day'].dt.year

# Keep only the people born in 2022, then group by birth year and sum each group.
# df_day = df[year_born == 2022].groupby(year_born).sum()
# df_day