# Working with dates and time in Pandas

Source article for this example: https://towardsdatascience.com/basic-time-series-manipulation-with-pandas-4432afee64ea

## Creating dummy data

In [1]:
import pandas as pd
from datetime import datetime
import numpy as np

# Create some dummy data to represent a problem.
# A fixed seed keeps the random columns identical on every
# "Restart Kernel -> Run All", so the displayed outputs stay reproducible.
SEED = 42
rng = np.random.default_rng(SEED)

# One timestamp every 3 hours, both endpoints included.
# Lowercase 'h' is the hour alias that is not deprecated in recent pandas.
date_rng = pd.date_range(start='1/1/2017', end='5/15/2017', freq='3h')

# Build the frame directly on the datetime index (named 'datetime') instead of
# creating a 'date' column, converting it, promoting it to the index and then
# dropping it in place. Each column holds random integers in [0, 100).
df = pd.DataFrame(
    {f'Column {i}': rng.integers(0, 100, size=len(date_rng)) for i in range(1, 5)},
    index=date_rng.rename('datetime'),
)

df

Unnamed: 0_level_0,Column 1,Column 2,Column 3,Column 4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-01 00:00:00,18,69,6,45
2017-01-01 03:00:00,37,43,78,42
2017-01-01 06:00:00,12,84,96,38
2017-01-01 09:00:00,28,55,20,24
2017-01-01 12:00:00,10,13,69,31
...,...,...,...,...
2017-05-14 12:00:00,71,30,71,68
2017-05-14 15:00:00,69,3,44,91
2017-05-14 18:00:00,95,83,59,17
2017-05-14 21:00:00,5,29,85,89


## Extracting quick summary statistics

In [2]:
# Per-column summary (count, mean, std, min, quartiles, max) of the dummy data.
summary_stats = df.describe()
summary_stats

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
count,1073.0,1073.0,1073.0,1073.0
mean,51.251631,49.361603,50.230196,49.688723
std,28.954965,28.052684,28.68941,28.141434
min,0.0,0.0,0.0,0.0
25%,28.0,25.0,25.0,27.0
50%,53.0,49.0,50.0,50.0
75%,76.0,73.0,75.0,73.0
max,99.0,99.0,99.0,99.0


## Extracting data for a specific day of the month

In [69]:
# Boolean mask: True for every row whose timestamp falls on the 2nd of any month.
is_second_of_month = df.index.day == 2
df.loc[is_second_of_month]

Unnamed: 0_level_0,Column 1,Column 2,Column 3,Column 4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-02 00:00:00,95,96,71,49
2017-01-02 03:00:00,81,86,53,75
2017-01-02 06:00:00,71,14,32,38
2017-01-02 09:00:00,41,19,62,64
2017-01-02 12:00:00,65,14,7,52
2017-01-02 15:00:00,48,30,36,49
2017-01-02 18:00:00,11,44,76,60
2017-01-02 21:00:00,5,46,15,51
2017-02-02 00:00:00,44,22,39,74
2017-02-02 03:00:00,29,94,5,18


## Extracting data for a specific date

In [71]:
# Partial-string indexing: select every row whose timestamp falls on this date.
# Use .loc for row selection -- plain df['2017-01-04'] is interpreted as a
# column lookup and raises KeyError in pandas 2.x (the row-selection fallback
# was deprecated in pandas 1.2 and later removed).
df.loc['2017-01-04']

Unnamed: 0_level_0,Column 1,Column 2,Column 3,Column 4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-04 00:00:00,62,13,97,89
2017-01-04 03:00:00,20,0,16,2
2017-01-04 06:00:00,78,40,7,10
2017-01-04 09:00:00,86,19,76,83
2017-01-04 12:00:00,52,66,98,62
2017-01-04 15:00:00,82,47,30,12
2017-01-04 18:00:00,38,42,75,9
2017-01-04 21:00:00,77,63,91,35


## Extracting data between two dates

In [72]:
# Label-based slice on the datetime index; both end dates are inclusive,
# so every row from 2017-03-04 00:00 through the end of 2017-03-06 is returned.
df.loc['2017-03-04':'2017-03-06']

Unnamed: 0_level_0,Column 1,Column 2,Column 3,Column 4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-03-04 00:00:00,77,74,71,13
2017-03-04 03:00:00,19,80,50,0
2017-03-04 06:00:00,38,77,32,77
2017-03-04 09:00:00,13,24,70,41
2017-03-04 12:00:00,79,0,26,71
2017-03-04 15:00:00,39,25,4,19
2017-03-04 18:00:00,60,46,27,81
2017-03-04 21:00:00,80,75,25,93
2017-03-05 00:00:00,19,84,96,66
2017-03-05 03:00:00,54,86,99,7


## Computing statistics over a time interval

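The `resample` method accepts many built-in frequency aliases; some basic ones include:  
* `'H'` for hours
* `'D'` for days
* `'B'` for business days (weekdays)
* `'M'` for months
* `'Y'` for years

Note: pandas 2.2 deprecated the spellings `'H'`, `'M'` and `'Y'` in favour of `'h'`, `'ME'` (month end) and `'YE'` (year end).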

In [74]:
# Monthly mean of every column, labelled by the last calendar day of each month.
# An explicit MonthEnd offset is used instead of the 'M' string alias: 'M' is
# deprecated since pandas 2.2 (replaced by 'ME'), whereas the offset object
# produces the same month-end bins on both older and newer pandas versions.
df.resample(pd.offsets.MonthEnd()).mean()

Unnamed: 0_level_0,Column 1,Column 2,Column 3,Column 4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-31,50.471774,49.907258,50.721774,46.310484
2017-02-28,50.991071,49.852679,48.607143,50.040179
2017-03-31,46.491935,51.91129,47.225806,48.125
2017-04-30,50.891667,48.695833,51.525,52.175
2017-05-31,50.557522,45.699115,44.681416,49.548673


In [76]:
# Group the rows into consecutive 5-day bins, then take each column's maximum
# within every bin.
five_day_bins = df.resample('5D')
five_day_bins.max()

Unnamed: 0_level_0,Column 1,Column 2,Column 3,Column 4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-01,96,96,98,99
2017-01-06,97,99,99,97
2017-01-11,99,97,95,93
2017-01-16,97,96,99,97
2017-01-21,91,99,98,98
2017-01-26,95,98,98,95
2017-01-31,97,99,99,94
2017-02-05,97,99,84,99
2017-02-10,97,98,98,99
2017-02-15,98,92,94,96
