# Date/Time Functionality

In [1]:
import pandas as pd
import numpy as np

## Timestamp

In [2]:
# Parse a month/day/year string with a 12-hour clock time into a single Timestamp
pd.Timestamp('9/1/2019 10:05AM')

Timestamp('2019-09-01 10:05:00')

In [3]:
# Build a Timestamp from explicit calendar components (midnight on 20 Dec 2019)
pd.Timestamp(year=2019, month=12, day=20, hour=0, minute=0)

Timestamp('2019-12-20 00:00:00')

In [4]:
# isoweekday() counts Monday=1 ... Sunday=7, so 5 means 20 Dec 2019 is a Friday
pd.Timestamp(year=2019, month=12, day=20, hour=0, minute=0).isoweekday()

5

In [6]:
# Individual components are exposed as attributes; here the seconds field
pd.Timestamp(year=2019, month=12, day=20, hour=5, minute=2, second=23).second

23

## Period

In [8]:
# A Period represents a span of time rather than a single instant.

# Period for January 2016: a month/year string gives monthly ('M') frequency
pd.Period('1/2016')

Period('2016-01', 'M')

In [9]:
# A full month/day/year string gives a daily ('D') Period: 5 March 2016
pd.Period('3/5/2016')

Period('2016-03-05', 'D')

In [11]:
# Arithmetic: adding an integer moves the Period by that many units of its
# own frequency, so a monthly Period + 5 is five months later
pd.Period('1/2016') + 5

Period('2016-06', 'M')

In [12]:
# Subtracting an integer from a daily Period moves it back that many days
pd.Period('3/5/2016') - 2

Period('2016-03-03', 'D')

## DatetimeIndex and PeriodIndex

In [13]:
# Datetime-based indices: a Series whose index labels are individual Timestamps
stamps = [pd.Timestamp(day) for day in ('2016-09-01', '2016-09-02', '2016-09-03')]
t1 = pd.Series(list('abc'), index=stamps)

t1

2016-09-01    a
2016-09-02    b
2016-09-03    c
dtype: object

In [14]:
# A Series indexed by Timestamps ends up with a DatetimeIndex
type(t1.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [15]:
# Period-based index: a Series whose index labels are monthly Periods
months = [pd.Period(month) for month in ('2016-09', '2016-10', '2016-11')]
t2 = pd.Series(list('def'), index=months)

t2

2016-09    d
2016-10    e
2016-11    f
Freq: M, dtype: object

In [16]:
# A Series indexed by Periods ends up with a PeriodIndex
type(t2.index)

pandas.core.indexes.period.PeriodIndex

## Converting to Datetime

In [17]:
# Four labels, each written in a different date format (still plain strings here)
d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16']

# NOTE(review): the RNG is not seeded, so the table values differ on every run.
values = np.random.randint(10, 100, size=(4, 2))
ts3 = pd.DataFrame(values, index=d1, columns=['a', 'b'])

ts3

Unnamed: 0,a,b
2 June 2013,80,70
"Aug 29, 2014",98,36
2015-06-26,84,62
7/12/16,74,39


In [18]:
# to_datetime
# The four labels use four different date formats. When the whole index is
# passed in one call, pandas >= 2.0 infers a single format from the first
# element and raises ValueError for the others. Parsing each label on its
# own works on old and new pandas alike, and is safe to re-run on an index
# that has already been converted.
ts3.index = pd.DatetimeIndex([pd.to_datetime(label) for label in ts3.index])
ts3

Unnamed: 0,a,b
2013-06-02,80,70
2014-08-29,98,36
2015-06-26,84,62
2016-07-12,74,39


In [19]:
# dayfirst=True reads the leading field as the day, giving 4 July 2012
pd.to_datetime('4,7,12', dayfirst = True)

Timestamp('2012-07-04 00:00:00')

## Timedelta

In [20]:
# Timedeltas are differences in time:
# subtracting one Timestamp from another yields a Timedelta (2 days here).

pd.Timestamp('9/3/2016') - pd.Timestamp('9/1/2016')

Timedelta('2 days 00:00:00')

In [22]:
# Add 12 days and 3 hours to a Timestamp. The Timedelta is built from keyword
# components rather than the string '12D 3H', because the uppercase 'H' unit
# is deprecated in Timedelta strings since pandas 2.2 (FutureWarning).
pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta(days=12, hours=3)

Timestamp('2016-09-14 11:10:00')

## Offset

In [23]:
# Offsets are similar to Timedeltas, but they follow calendar rules
# (weeks, month ends, ...) instead of fixed durations.

# weekday() counts Monday=0 ... Sunday=6, so 6 means 4 Sep 2016 is a Sunday
pd.Timestamp('9/4/2016').weekday()

6

In [24]:
# Shift the timestamp one week ahead with a Week offset (Sunday -> next Sunday)
pd.Timestamp('9/4/2016') + pd.offsets.Week()

Timestamp('2016-09-11 00:00:00')

In [25]:
# A MonthEnd offset rolls the date forward to the last day of its month
pd.Timestamp('9/4/2016') + pd.offsets.MonthEnd()

Timestamp('2016-09-30 00:00:00')

## Working with Dates in a DataFrame

In [27]:
# Nine biweekly measurement dates, each falling on a Sunday.
# The start date 2016-10-01 is a Saturday, so the range begins on the first
# Sunday after it (2016-10-02).

dates = pd.date_range(start='10-01-2016', periods=9, freq='2W-SUN')
dates

DatetimeIndex(['2016-10-02', '2016-10-16', '2016-10-30', '2016-11-13',
               '2016-11-27', '2016-12-11', '2016-12-25', '2017-01-08',
               '2017-01-22'],
              dtype='datetime64[ns]', freq='2W-SUN')

In [28]:
# Business-day frequency ('B'): weekends are skipped, so the Saturday start
# date (2016-10-01) rolls to Monday 2016-10-03
pd.date_range('10-01-2016', periods = 9, freq = 'B')

DatetimeIndex(['2016-10-03', '2016-10-04', '2016-10-05', '2016-10-06',
               '2016-10-07', '2016-10-10', '2016-10-11', '2016-10-12',
               '2016-10-13'],
              dtype='datetime64[ns]', freq='B')

In [29]:
# Quarter-start frequency anchored on June: quarters begin in Jun, Sep, Dec and Mar.
# The first quarter start on or after the 2016-04-01 start date is 2016-06-01.

pd.date_range('04-01-2016', periods = 12, freq = 'QS-JUN')

DatetimeIndex(['2016-06-01', '2016-09-01', '2016-12-01', '2017-03-01',
               '2017-06-01', '2017-09-01', '2017-12-01', '2018-03-01',
               '2018-06-01', '2018-09-01', '2018-12-01', '2019-03-01'],
              dtype='datetime64[ns]', freq='QS-JUN')

In [31]:
# Going back to the first example: rebuild the biweekly-Sunday index
dates = pd.date_range(start='10-01-2016', periods=9, freq='2W-SUN')

# NOTE(review): the RNG is not seeded, so these values change on every run.
# 'Count 1' is drawn first and accumulated into a random walk around 100;
# 'Count 2' is drawn second as independent noise around 100.
count_1 = 100 + np.random.randint(-5, 10, 9).cumsum()
count_2 = 100 + np.random.randint(-5, 10, 9)

df = pd.DataFrame({'Count 1': count_1, 'Count 2': count_2}, index=dates)
df

Unnamed: 0,Count 1,Count 2
2016-10-02,103,103
2016-10-16,101,106
2016-10-30,97,106
2016-11-13,93,103
2016-11-27,93,97
2016-12-11,96,96
2016-12-25,104,99
2017-01-08,103,105
2017-01-22,102,102


In [37]:
# Day of week for every index entry (Monday=0 ... Sunday=6); every value is 6
# because the index was generated with the Sunday-anchored '2W-SUN' frequency
df.index.weekday

Int64Index([6, 6, 6, 6, 6, 6, 6, 6, 6], dtype='int64')

In [38]:
# diff() -> difference between each row's value and the previous row's value
# (the first row has no predecessor, so it is NaN)
df.diff()

Unnamed: 0,Count 1,Count 2
2016-10-02,,
2016-10-16,-2.0,3.0
2016-10-30,-4.0,0.0
2016-11-13,-4.0,-3.0
2016-11-27,0.0,-6.0
2016-12-11,3.0,-1.0
2016-12-25,8.0,3.0
2017-01-08,-1.0,6.0
2017-01-22,-1.0,-3.0


In [39]:
# resample
# Mean count for each calendar month in our DataFrame.
# A MonthEnd offset object is passed instead of the string alias: 'M' is
# deprecated since pandas 2.2 in favour of 'ME', and 'ME' is not accepted by
# older versions, whereas the offset object bins by month end on both.
df.resample(pd.offsets.MonthEnd()).mean()

Unnamed: 0,Count 1,Count 2
2016-10-31,100.333333,105.0
2016-11-30,93.0,100.0
2016-12-31,100.0,97.5
2017-01-31,102.5,103.5


In [40]:
# Datetime indexing and slicing: select every row whose date falls in 2017.
# Row selection by partial date string must go through .loc; the bare
# df['2017'] form was deprecated in pandas 1.2 and raises KeyError from
# pandas 2.0 on, because df[...] with a string looks up a column.
df.loc['2017']

Unnamed: 0,Count 1,Count 2
2017-01-08,103,105
2017-01-22,102,102


In [41]:
# Select every row in December 2016. .loc is required for partial-string row
# selection: df['2016-12'] is treated as a column lookup and raises KeyError
# on pandas >= 2.0.
df.loc['2016-12']

Unnamed: 0,Count 1,Count 2
2016-12-11,96,96
2016-12-25,104,99


In [42]:
# Match on a range of dates: an open-ended slice keeps every row
# from December 2016 onwards
df.loc['2016-12':]

Unnamed: 0,Count 1,Count 2
2016-12-11,96,96
2016-12-25,104,99
2017-01-08,103,105
2017-01-22,102,102
