# Time Series

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
# 72 hourly timestamps starting 2011-01-01. The lowercase 'h' alias is used because
# the uppercase 'H' spelling is deprecated since pandas 2.2; 'h' is accepted by
# older releases as well.
rng = pd.date_range('2011/1/1', periods=72, freq='h')

In [4]:
# One random value per timestamp in `rng`, indexed by `rng`.
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
# Preview the first five rows.
ts.head(n=5)

2011-01-01 00:00:00    1.816846
2011-01-01 01:00:00   -0.998269
2011-01-01 02:00:00   -1.940087
2011-01-01 03:00:00    0.736687
2011-01-01 04:00:00    2.176403
Freq: H, dtype: float64

In [6]:
# Re-index onto a 45-minute grid; 'pad' carries the last known value forward
# into grid points that fall between the original observations.
cvt = ts.asfreq(freq='45min', method='pad')
cvt.head(n=5)

2011-01-01 00:00:00    1.816846
2011-01-01 00:45:00    1.816846
2011-01-01 01:30:00   -0.998269
2011-01-01 02:15:00   -1.940087
2011-01-01 03:00:00    0.736687
Freq: 45T, dtype: float64

In [7]:
# Downsample to one value per calendar day by averaging the observations in it.
ts.resample(rule='D').mean()

2011-01-01   -0.162211
2011-01-02    0.115601
2011-01-03    0.219592
Freq: D, dtype: float64

## classes
----------------------------
|class | remarks | create|
|------|---------|-------|
|Timestamp | represents a single timestamp | to_datetime, Timestamp |
|DatetimeIndex | index of Timestamp | to_datetime, date_range, DatetimeIndex |
|Period | represents a single time span | Period |
|PeriodIndex | index of Period | period_range, PeriodIndex|

In [10]:
from datetime import datetime

# A Timestamp can be built directly from a standard-library datetime.
pd.Timestamp(datetime(year=2017, month=1, day=1))

Timestamp('2017-01-01 00:00:00')

In [11]:
# With no explicit freq, the span is inferred from the string (here: one month).
pd.Period(value='2017-1')

Period('2017-01', 'M')

In [12]:
# An explicit daily freq turns the same string into the first day of that month.
pd.Period(value='2017-1', freq='D')

Period('2017-01-01', 'D')

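A sequence of `Timestamp` objects used as an index is converted to a `DatetimeIndex`; a sequence of `Period` objects is converted to a `PeriodIndex`.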

In [14]:
# A list of Timestamps passed as the index of a Series.
dates = list(map(pd.Timestamp, ('2012-05-01', '2012-05-02', '2012-05-03')))
ts = pd.Series(data=np.random.randn(3), index=dates)

In [17]:
# A list of monthly Periods passed as the index of a Series.
periods = list(map(pd.Period, ('2012-01', '2012-02', '2012-03')))
ts = pd.Series(data=np.random.randn(3), index=periods)

In [18]:
# Display the current `ts` (a bare last expression is rendered as the cell output).
ts

2012-01   -0.076480
2012-02    1.489065
2012-03    0.612038
Freq: M, dtype: float64

In [19]:
# Parse strings into datetimes; None becomes NaT.
# Both strings use the same textual format: pandas >= 2.0 infers the format from
# the first element and raises on elements that do not match it, so the original
# mix of 'Jul 31, 2009' and '2010-01-10' fails there. The parsed dates are unchanged.
pd.to_datetime(pd.Series(['Jul 31, 2009', 'Jan 10, 2010', None]))

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

In [21]:
# A plain list of strings yields a DatetimeIndex.
# Both strings use '/' separators: pandas >= 2.0 infers one format from the first
# element, so the original second value '2010.12.31' no longer parses alongside it.
pd.to_datetime(['2005/11/23', '2010/12/31'])

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)

In [24]:
# Date components stored as separate integer columns.
df = pd.DataFrame(
    data={
        'year': [2015, 2016],
        'month': [2, 3],
        'day': [4, 5],
        'hour': [2, 3],
    }
)
# to_datetime assembles one datetime per row from the selected component columns.
pd.to_datetime(df.loc[:, ['year', 'month', 'day']])

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

In [25]:
# Epoch timestamps: with unit='s' the integers are read as seconds since the epoch.
pd.to_datetime(
    [1349720105, 1349806505, 1349892905, 1349979305, 1350065705],
    unit='s',
)

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

In [27]:
# Four daily timestamps; the time of day of the start value is kept on every entry.
stamps = pd.date_range(start='2012-10-08 18:15:05', periods=4, freq='D')
stamps

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05'],
              dtype='datetime64[ns]', freq='D')

In [28]:
# bdate_range defaults to business-day frequency, so weekends are skipped.
index = pd.bdate_range(start='2012-1-1', periods=250)
index

DatetimeIndex(['2012-01-02', '2012-01-03', '2012-01-04', '2012-01-05',
               '2012-01-06', '2012-01-09', '2012-01-10', '2012-01-11',
               '2012-01-12', '2012-01-13',
               ...
               '2012-12-03', '2012-12-04', '2012-12-05', '2012-12-06',
               '2012-12-07', '2012-12-10', '2012-12-11', '2012-12-12',
               '2012-12-13', '2012-12-14'],
              dtype='datetime64[ns]', length=250, freq='B')

In [None]:
# Illustrative bounds so that this cell runs on a fresh kernel; `start` and `end`
# were not defined by any earlier cell.
start, end = pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')
# Business month-end dates. The offset object is used instead of the 'BM' string
# alias, which is deprecated since pandas 2.2.
pd.date_range(start, end, freq=pd.offsets.BMonthEnd())
# Weekly dates (only this last expression is rendered as the cell output).
pd.date_range(start, end, freq='W')

# indexing

In [29]:
# Last business day of every month in 2011. The BMonthEnd offset object is passed
# instead of the 'BM' string alias, which is deprecated since pandas 2.2; the
# offset object works on old and new pandas alike.
rng = pd.date_range('2011-01-31', '2011-12-30', freq=pd.offsets.BMonthEnd())

In [32]:
# One random value per date in `rng`.
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
# Index of every second observation.
ts.iloc[::2].index

DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-29',
               '2011-09-30', '2011-11-30'],
              dtype='datetime64[ns]', freq='2BM')

In [33]:
# Label lookup with a date string that matches one index entry exactly.
ts.loc['1/31/2011']

-0.53376668219686685

In [34]:
# Partial-string lookup: a year string selects every entry that falls in that year.
ts.loc['2011']

2011-01-31   -0.533767
2011-02-28   -0.275813
2011-03-31    0.728291
2011-04-29   -1.519962
2011-05-31    1.068631
2011-06-30    0.118934
2011-07-29    1.962992
2011-08-31   -0.407807
2011-09-30    0.072374
2011-10-31    0.468694
2011-11-30   -0.794578
2011-12-30    0.410783
Freq: BM, dtype: float64

In [37]:
# 1000 one-minute observations in a single column 'A'. 'min' replaces the 'T'
# alias, which is deprecated since pandas 2.2 ('min' also works on older releases).
dft = pd.DataFrame(
    np.random.randn(1000, 1),
    columns=['A'],
    index=pd.date_range('20130101', periods=1000, freq='min'),
)
# Preview the first rows instead of rendering the whole 1000-row frame.
dft.head(10)

Unnamed: 0,A
2013-01-01 00:00:00,-1.974428
2013-01-01 00:01:00,-1.148284
2013-01-01 00:02:00,-0.380720
2013-01-01 00:03:00,0.699184
2013-01-01 00:04:00,-0.977866
2013-01-01 00:05:00,0.397468
2013-01-01 00:06:00,1.159124
2013-01-01 00:07:00,0.942133
2013-01-01 00:08:00,-1.162891
2013-01-01 00:09:00,0.949470


In [38]:
# Two-level index: ten 12-hourly timestamps crossed with the labels 'a' and 'b'.
# '12h' replaces '12H', whose uppercase alias is deprecated since pandas 2.2.
dft2 = pd.DataFrame(
    np.random.randn(20, 1),
    columns=['A'],
    index=pd.MultiIndex.from_product(
        [pd.date_range('20130101', periods=10, freq='12h'), ['a', 'b']]
    ),
)
dft2

Unnamed: 0,Unnamed: 1,A
2013-01-01 00:00:00,a,0.876886
2013-01-01 00:00:00,b,-0.564818
2013-01-01 12:00:00,a,0.997184
2013-01-01 12:00:00,b,2.986819
2013-01-02 00:00:00,a,-0.761274
2013-01-02 00:00:00,b,1.470748
2013-01-02 12:00:00,a,-0.418003
2013-01-02 12:00:00,b,0.412899
2013-01-03 00:00:00,a,-0.024298
2013-01-03 00:00:00,b,-0.663589


In [40]:
# offset
from pandas.tseries.offsets import MonthBegin

# Adding one MonthBegin moves the date forward to the next first-of-month.
pd.Timestamp('2014-01-02') + MonthBegin(1)

Timestamp('2014-02-01 00:00:00')

In [41]:
# n=4 moves forward four month starts from the given date.
pd.Timestamp('2014-01-02') + MonthBegin(4)

Timestamp('2014-05-01 00:00:00')

In [45]:
# holiday
# NOTE(review): the star import supplies DateOffset below and is also what brings
# CustomBusinessDay and BDay into scope for later cells; explicit imports would be
# clearer.
from pandas.tseries.offsets import *
from pandas.tseries.holiday import (
    AbstractHolidayCalendar,
    Holiday,
    MO,
    USMemorialDay,
    nearest_workday,
)


class ExampleCalendar(AbstractHolidayCalendar):
    """Demo calendar with three rules: Memorial Day, July 4th and Columbus Day."""

    rules = [
        USMemorialDay,
        # nearest_workday shifts a weekend occurrence to the closest weekday.
        Holiday('July 4th', month=7, day=4, observance=nearest_workday),
        # Anchor on Oct 1, then move to the second Monday on or after it.
        Holiday('Columbus Day', month=10, day=1, offset=DateOffset(weekday=MO(2))),
    ]


cal = ExampleCalendar()
# With no arguments, holidays() covers the calendar's default date range.
cal.holidays()

DatetimeIndex(['1970-05-25', '1970-07-03', '1970-10-12', '1971-05-31',
               '1971-07-05', '1971-10-11', '1972-05-29', '1972-07-04',
               '1972-10-09', '1973-05-28',
               ...
               '2027-10-11', '2028-05-29', '2028-07-04', '2028-10-09',
               '2029-05-28', '2029-07-04', '2029-10-08', '2030-05-27',
               '2030-07-04', '2030-10-14'],
              dtype='datetime64[ns]', length=183, freq=None)

In [46]:
# CDay (pandas documents it as an alias of CustomBusinessDay) steps over business
# days while also skipping the holidays of the supplied calendar.
from pandas.tseries.offsets import CDay
# pd.DatetimeIndex(start=..., end=..., freq=...) was deprecated in pandas 0.24 and
# removed in 1.0; pd.date_range is the supported way to generate the range.
pd.date_range(start='7/1/2012', end='7/10/2012', freq=CDay(calendar=cal)).to_pydatetime()

array([datetime.datetime(2012, 7, 2, 0, 0),
       datetime.datetime(2012, 7, 3, 0, 0),
       datetime.datetime(2012, 7, 5, 0, 0),
       datetime.datetime(2012, 7, 6, 0, 0),
       datetime.datetime(2012, 7, 9, 0, 0),
       datetime.datetime(2012, 7, 10, 0, 0)], dtype=object)

In [47]:
# Business-day offset that also skips the holidays defined by `cal`.
offset = CustomBusinessDay(calendar=cal)
# One custom business day after 2012-05-25.
datetime(year=2012, month=5, day=25) + offset

Timestamp('2012-05-29 00:00:00')

# shifting and lagging

In [52]:
# Keep only the first five observations (this rebinds `ts` for the cells below).
ts = ts.iloc[:5]
ts

2011-01-31   -0.533767
2011-02-28   -0.275813
2011-03-31    0.728291
2011-04-29   -1.519962
2011-05-31    1.068631
Freq: BM, dtype: float64

In [51]:
# Move the values one position down the index; the first slot becomes NaN.
ts.shift(periods=1)

2011-01-31         NaN
2011-02-28   -0.533767
2011-03-31   -0.275813
2011-04-29    0.728291
2011-05-31   -1.519962
Freq: BM, dtype: float64

# frequency conversion

In [53]:
# Convert to business-day frequency; 'pad' fills each new day with the last known value.
ts.asfreq(freq=BDay(), method='pad')

2011-01-31   -0.533767
2011-02-01   -0.533767
2011-02-02   -0.533767
2011-02-03   -0.533767
2011-02-04   -0.533767
2011-02-07   -0.533767
2011-02-08   -0.533767
2011-02-09   -0.533767
2011-02-10   -0.533767
2011-02-11   -0.533767
2011-02-14   -0.533767
2011-02-15   -0.533767
2011-02-16   -0.533767
2011-02-17   -0.533767
2011-02-18   -0.533767
2011-02-21   -0.533767
2011-02-22   -0.533767
2011-02-23   -0.533767
2011-02-24   -0.533767
2011-02-25   -0.533767
2011-02-28   -0.275813
2011-03-01   -0.275813
2011-03-02   -0.275813
2011-03-03   -0.275813
2011-03-04   -0.275813
2011-03-07   -0.275813
2011-03-08   -0.275813
2011-03-09   -0.275813
2011-03-10   -0.275813
2011-03-11   -0.275813
                ...   
2011-04-20    0.728291
2011-04-21    0.728291
2011-04-22    0.728291
2011-04-25    0.728291
2011-04-26    0.728291
2011-04-27    0.728291
2011-04-28    0.728291
2011-04-29   -1.519962
2011-05-02   -1.519962
2011-05-03   -1.519962
2011-05-04   -1.519962
2011-05-05   -1.519962
2011-05-06 

In [54]:
# Upsample to 5-minute bins; bins that contain no observation come out as NaN.
ts.resample(rule='5min').mean()

2011-01-31 00:00:00   -0.533767
2011-01-31 00:05:00         NaN
2011-01-31 00:10:00         NaN
2011-01-31 00:15:00         NaN
2011-01-31 00:20:00         NaN
2011-01-31 00:25:00         NaN
2011-01-31 00:30:00         NaN
2011-01-31 00:35:00         NaN
2011-01-31 00:40:00         NaN
2011-01-31 00:45:00         NaN
2011-01-31 00:50:00         NaN
2011-01-31 00:55:00         NaN
2011-01-31 01:00:00         NaN
2011-01-31 01:05:00         NaN
2011-01-31 01:10:00         NaN
2011-01-31 01:15:00         NaN
2011-01-31 01:20:00         NaN
2011-01-31 01:25:00         NaN
2011-01-31 01:30:00         NaN
2011-01-31 01:35:00         NaN
2011-01-31 01:40:00         NaN
2011-01-31 01:45:00         NaN
2011-01-31 01:50:00         NaN
2011-01-31 01:55:00         NaN
2011-01-31 02:00:00         NaN
2011-01-31 02:05:00         NaN
2011-01-31 02:10:00         NaN
2011-01-31 02:15:00         NaN
2011-01-31 02:20:00         NaN
2011-01-31 02:25:00         NaN
                         ...   
2011-05-

In [55]:
# Upsample the first two observations to 250-millisecond bins. '250ms' replaces
# '250L': the 'L' alias is deprecated since pandas 2.2, and 'ms' is accepted by
# older releases too. Only the last expression is rendered as the cell output.
ts[:2].resample('250ms').asfreq()          # new slots are left as NaN
ts[:2].resample('250ms').ffill()           # forward-fill every new slot
ts[:2].resample('250ms').ffill(limit=2)    # forward-fill at most two slots per gap

2011-01-31 00:00:00.000   -0.533767
2011-01-31 00:00:00.250   -0.533767
2011-01-31 00:00:00.500   -0.533767
2011-01-31 00:00:00.750         NaN
2011-01-31 00:00:01.000         NaN
2011-01-31 00:00:01.250         NaN
2011-01-31 00:00:01.500         NaN
2011-01-31 00:00:01.750         NaN
2011-01-31 00:00:02.000         NaN
2011-01-31 00:00:02.250         NaN
2011-01-31 00:00:02.500         NaN
2011-01-31 00:00:02.750         NaN
2011-01-31 00:00:03.000         NaN
2011-01-31 00:00:03.250         NaN
2011-01-31 00:00:03.500         NaN
2011-01-31 00:00:03.750         NaN
2011-01-31 00:00:04.000         NaN
2011-01-31 00:00:04.250         NaN
2011-01-31 00:00:04.500         NaN
2011-01-31 00:00:04.750         NaN
2011-01-31 00:00:05.000         NaN
2011-01-31 00:00:05.250         NaN
2011-01-31 00:00:05.500         NaN
2011-01-31 00:00:05.750         NaN
2011-01-31 00:00:06.000         NaN
2011-01-31 00:00:06.250         NaN
2011-01-31 00:00:06.500         NaN
2011-01-31 00:00:06.750     

# timezone

In [57]:
# Fifteen daily timestamps created without a timezone.
rng = pd.date_range(start='3/6/2012 00:00', periods=15, freq='D')
# tz is None for a timezone-naive index, so this cell renders no output.
rng.tz

In [58]:
# Passing tz= at construction makes the index timezone-aware.
rng_pytz = pd.date_range(
    start='3/6/2012 00:00',
    periods=10,
    freq='D',
    tz='Europe/London',
)
rng_pytz.tz
                         

<DstTzInfo 'Europe/London' LMT-1 day, 23:59:00 STD>

In [59]:
# Random series on the timezone-naive index `rng`.
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
# tz_localize attaches a timezone to naive timestamps without shifting the clock time.
ts_utc = ts.tz_localize(tz='UTC')
ts_utc

2012-03-06 00:00:00+00:00    0.197446
2012-03-07 00:00:00+00:00   -0.840098
2012-03-08 00:00:00+00:00   -0.701958
2012-03-09 00:00:00+00:00    1.108848
2012-03-10 00:00:00+00:00    0.230378
2012-03-11 00:00:00+00:00   -1.807682
2012-03-12 00:00:00+00:00    1.220024
2012-03-13 00:00:00+00:00    0.376381
2012-03-14 00:00:00+00:00   -0.409129
2012-03-15 00:00:00+00:00   -0.845655
2012-03-16 00:00:00+00:00   -1.176222
2012-03-17 00:00:00+00:00    0.489170
2012-03-18 00:00:00+00:00   -1.498749
2012-03-19 00:00:00+00:00   -2.167898
2012-03-20 00:00:00+00:00   -1.095112
Freq: D, dtype: float64

In [60]:
# tz_convert re-expresses the same instants in another timezone.
ts_utc.tz_convert(tz='US/Eastern')

2012-03-05 19:00:00-05:00    0.197446
2012-03-06 19:00:00-05:00   -0.840098
2012-03-07 19:00:00-05:00   -0.701958
2012-03-08 19:00:00-05:00    1.108848
2012-03-09 19:00:00-05:00    0.230378
2012-03-10 19:00:00-05:00   -1.807682
2012-03-11 20:00:00-04:00    1.220024
2012-03-12 20:00:00-04:00    0.376381
2012-03-13 20:00:00-04:00   -0.409129
2012-03-14 20:00:00-04:00   -0.845655
2012-03-15 20:00:00-04:00   -1.176222
2012-03-16 20:00:00-04:00    0.489170
2012-03-17 20:00:00-04:00   -1.498749
2012-03-18 20:00:00-04:00   -2.167898
2012-03-19 20:00:00-04:00   -1.095112
Freq: D, dtype: float64