# Adjusting time series frequencies in pandas by re-indexing

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Six weekly timestamps starting from 1 January 2000. 'W' anchors on Sundays,
# so the first stamp is the first Sunday on or after the start date.
# The start date is written in ISO 8601 form to avoid relying on lenient
# parsing of a comma-separated date string.
index1 = pd.date_range('2000-01-01', periods=6, freq='W')
index1

DatetimeIndex(['2000-01-02', '2000-01-09', '2000-01-16', '2000-01-23',
               '2000-01-30', '2000-02-06'],
              dtype='datetime64[ns]', freq='W-SUN')

In [3]:
# Weekly frame: 6 rows x 3 columns of uniform random numbers on the weekly index
weekly_values = np.random.rand(6, 3)
weekly = pd.DataFrame(data=weekly_values, index=index1)
weekly

Unnamed: 0,0,1,2
2000-01-02,0.462937,0.148111,0.904395
2000-01-09,0.53667,0.756014,0.539099
2000-01-16,0.933224,0.696488,0.337029
2000-01-23,0.85137,0.80745,0.432043
2000-01-30,0.108456,0.701116,0.833512
2000-02-06,0.115465,0.438584,0.519949


In [4]:
# Six consecutive daily timestamps starting on 1 January 2000.
# The start date is written in ISO 8601 form to avoid relying on lenient
# parsing of a comma-separated date string.
index2 = pd.date_range('2000-01-01', periods=6, freq='D')
index2

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# Daily frame: 6 rows x 3 columns of uniform random numbers on the daily index
daily_values = np.random.rand(6, 3)
daily = pd.DataFrame(data=daily_values, index=index2)
daily

Unnamed: 0,0,1,2
2000-01-01,0.930628,0.400878,0.120822
2000-01-02,0.382823,0.075878,0.918818
2000-01-03,0.084312,0.547121,0.210465
2000-01-04,0.704303,0.458266,0.168958
2000-01-05,0.570862,0.702283,0.209352
2000-01-06,0.351475,0.272788,0.191419


In [6]:
# Re-align the weekly data onto the daily index; dates that have no weekly
# observation come back as NaN rows.
weekly.reindex(index=daily.index)

Unnamed: 0,0,1,2
2000-01-01,,,
2000-01-02,0.462937,0.148111,0.904395
2000-01-03,,,
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,


In [7]:
# Same re-alignment, but carry the most recent weekly observation forward into
# the gaps. Dates before the first weekly observation have nothing to carry
# forward, so they stay NaN.
weekly.reindex(index=daily.index, method='ffill')

Unnamed: 0,0,1,2
2000-01-01,,,
2000-01-02,0.462937,0.148111,0.904395
2000-01-03,0.462937,0.148111,0.904395
2000-01-04,0.462937,0.148111,0.904395
2000-01-05,0.462937,0.148111,0.904395
2000-01-06,0.462937,0.148111,0.904395


In [8]:
# Show the weekly frame again: the reindex calls above returned new objects
# and were not assigned back, so the original data is unchanged.
weekly

Unnamed: 0,0,1,2
2000-01-02,0.462937,0.148111,0.904395
2000-01-09,0.53667,0.756014,0.539099
2000-01-16,0.933224,0.696488,0.337029
2000-01-23,0.85137,0.80745,0.432043
2000-01-30,0.108456,0.701116,0.833512
2000-02-06,0.115465,0.438584,0.519949


In [9]:
# Cumulative sum: running total down the rows of each column
weekly.cumsum(axis=0)

Unnamed: 0,0,1,2
2000-01-02,0.462937,0.148111,0.904395
2000-01-09,0.999607,0.904125,1.443494
2000-01-16,1.932831,1.600614,1.780523
2000-01-23,2.784201,2.408064,2.212566
2000-01-30,2.892657,3.10918,3.046079
2000-02-06,3.008123,3.547764,3.566028


In [10]:
# Fractional change of each column relative to the previous week.
# The first row has no predecessor, so it is NaN.
returns = weekly.pct_change(periods=1)
returns

Unnamed: 0,0,1,2
2000-01-02,,,
2000-01-09,0.159273,4.104373,-0.403912
2000-01-16,0.738916,-0.078736,-0.374829
2000-01-23,-0.087711,0.159316,0.281918
2000-01-30,-0.87261,-0.131692,0.929232
2000-02-06,0.064624,-0.374449,-0.376194


In [11]:
# Cumulative product of returns: compound (1 + return) week over week to get
# a growth index for each column.
z = (1 + returns).cumprod()
# The first row of returns is NaN (no previous week), so anchor the index at 1.
# Positional row assignment goes through .iloc; the old .ix indexer has been
# removed from pandas.
z.iloc[0] = 1
z

Unnamed: 0,0,1,2
2000-01-02,1.0,1.0,1.0
2000-01-09,1.159273,5.104373,0.596088
2000-01-16,2.015879,4.702475,0.372657
2000-01-23,1.839063,5.451656,0.477715
2000-01-30,0.234279,4.733719,0.921624
2000-02-06,0.249419,2.961184,0.574914


In [12]:
# 'BM' is the business-month-end frequency: keep the last index level seen in
# each business month, then compute the month-over-month change.
# NOTE(review): newer pandas releases (2.2+) appear to rename this alias to
# 'BME' - confirm against the installed version before switching.
month_end_levels = z.resample('BM').last()
month_end_levels.pct_change()

Unnamed: 0,0,1,2
2000-01-31,,,
2000-02-29,0.064624,-0.374449,-0.376194


In [13]:
# One-minute timestamps from 09:30 through 15:59 on 2012-06-01 (both ends included).
# 'min' is the minute alias accepted by both old and current pandas; the
# single-letter 'T' alias is deprecated in recent releases.
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='min')

In [14]:
# Date range covering the New York Stock Exchange trading session
# (09:30 to 15:59) for a single day, at one-minute intervals.
rng

DatetimeIndex(['2012-06-01 09:30:00', '2012-06-01 09:31:00',
               '2012-06-01 09:32:00', '2012-06-01 09:33:00',
               '2012-06-01 09:34:00', '2012-06-01 09:35:00',
               '2012-06-01 09:36:00', '2012-06-01 09:37:00',
               '2012-06-01 09:38:00', '2012-06-01 09:39:00',
               ...
               '2012-06-01 15:50:00', '2012-06-01 15:51:00',
               '2012-06-01 15:52:00', '2012-06-01 15:53:00',
               '2012-06-01 15:54:00', '2012-06-01 15:55:00',
               '2012-06-01 15:56:00', '2012-06-01 15:57:00',
               '2012-06-01 15:58:00', '2012-06-01 15:59:00'],
              dtype='datetime64[ns]', length=390, freq='T')

In [15]:
# Extend the single session with three more business days: the same
# one-minute grid shifted forward by 1, 2 and 3 business days.
following_sessions = [rng + pd.offsets.BDay(n_days) for n_days in range(1, 4)]
rng = rng.append(following_sessions)

In [16]:
# Inspect the extended index: four sessions of minute stamps concatenated
# together, so it no longer has a single regular frequency.
rng

DatetimeIndex(['2012-06-01 09:30:00', '2012-06-01 09:31:00',
               '2012-06-01 09:32:00', '2012-06-01 09:33:00',
               '2012-06-01 09:34:00', '2012-06-01 09:35:00',
               '2012-06-01 09:36:00', '2012-06-01 09:37:00',
               '2012-06-01 09:38:00', '2012-06-01 09:39:00',
               ...
               '2012-06-06 15:50:00', '2012-06-06 15:51:00',
               '2012-06-06 15:52:00', '2012-06-06 15:53:00',
               '2012-06-06 15:54:00', '2012-06-06 15:55:00',
               '2012-06-06 15:56:00', '2012-06-06 15:57:00',
               '2012-06-06 15:58:00', '2012-06-06 15:59:00'],
              dtype='datetime64[ns]', length=1560, freq=None)

In [17]:
# Series of running integers (0, 1, 2, ...), one per minute stamp in rng
wizardry = pd.Series(data=range(len(rng)), index=rng)
wizardry.head(5)

2012-06-01 09:30:00    0
2012-06-01 09:31:00    1
2012-06-01 09:32:00    2
2012-06-01 09:33:00    3
2012-06-01 09:34:00    4
dtype: int32

In [18]:
from datetime import time

In [19]:
# A datetime.time and a full timestamp expose the same .hour attribute.
# pd.Timestamp replaces the pd.datetime alias, which is no longer available
# in current pandas.
time(10, 0).hour == pd.Timestamp(2017, 5, 22, 10).hour

True

In [20]:
# Pick out the observation stamped exactly 10:00 on every day in the series
wizardry.at_time(time(hour=10, minute=0))

2012-06-01 10:00:00      30
2012-06-04 10:00:00     420
2012-06-05 10:00:00     810
2012-06-06 10:00:00    1200
dtype: int32

In [21]:
# Select the observations whose time of day falls within 10:00-10:01 on every
# day; both end points are included.
wizardry.between_time(start_time=time(10, 0), end_time=time(10, 1))

2012-06-01 10:00:00      30
2012-06-01 10:01:00      31
2012-06-04 10:00:00     420
2012-06-04 10:01:00     421
2012-06-05 10:00:00     810
2012-06-05 10:01:00     811
2012-06-06 10:00:00    1200
2012-06-06 10:01:00    1201
dtype: int32

In [22]:
# Draw a random sample of len(wizardry) - 700 distinct integer positions:
# shuffle all positions, drop the first 700, and sort what is left.
shuffled_positions = np.random.permutation(len(wizardry))
indexer = np.sort(shuffled_positions[700:])

In [23]:
# indexer holds integer positions, not timestamps, so select through .iloc.
# Plain [] with integers on a datetime-indexed Series relies on a positional
# fallback that recent pandas versions deprecate.
wizardry.iloc[indexer].head()

2012-06-01 09:31:00    1
2012-06-01 09:32:00    2
2012-06-01 09:33:00    3
2012-06-01 09:35:00    5
2012-06-01 09:38:00    8
dtype: int32

In [24]:
# Change the sampled values to NaN for the purpose of demonstrating the asof method.
# Cast to float first: NaN cannot be stored in an integer Series, and recent
# pandas versions warn about the implicit upcast.
wizardry = wizardry.astype('float64')
# indexer holds integer positions, so assign through .iloc rather than plain [].
wizardry.iloc[indexer] = np.nan

In [25]:
# 'B' is the business-day frequency: four business days counted from
# 2012-06-02. That start date is not itself a business day, so the range
# begins on the next one.
selection = pd.date_range(start='2012-06-02', periods=4, freq='B')
selection

DatetimeIndex(['2012-06-04', '2012-06-05', '2012-06-06', '2012-06-07'], dtype='datetime64[ns]', freq='B')

In [26]:
# Given an array of timestamps, asof returns, for each one, the last non-NaN
# value found at or before that timestamp.
wizardry.asof(where=selection)

2012-06-04     389.0
2012-06-05     778.0
2012-06-06    1169.0
2012-06-07    1557.0
Freq: B, dtype: float64