(c) 2016 - present. Enplus Advisors, Inc.

# Time Series in `pandas`

In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global random number generator so the random draws in this notebook are reproducible
np.random.seed(1)

def random_series(dts):
    """Build a Series of standard-normal random values indexed by ``dts``.

    Parameters
    ----------
    dts : sized index-like
        Labels for the result; one random value is drawn per label.

    Returns
    -------
    pd.Series
        Values drawn with ``np.random.randn`` (NumPy's global random state),
        indexed by ``dts``.
    """
    n_obs = len(dts)
    draws = np.random.randn(n_obs)
    return pd.Series(draws, index=dts)

## Time Series

`pandas` has no separate time series classes; it uses the ordinary `Series` and `DataFrame`.

A "timeseries" is a `Series` with a time index of some type.

In [2]:
# Parse a date-time string into a single pandas Timestamp
dt = pd.Timestamp('2000-01-01 00:00')
dt

Timestamp('2000-01-01 00:00:00')

In [3]:
# A one-observation time series: a single value labelled by the timestamp `dt`
s1 = pd.Series(data=[101.7], index=[dt])
s1

2000-01-01    101.7
dtype: float64

## Time Series Utility Functions

In [4]:
# Create two years' worth of business dates (weekdays from 2000-01-01 through 2001-12-31)
dts = pd.date_range(start='2000-01-01', end='2001-12-31', freq='B')
dts[:5]

DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06',
               '2000-01-07'],
              dtype='datetime64[ns]', freq='B')

In [5]:
# One standard-normal draw for every business date in `dts`
ts = pd.Series(data=np.random.randn(dts.size), index=dts)
ts.head()

2000-01-03    1.624345
2000-01-04   -0.611756
2000-01-05   -0.528172
2000-01-06   -1.072969
2000-01-07    0.865408
Freq: B, dtype: float64

## Selecting from a Time Series

In [6]:
# A full date string selects the single observation carrying that label
ts['2000-03-20']

0.5866231911821976

In [7]:
# Slicing with date strings is label-based: both endpoints are included in the result
ts['2000-03-20':'2000-03-24']

2000-03-20    0.586623
2000-03-21    0.838983
2000-03-22    0.931102
2000-03-23    0.285587
2000-03-24    0.885141
Freq: B, dtype: float64

In [8]:
# A year-month string selects all of February 2000; show its first 7 observations
ts['2000-02'].head(7)

2000-02-01    1.144724
2000-02-02    0.901591
2000-02-03    0.502494
2000-02-04    0.900856
2000-02-07   -0.683728
2000-02-08   -0.122890
2000-02-09   -0.935769
Freq: B, dtype: float64

In [9]:
# A year string selects every observation in 2000; show the first 5
ts['2000'].head()

2000-01-03    1.624345
2000-01-04   -0.611756
2000-01-05   -0.528172
2000-01-06   -1.072969
2000-01-07    0.865408
Freq: B, dtype: float64

These only work on `Series`, not `DataFrame`s.

## Lagging/Shifting

Shift the values

In [10]:
# Work on an independent copy of the January 2000 observations
ts2k = ts['2000-01'].copy()

# Lag the values by one row while the index stays put, so the first row becomes NaN.
# Show the first three and last two rows.
ts2k.shift(periods=1).iloc[[0, 1, 2, -2, -1]]

2000-01-03         NaN
2000-01-04    1.624345
2000-01-05   -0.611756
2000-01-28    0.042214
2000-01-31    0.582815
dtype: float64

Shift the index.

In [11]:
# `Series.tshift` was deprecated in pandas 1.1 and removed in pandas 2.0.
# Passing `freq` to `shift` does the same job: the index labels move forward by
# one business day ('B', the frequency `dts` was created with) and the values
# are left untouched, so no NaN is introduced.
ts2k.shift(1, freq='B').iloc[[0, 1, 2, -2, -1]]

2000-01-04    1.624345
2000-01-05   -0.611756
2000-01-06   -0.528172
2000-01-31    0.582815
2000-02-01   -1.100619
dtype: float64

## Changing Frequencies: Resampling

Resampling is similar to grouping, except with time and notions of
forwards and backwards.

In [12]:
# Daily calendar for the first quarter of 2000, with one random value per day
dts1 = pd.date_range(start='2000-01-01', end='2000-03-31', freq='D')
ts3 = pd.Series(data=np.random.randn(dts1.size), index=dts1)

In [13]:
# Bin the daily series by calendar month; the resulting object is aggregated in the cells that follow.
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME' — confirm the target pandas version before changing.
grp = ts3.resample('M')

## Resampling is like grouping

In [14]:
# Average of the daily values within each month, labelled by the month-end date
grp.mean()

2000-01-31   -0.040936
2000-02-29    0.364342
2000-03-31    0.032829
Freq: M, dtype: float64

In [15]:
# Apply several aggregations at once; each one becomes a column of the result
grp.agg(['mean', 'std'])

Unnamed: 0,mean,std
2000-01-31,-0.040936,1.093864
2000-02-29,0.364342,1.098044
2000-03-31,0.032829,1.040346


## Filling Data Forward

In [16]:
# Ten daily observations of random data
dts2 = pd.date_range('2000-01-01', '2000-01-10', freq='D')
ts4 = pd.Series(np.random.randn(len(dts2)), index=dts2)

# Blank out rows by position. Use `.iloc` explicitly: integer keys passed to
# `ts4[...]` are only treated as positions as a deprecated fallback when the
# index is not integer-based (here it holds dates).
ts4.iloc[[3, 4, 5, 7]] = np.nan
ts4

2000-01-01    0.785227
2000-01-02   -0.664868
2000-01-03   -1.945047
2000-01-04         NaN
2000-01-05         NaN
2000-01-06         NaN
2000-01-07    0.816044
2000-01-08         NaN
2000-01-09    0.393109
2000-01-10   -1.823920
Freq: D, dtype: float64

## Filling Data Forward

In [17]:
# Forward fill: each NaN takes the most recent preceding non-missing value
ts4.ffill()

2000-01-01    0.785227
2000-01-02   -0.664868
2000-01-03   -1.945047
2000-01-04   -1.945047
2000-01-05   -1.945047
2000-01-06   -1.945047
2000-01-07    0.816044
2000-01-08    0.816044
2000-01-09    0.393109
2000-01-10   -1.823920
Freq: D, dtype: float64

## Filling Data Backwards

In [18]:
# Backward fill: each NaN takes the next following non-missing value
ts4.bfill()

2000-01-01    0.785227
2000-01-02   -0.664868
2000-01-03   -1.945047
2000-01-04    0.816044
2000-01-05    0.816044
2000-01-06    0.816044
2000-01-07    0.816044
2000-01-08    0.393109
2000-01-09    0.393109
2000-01-10   -1.823920
Freq: D, dtype: float64

## Filling with limits

In [19]:
# Fresh ten-day random series whose last five observations are missing
dts2 = pd.date_range(start='2000-01-01', end='2000-01-10', freq='D')
ts4 = pd.Series(data=np.random.randn(dts2.size), index=dts2)
ts4.iloc[5:] = np.nan  # positional: rows 5 through 9
ts4

2000-01-01    1.167075
2000-01-02   -0.039669
2000-01-03    0.885826
2000-01-04    0.189862
2000-01-05    0.798064
2000-01-06         NaN
2000-01-07         NaN
2000-01-08         NaN
2000-01-09         NaN
2000-01-10         NaN
Freq: D, dtype: float64

## Filling with limits

In [20]:
# Forward fill at most 2 consecutive NaNs; the remainder of the gap stays missing
ts4.ffill(limit=2)

2000-01-01    1.167075
2000-01-02   -0.039669
2000-01-03    0.885826
2000-01-04    0.189862
2000-01-05    0.798064
2000-01-06    0.798064
2000-01-07    0.798064
2000-01-08         NaN
2000-01-09         NaN
2000-01-10         NaN
Freq: D, dtype: float64

## Aligning Dates

In [21]:
# Two calendars covering 2000: month-start dates and every calendar day
dts_m = pd.date_range(start='2000-01-01', end='2000-12-31', freq='MS')
dts_d = pd.date_range(start='2000-01-01', end='2000-12-31', freq='D')

# Draw the monthly series before the daily one: both consume the same global
# random stream, so the order determines the values each series receives.
t_bill = random_series(dts_m)
s_days = random_series(dts_d)

In [22]:
# Expand the monthly series onto the daily index, carrying each month-start
# value forward until the next one is available
t_bill_daily = t_bill.reindex(index=s_days.index, method='ffill')

# Put the two aligned series side by side as named columns
macro_df = pd.concat(
    [s_days, t_bill_daily],
    keys=['SP500', 'TBILL'],
    axis=1,
)

# Monthly averages of both columns; show the first six months
macro_df.resample('M').mean().head(n=6)

Unnamed: 0,SP500,TBILL
2000-01-31,-0.164126,-0.039178
2000-02-29,0.172625,-0.228754
2000-03-31,-0.008647,-0.179612
2000-04-30,-0.167046,0.501725
2000-05-31,-0.228726,-0.593344
2000-06-30,0.123855,0.510308


## Rolling Calculations

In [23]:
# Standard deviation over a 10-row rolling window; rows without a full
# window behind them are NaN
macro_df['SP500'].rolling(window=10).std().head()

2000-01-01   NaN
2000-01-02   NaN
2000-01-03   NaN
2000-01-04   NaN
2000-01-05   NaN
Freq: D, Name: SP500, dtype: float64