# Time Series


## Resampling


In [1]:
import numpy as np
import pandas as pd
np.random.seed(12345)
import matplotlib.pyplot as plt
# Notebook-wide display settings.
# Keep the pandas row limit that was active before it is overridden below.
PREVIOUS_MAX_ROWS = pd.get_option('display.max_rows')
pd.set_option('display.max_rows', 20)
# Show NumPy arrays with 4 decimals and no scientific notation.
np.set_printoptions(suppress=True, precision=4)
# Default size (in inches) of every matplotlib figure.
plt.rcParams['figure.figsize'] = (5, 4)

## Create a monthly time series and convert it to quarterly frequency

In [2]:
# Monthly periods from January 2021 through December 2023 (36 in total).
rng = pd.period_range(start='2021-01-01', end='2023-12-31', freq='M')

In [9]:
# One standard-normal draw per month in `rng`, indexed by that period index.
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts

2021-01   -0.589488
2021-02    1.581700
2021-03   -0.528735
2021-04    0.457002
2021-05    0.929969
             ...   
2023-08    0.498791
2023-09   -0.823991
2023-10    1.320566
2023-11    0.507965
2023-12   -0.653438
Freq: M, Length: 36, dtype: float64

In [31]:
# Relabel every monthly period with the quarter that contains it, using a
# quarterly calendar whose fiscal year ends in January ('Q-JAN').
# asfreq only converts the labels: the series keeps one row per month, so
# several consecutive rows share the same quarter.
ts.asfreq('Q-JAN', how='start')

2021Q4   -0.589488
2022Q1    1.581700
2022Q1   -0.528735
2022Q1    0.457002
2022Q2    0.929969
            ...   
2024Q3    0.498791
2024Q3   -0.823991
2024Q3    1.320566
2024Q4    0.507965
2024Q4   -0.653438
Freq: Q-JAN, Length: 36, dtype: float64

## Convert to Periods (Quarters)

In [52]:
# Number of month-end observations to generate.
myPeriods = 7
# Month-end timestamps starting in January 2021. An explicit MonthEnd offset is
# used rather than the 'M' string alias, which newer pandas releases deprecate
# for timestamp frequencies; the offset object is accepted by old and new versions.
rng = pd.date_range('2021-01-01', periods=myPeriods, freq=pd.offsets.MonthEnd())
ts = pd.Series(np.random.randn(myPeriods), index=rng)
# Printed explicitly because only the last expression of a cell is displayed.
print(ts)
# Replace each month-end timestamp with the calendar quarter it falls in.
pts = ts.to_period("Q")
pts

2021-01-31    1.270025
2021-02-28   -0.974378
2021-03-31   -0.634709
2021-04-30   -0.395701
2021-05-31   -0.289436
2021-06-30   -0.734297
2021-07-31   -0.728505
Freq: M, dtype: float64


2021Q1    1.270025
2021Q1   -0.974378
2021Q1   -0.634709
2021Q2   -0.395701
2021Q2   -0.289436
2021Q2   -0.734297
2021Q3   -0.728505
Freq: Q-DEC, dtype: float64

In [56]:
# Convert the quarterly periods back to timestamps, placing each row at the
# last instant of its quarter (how='end').
pts.to_timestamp(how='end')

2021-03-31 23:59:59.999999999    1.270025
2021-03-31 23:59:59.999999999   -0.974378
2021-03-31 23:59:59.999999999   -0.634709
2021-06-30 23:59:59.999999999   -0.395701
2021-06-30 23:59:59.999999999   -0.289436
2021-06-30 23:59:59.999999999   -0.734297
2021-09-30 23:59:59.999999999   -0.728505
dtype: float64

## Macrodata Example


In [72]:
# Load the macro dataset and print the first five values of the
# `year` and `quarter` columns.
data = pd.read_csv('NeutralData/macrodata.csv')
print(data.year.head(5), data.quarter.head(5))

0    1959.0
1    1959.0
2    1959.0
3    1959.0
4    1960.0
Name: year, dtype: float64 0    1.0
1    2.0
2    3.0
3    4.0
4    1.0
Name: quarter, dtype: float64


In [73]:
# Combine the `year` and `quarter` columns into a quarterly PeriodIndex
# (quarters of a year ending in December).
index = pd.PeriodIndex(
    year=data['year'],
    quarter=data['quarter'],
    freq='Q-DEC',
)
index
# Possible next step (left disabled here): assign `index` to `data.index`
# and inspect `data.infl`.

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

## Resampling

In [74]:
# 100 consecutive days of standard-normal noise starting on 2000-01-01.
rng = pd.date_range('2000-01-01', periods=100, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
# Monthly means labelled by month period (2000-01 ... 2000-04).
# The bins are built with an explicit MonthEnd offset and the result is cast to
# periods with to_period, instead of relying on the 'M' alias and the
# resample(kind='period') keyword, both deprecated in newer pandas releases.
ts.resample(pd.offsets.MonthEnd()).mean().to_period('M')

2000-01    0.012930
2000-02   -0.187559
2000-03   -0.295292
2000-04   -0.355802
Freq: M, dtype: float64

In [75]:
# Twelve one-minute observations (values 0..11) starting at midnight 2000-01-01.
# 'min' is the minute alias already used by the '5min' resampling cells; the
# single-letter 'T' spelling is deprecated in newer pandas releases.
rng = pd.date_range('2000-01-01', periods=12, freq='min')
ts = pd.Series(np.arange(12), index=rng)
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int64

In [76]:
# Sum the one-minute values in 5-minute bins that include their right edge,
# so the 00:00 observation lands in the bin labelled 23:55 of the previous day.
(
    ts
    .resample(rule='5min', closed='right')
    .sum()
)

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int64

In [80]:
# Open/high/low/close summary of the values inside each 5-minute bin.
(
    ts
    .resample(rule='5min')
    .ohlc()
)

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


## Upsampling + Interpolation


In [81]:
# Two weekly (Wednesday-anchored) rows of random values for four states.
frame = pd.DataFrame(
    data=np.random.randn(2, 4),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
    index=pd.date_range(start='1/1/2000', periods=2, freq='W-WED'),
)
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-1.230179,0.571078,0.060061,-0.225524
2000-01-12,1.349726,1.3503,-0.386653,0.86599


In [82]:
# Upsample the weekly frame to daily frequency without filling anything:
# days that were not in the original frame show up as all-NaN rows.
df_daily = frame.resample(rule='D').asfreq()
df_daily

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-1.230179,0.571078,0.060061,-0.225524
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,1.349726,1.3503,-0.386653,0.86599


In [83]:
# Daily upsampling where each inserted day repeats the most recent known row.
(
    frame
    .resample(rule='D')
    .ffill()
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-1.230179,0.571078,0.060061,-0.225524
2000-01-06,-1.230179,0.571078,0.060061,-0.225524
2000-01-07,-1.230179,0.571078,0.060061,-0.225524
2000-01-08,-1.230179,0.571078,0.060061,-0.225524
2000-01-09,-1.230179,0.571078,0.060061,-0.225524
2000-01-10,-1.230179,0.571078,0.060061,-0.225524
2000-01-11,-1.230179,0.571078,0.060061,-0.225524
2000-01-12,1.349726,1.3503,-0.386653,0.86599


In [84]:

# Daily upsampling that forward-fills at most two consecutive missing days;
# any days beyond that limit remain NaN.
(
    frame
    .resample(rule='D')
    .ffill(limit=2)
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-1.230179,0.571078,0.060061,-0.225524
2000-01-06,-1.230179,0.571078,0.060061,-0.225524
2000-01-07,-1.230179,0.571078,0.060061,-0.225524
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,1.349726,1.3503,-0.386653,0.86599


In [85]:
# Re-anchor the weekly rows on Thursdays, carrying each Wednesday's values
# forward to the following Thursday.
(
    frame
    .resample(rule='W-THU')
    .ffill()
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,-1.230179,0.571078,0.060061,-0.225524
2000-01-13,1.349726,1.3503,-0.386653,0.86599
