In [1]:
# 时间序列分析在金融数据分析中占重要的地位

In [2]:
from datetime import datetime

In [3]:
import pandas as pd

In [4]:
# Plain standard-library datetime for 1 January 2016; the time part defaults to midnight.
date = datetime(year=2016, month=1, day=1)

In [5]:
date

datetime.datetime(2016, 1, 1, 0, 0)

In [6]:
# Rebinds `date`: the datetime.datetime object is replaced by the equivalent
# pandas Timestamp, so every later cell sees a Timestamp under this name.
date = pd.Timestamp(date)

In [7]:
date

Timestamp('2016-01-01 00:00:00')

In [8]:
type(date)

pandas._libs.tslibs.timestamps.Timestamp

In [10]:
# One-element series: the scalar 1 is the value and `date` is the only index label.
ts = pd.Series(1, index=[date])

In [11]:
ts

2016-01-01    1
dtype: int64

In [12]:
ts.index

DatetimeIndex(['2016-01-01'], dtype='datetime64[ns]', freq=None)

In [13]:
ts.values

array([1], dtype=int64)

In [14]:
ts.index[0]

Timestamp('2016-01-01 00:00:00')

In [15]:
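# pd.Timestamp builds a single timestamp and does not accept a list or other iterable; use pd.to_datetime to convert a list of date strings.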

In [35]:
# Date strings for the first three days of January 2019.
dates = [
    '2019-01-01',
    '2019-01-02',
    '2019-01-03',
]

In [36]:
# Parse the date strings into a DatetimeIndex and attach the values 1, 2, 3 to it.
ts = pd.Series(
    data=[1, 2, 3],
    index=pd.to_datetime(dates),
)

In [37]:
ts

2019-01-01    1
2019-01-02    2
2019-01-03    3
dtype: int64

In [19]:
ts.index[0]

Timestamp('2019-01-01 00:00:00')

In [20]:
ts.index

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03'], dtype='datetime64[ns]', freq=None)

In [21]:
ts.values

array([1, 2, 3], dtype=int64)

In [22]:
# 截取时间数据

In [23]:
ts['20190101']

1

In [24]:
ts['2019-01-01']

1

In [25]:
ts['2019/01/01']

1

In [26]:
ts

2019-01-01    1
2019-01-02    2
2019-01-03    3
dtype: int64

In [27]:
ts['2019']

2019-01-01    1
2019-01-02    2
2019-01-03    3
dtype: int64

In [38]:
ts['2019-01':'2019-02']

2019-01-01    1
2019-01-02    2
2019-01-03    3
dtype: int64

In [31]:
# Add a 2019-02-01 observation on a copy instead of mutating `ts` in place.
# The saved outputs of the following cells (truncate, shift, resample) show
# `ts` with only its three January rows, so inserting into `ts` here would make
# a top-to-bottom re-run of the notebook disagree with those outputs.
ts_with_feb = ts.copy()
ts_with_feb[pd.Timestamp('2019-02-01')] = 4

In [39]:
ts.truncate(after='2019-01-02')

2019-01-01    1
2019-01-02    2
dtype: int64

In [40]:
# Lagging and leading a series
ts.shift(periods=1)  # positive periods lag the data: each value moves to the next timestamp

2019-01-01    NaN
2019-01-02    1.0
2019-01-03    2.0
dtype: float64

In [41]:
ts.shift(periods=-1)  # negative periods lead the data: each value moves to the previous timestamp

2019-01-01    2.0
2019-01-02    3.0
2019-01-03    NaN
dtype: float64

In [42]:
# Sample price series: five values indexed by consecutive calendar days.
price = pd.Series(
    data=[20.34, 20.56, 21.01, 20.65, 21.34],
    index=pd.to_datetime([
        '2019-01-01',
        '2019-01-02',
        '2019-01-03',
        '2019-01-04',
        '2019-01-05',
    ]),
)

In [44]:
# Example: simple return, i.e. the change from the previous observation
# divided by the previous observation (the first row has no predecessor -> NaN).
price.diff() / price.shift(1)

2019-01-01         NaN
2019-01-02    0.010816
2019-01-03    0.021887
2019-01-04   -0.017135
2019-01-05    0.033414
dtype: float64

In [45]:
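# Converting between high- and low-frequency data
# Time-series work often requires converting between frequencies; for example, with daily data in hand, monthly returns can only be computed after the series is converted to a monthly frequency. Note that when no freq is specified for the index, its freq defaults to None.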


In [46]:
ts.index.freq is None

True

In [47]:
# 可以通过resample()来修改数据的频率

In [48]:
# Downsample to month-end frequency, keeping the first observation of each month.
# The aggregation is called as a method on the resampler because the `how=`
# keyword of resample() was deprecated and later removed from pandas.
# A MonthEnd offset object is passed instead of an alias string because the
# month-end alias spelling differs between pandas versions ('M' vs. 'ME').
rts = ts.resample(pd.offsets.MonthEnd()).first()

In [49]:
rts

2019-01-31    1
Freq: M, dtype: int64

In [50]:
# Nine timestamps spaced one minute apart, starting at 2000-01-01 00:00.
# 'min' is the minute alias; the single-letter spelling 'T' is deprecated in
# recent pandas releases, while 'min' is accepted by old and new versions alike.
index = pd.date_range('1/1/2000', periods=9, freq='min')

In [51]:
# Integer values 0..8, one per timestamp in `index`.
series = pd.Series(range(9), index=index)

In [52]:
series

2000-01-01 00:00:00    0
2000-01-01 00:01:00    1
2000-01-01 00:02:00    2
2000-01-01 00:03:00    3
2000-01-01 00:04:00    4
2000-01-01 00:05:00    5
2000-01-01 00:06:00    6
2000-01-01 00:07:00    7
2000-01-01 00:08:00    8
Freq: T, dtype: int64

In [53]:
# Downsample into 3-minute bins and sum the values that fall in each bin.
# '3min' replaces the single-letter minute alias '3T', which is deprecated in recent pandas.
series.resample('3min').sum()

2000-01-01 00:00:00     3
2000-01-01 00:03:00    12
2000-01-01 00:06:00    21
Freq: 3T, dtype: int64

In [54]:
# Downsample into 2-minute bins and average the values in each bin.
# '2min' replaces the single-letter minute alias '2T', which is deprecated in recent pandas.
series.resample('2min').mean()

2000-01-01 00:00:00    0.5
2000-01-01 00:02:00    2.5
2000-01-01 00:04:00    4.5
2000-01-01 00:06:00    6.5
2000-01-01 00:08:00    8.0
Freq: 2T, dtype: float64

In [55]:
# Same 3-minute sums, but each bin is labelled with its right edge instead of its left edge.
# '3min' replaces the single-letter minute alias '3T', which is deprecated in recent pandas.
series.resample('3min', label='right').sum()

2000-01-01 00:03:00     3
2000-01-01 00:06:00    12
2000-01-01 00:09:00    21
Freq: 3T, dtype: int64

In [57]:
# 3-minute sums with bins that are closed on the right (each bin includes its
# right edge and excludes its left edge) and labelled with the right edge.
# '3min' replaces the single-letter minute alias '3T', which is deprecated in recent pandas.
series.resample('3min', label='right', closed='right').sum()

2000-01-01 00:00:00     0
2000-01-01 00:03:00     6
2000-01-01 00:06:00    15
2000-01-01 00:09:00    15
Freq: 3T, dtype: int64

In [58]:
# Upsample to 30-second frequency without filling: asfreq() leaves the newly
# created timestamps as NaN. Only the first five rows are shown, selected
# explicitly by position with .iloc.
# '30s' replaces the upper-case seconds alias '30S', which is deprecated in recent pandas.
series.resample('30s').asfreq().iloc[0:5]

2000-01-01 00:00:00    0.0
2000-01-01 00:00:30    NaN
2000-01-01 00:01:00    1.0
2000-01-01 00:01:30    NaN
2000-01-01 00:02:00    2.0
Freq: 30S, dtype: float64