# 时间序列

## 日期和时间数据的类型及工具

In [71]:
import numpy as np
import pandas as pd

from datetime import datetime

now = datetime.now()

now

datetime.datetime(2025, 3, 28, 20, 19, 44, 649224)

In [72]:
now.year, now.month, now.day, now.hour, now.minute, now.second

(2025, 3, 28, 20, 19, 44)

In [73]:
delta = datetime(2023, 10, 1) - datetime(2022, 10, 1)
delta

datetime.timedelta(days=365)

In [74]:
delta.days, delta.seconds

(365, 0)

In [75]:
from datetime import timedelta

start = datetime(2022, 10, 1)

# A bare positional argument to timedelta is a number of days,
# so this moves `start` forward by 12 days.
start + timedelta(12)

datetime.datetime(2022, 10, 13, 0, 0)

In [76]:
stamp = datetime(2025, 3, 28)

str(stamp)

'2025-03-28 00:00:00'

In [77]:
stamp.strftime('%Y-%m-%d %H:%M:%S')

'2025-03-28 00:00:00'

In [78]:
value = '2011-01-03'

datetime.strptime(value, '%Y-%m-%d')

datetime.datetime(2011, 1, 3, 0, 0)

In [79]:
datestrs = ['7/6/2011', '8/6/2011', '9/6/2011']
# Parse every month/day/year string into a datetime object.
list(map(lambda text: datetime.strptime(text, '%m/%d/%Y'), datestrs))

[datetime.datetime(2011, 7, 6, 0, 0),
 datetime.datetime(2011, 8, 6, 0, 0),
 datetime.datetime(2011, 9, 6, 0, 0)]

In [80]:
datestrs = ['7/2/2012', '8/2/2012', '9/2/2012']
dates = pd.to_datetime(datestrs)
dates

DatetimeIndex(['2012-07-02', '2012-08-02', '2012-09-02'], dtype='datetime64[ns]', freq=None)

In [81]:
# Appending None shows how a missing entry is handled: it is parsed as NaT.
idx = pd.to_datetime(datestrs + [None])
idx

DatetimeIndex(['2012-07-02', '2012-08-02', '2012-09-02', 'NaT'], dtype='datetime64[ns]', freq=None)

In [82]:
# Six consecutive days, 3-8 January 2011, used as the Series index.
dates = [datetime(2011, 1, day) for day in range(3, 9)]

# Random values indexed by plain datetime objects; pandas turns the index
# into a DatetimeIndex.
ts = pd.Series(np.random.randn(6), index=dates)
ts

2011-01-03    0.984552
2011-01-04   -0.638314
2011-01-05    0.993275
2011-01-06    0.427843
2011-01-07   -1.708857
2011-01-08   -0.148260
dtype: float64

In [83]:
ts.index

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-08'],
              dtype='datetime64[ns]', freq=None)

In [84]:
# Arithmetic aligns on the date index: dates that are absent from the
# every-other-row slice ts[::2] come out as NaN.
ts + ts[::2]

2011-01-03    1.969105
2011-01-04         NaN
2011-01-05    1.986549
2011-01-06         NaN
2011-01-07   -3.417714
2011-01-08         NaN
dtype: float64

In [85]:
stamp = ts.index[2]

stamp

Timestamp('2011-01-05 00:00:00')

In [86]:
ts[stamp]

np.float64(0.9932745773405326)

In [87]:
ts['2011-01-07']

np.float64(-1.7088570211983114)

In [88]:
# 1000 daily observations starting on 2025-01-01 (date_range defaults to daily frequency).
longer_ts = pd.Series(
    np.random.standard_normal(1000),
    index=pd.date_range('2025-01-01', periods=1000),
)
longer_ts.head()

2025-01-01    0.408303
2025-01-02    0.356721
2025-01-03    0.302894
2025-01-04   -1.073530
2025-01-05    0.459745
Freq: D, dtype: float64

In [89]:
longer_ts['2025']

2025-01-01    0.408303
2025-01-02    0.356721
2025-01-03    0.302894
2025-01-04   -1.073530
2025-01-05    0.459745
                ...   
2025-12-27   -0.533955
2025-12-28    0.382238
2025-12-29    0.455097
2025-12-30   -1.924233
2025-12-31    0.813540
Freq: D, Length: 365, dtype: float64

In [90]:
longer_ts['2026-01']

2026-01-01   -0.857359
2026-01-02    0.958700
2026-01-03   -0.455277
2026-01-04   -1.115878
2026-01-05   -0.365896
2026-01-06    0.633114
2026-01-07    0.327125
2026-01-08   -0.275786
2026-01-09    0.382544
2026-01-10   -2.704246
2026-01-11    1.438224
2026-01-12    0.383622
2026-01-13   -0.264404
2026-01-14   -0.570013
2026-01-15    1.254717
2026-01-16    0.881050
2026-01-17   -1.695738
2026-01-18    2.192465
2026-01-19    0.865825
2026-01-20    0.827045
2026-01-21    1.842350
2026-01-22    0.076046
2026-01-23   -1.740361
2026-01-24   -0.564678
2026-01-25   -1.742779
2026-01-26    1.183461
2026-01-27    0.113579
2026-01-28   -2.594508
2026-01-29    0.310105
2026-01-30    1.673449
2026-01-31   -1.574614
Freq: D, dtype: float64

In [91]:
ts[datetime(2011, 1, 7):]

2011-01-07   -1.708857
2011-01-08   -0.148260
dtype: float64

In [92]:
ts['2011-01-06':'2011-01-07']

2011-01-06    0.427843
2011-01-07   -1.708857
dtype: float64

In [93]:
# Drop everything earlier than `before` and everything later than `after`;
# the row dated exactly `after` is kept.
ts.truncate(after='2011-01-05', before='2011-01-02')

2011-01-03    0.984552
2011-01-04   -0.638314
2011-01-05    0.993275
dtype: float64

In [94]:
# 2011-01-01 appears twice on purpose, to demonstrate a non-unique time index.
dates = pd.DatetimeIndex(
    ['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-01', '2011-01-05']
)
ts = pd.Series(range(1, 6), index=dates)

ts.index.is_unique

False

In [95]:
# This timestamp occurs more than once in the index, so the lookup returns
# a Series holding every matching row rather than a scalar.
ts[datetime(2011, 1, 1)]

2011-01-01    1
2011-01-01    4
dtype: int64

In [96]:
ts['2011-01-02']

np.int64(2)

In [97]:
# level=0 groups on the index itself, so rows sharing a timestamp are
# aggregated together (here: averaged).
grouped = ts.groupby(level=0)
grouped.mean()

2011-01-01    2.5
2011-01-02    2.0
2011-01-03    3.0
2011-01-05    5.0
dtype: float64

## 日期的范围、频率以及移位

In [98]:
ts

2011-01-01    1
2011-01-02    2
2011-01-03    3
2011-01-01    4
2011-01-05    5
dtype: int64

In [99]:
# Convert to a regular daily frequency: duplicate days are averaged and
# days with no observation appear as NaN.
ts.resample('D').mean()

2011-01-01    2.5
2011-01-02    2.0
2011-01-03    3.0
2011-01-04    NaN
2011-01-05    5.0
Freq: D, dtype: float64

In [100]:
pd.date_range('2017-01-01', periods=100, freq='D')

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
               '2017-01-13', '2017-01-14', '2017-01-15', '2017-01-16',
               '2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20',
               '2017-01-21', '2017-01-22', '2017-01-23', '2017-01-24',
               '2017-01-25', '2017-01-26', '2017-01-27', '2017-01-28',
               '2017-01-29', '2017-01-30', '2017-01-31', '2017-02-01',
               '2017-02-02', '2017-02-03', '2017-02-04', '2017-02-05',
               '2017-02-06', '2017-02-07', '2017-02-08', '2017-02-09',
               '2017-02-10', '2017-02-11', '2017-02-12', '2017-02-13',
               '2017-02-14', '2017-02-15', '2017-02-16', '2017-02-17',
               '2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
      

In [101]:
# BM = business end of mouth, 每月最后一个工作日
pd.date_range('2025-01-01', '2025-12-31', freq='BM')

  pd.date_range('2025-01-01', '2025-12-31', freq='BM')


DatetimeIndex(['2025-01-31', '2025-02-28', '2025-03-31', '2025-04-30',
               '2025-05-30', '2025-06-30', '2025-07-31', '2025-08-29',
               '2025-09-30', '2025-10-31', '2025-11-28', '2025-12-31'],
              dtype='datetime64[ns]', freq='BME')

In [102]:
# normalize=True resets the time-of-day of the start timestamp to midnight,
# so the generated dates carry no 12:45:12 component.
pd.date_range('2017-01-01 12:45:12', periods=10, normalize=True)

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10'],
              dtype='datetime64[ns]', freq='D')

In [103]:
from pandas.tseries.offsets import Hour, Minute

hour = Hour()
hour

<Hour>

In [104]:
hour_four = Hour(4)

In [105]:
pd.date_range('2018-01-01', periods=10, freq=hour_four)

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 04:00:00',
               '2018-01-01 08:00:00', '2018-01-01 12:00:00',
               '2018-01-01 16:00:00', '2018-01-01 20:00:00',
               '2018-01-02 00:00:00', '2018-01-02 04:00:00',
               '2018-01-02 08:00:00', '2018-01-02 12:00:00'],
              dtype='datetime64[ns]', freq='4h')

In [106]:
# Offset objects can be added; the sum is expressed in the finer unit (150 minutes).
Hour(2) + Minute(30)

<150 * Minutes>

In [107]:
pd.date_range('2017-01-20', periods=10, freq='1h30min')

DatetimeIndex(['2017-01-20 00:00:00', '2017-01-20 01:30:00',
               '2017-01-20 03:00:00', '2017-01-20 04:30:00',
               '2017-01-20 06:00:00', '2017-01-20 07:30:00',
               '2017-01-20 09:00:00', '2017-01-20 10:30:00',
               '2017-01-20 12:00:00', '2017-01-20 13:30:00'],
              dtype='datetime64[ns]', freq='90min')

In [108]:
ts = pd.Series(np.random.standard_normal(4), index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts

  ts = pd.Series(np.random.standard_normal(4), index=pd.date_range('1/1/2000', periods=4, freq='M'))


2000-01-31    1.628501
2000-02-29   -0.052740
2000-03-31    0.592470
2000-04-30    0.095666
Freq: ME, dtype: float64

In [109]:
# Naive shift: the values move two rows later while the index stays as is,
# which leaves NaN in the first two positions.
ts.shift(2)

2000-01-31         NaN
2000-02-29         NaN
2000-03-31    1.628501
2000-04-30   -0.052740
Freq: ME, dtype: float64

In [110]:
ts.shift(-2)

2000-01-31    0.592470
2000-02-29    0.095666
2000-03-31         NaN
2000-04-30         NaN
Freq: ME, dtype: float64

In [111]:
# Row-over-row relative change: each value divided by the previous one, minus 1.
ts/ts.shift(1) - 1

2000-01-31          NaN
2000-02-29    -1.032385
2000-03-31   -12.233844
2000-04-30    -0.838530
Freq: ME, dtype: float64

In [112]:
# With `freq` given, the timestamps are advanced (by two month ends) instead
# of the values, so no data is lost. 'ME' replaces the deprecated 'M' alias.
ts.shift(2, freq='ME')

  ts.shift(2, freq='M')


2000-03-31    1.628501
2000-04-30   -0.052740
2000-05-31    0.592470
2000-06-30    0.095666
Freq: ME, dtype: float64

In [113]:
# Move every timestamp forward by three days; the values are untouched.
ts.shift(3, freq='D')

2000-02-03    1.628501
2000-03-03   -0.052740
2000-04-03    0.592470
2000-05-03    0.095666
dtype: float64

In [114]:
# Move every timestamp forward by 90 minutes. 'min' replaces the deprecated
# 'T' minute alias, which emits a FutureWarning on pandas >= 2.2.
ts.shift(1, freq='90min')

  ts.shift(1, freq="90T")


2000-01-31 01:30:00    1.628501
2000-02-29 01:30:00   -0.052740
2000-03-31 01:30:00    0.592470
2000-04-30 01:30:00    0.095666
dtype: float64

In [115]:
now = datetime.now()
now

datetime.datetime(2025, 3, 28, 20, 19, 45, 18078)

In [116]:
now + 3 * Hour()

Timestamp('2025-03-28 23:19:45.018078')

In [117]:
from pandas.tseries.offsets import Day, MonthEnd
now + MonthEnd()

Timestamp('2025-03-31 20:19:45.018078')

In [118]:
now + MonthEnd(2)

Timestamp('2025-04-30 20:19:45.018078')

In [119]:
offset = MonthEnd()
offset.rollforward(now)

Timestamp('2025-03-31 20:19:45.018078')

In [120]:
offset.rollback(now)

Timestamp('2025-02-28 20:19:45.018078')

In [121]:
# Twenty observations spaced four days apart, starting 2000-01-01.
ts = pd.Series(
    np.random.standard_normal(20),
    index=pd.date_range('1/1/2000', periods=20, freq='4D'),
)
ts

2000-01-01    0.634470
2000-01-05    0.040859
2000-01-09    1.611831
2000-01-13   -0.400015
2000-01-17    0.124177
2000-01-21    0.123349
2000-01-25    0.878091
2000-01-29   -1.481709
2000-02-02   -1.273665
2000-02-06    0.665482
2000-02-10    0.333651
2000-02-14    1.610702
2000-02-18    2.279449
2000-02-22   -0.076128
2000-02-26    0.137221
2000-03-01    0.167140
2000-03-05   -0.415046
2000-03-09    0.050813
2000-03-13    1.568849
2000-03-17    0.230658
Freq: 4D, dtype: float64

In [122]:
# Use rollforward as the grouping key function: every date is mapped to the
# end of its month, and the values are then averaged per month.
ts.groupby(MonthEnd().rollforward).mean()

2000-01-31    0.191382
2000-02-29    0.525245
2000-03-31    0.320483
dtype: float64

In [123]:
# Monthly mean via resample. 'ME' (month end) replaces the deprecated 'M'
# alias, which emits a FutureWarning on pandas >= 2.2.
ts.resample('ME').mean()

  ts.resample('M').mean()


2000-01-31    0.191382
2000-02-29    0.525245
2000-03-31    0.320483
Freq: ME, dtype: float64

In [124]:
import pytz

pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [125]:
stamp = pd.Timestamp('2011-01-03')

stamp_utc = stamp.tz_localize('UTC')
stamp_utc

Timestamp('2011-01-03 00:00:00+0000', tz='UTC')

In [126]:
stamp_utc.tz_convert('US/Eastern')

Timestamp('2011-01-02 19:00:00-0500', tz='US/Eastern')

In [127]:
stamp_utc.tz_convert('Asia/Shanghai')

Timestamp('2011-01-03 08:00:00+0800', tz='Asia/Shanghai')

In [128]:
stamp_shanghai= pd.Timestamp('2025-01-01 12:00:00', tz='Asia/Shanghai')
stamp_shanghai

Timestamp('2025-01-01 12:00:00+0800', tz='Asia/Shanghai')

In [129]:
# Converting between time zones changes only the displayed wall-clock time;
# the underlying integer `.value` of the timestamp is the same.
stamp_shanghai.tz_convert('America/New_York').value == stamp_shanghai.value

True

## 周期及算术运算

In [130]:
p = pd.Period('2025', freq='Y-DEC')
p

Period('2025', 'Y-DEC')

In [131]:
p + 5

Period('2030', 'Y-DEC')

In [132]:
p - 2

Period('2023', 'Y-DEC')

In [133]:
pd.Period('2028', freq='Y-DEC') - p

<3 * YearEnds: month=12>

In [134]:
periods = pd.period_range('2025-01-01', '2025-12-31', freq='M')
periods

PeriodIndex(['2025-01', '2025-02', '2025-03', '2025-04', '2025-05', '2025-06',
             '2025-07', '2025-08', '2025-09', '2025-10', '2025-11', '2025-12'],
            dtype='period[M]')

In [135]:
values = ['2023Q3', '2023Q4', '2024Q1']
periods = pd.PeriodIndex(values, freq='Q-DEC')
periods

PeriodIndex(['2023Q3', '2023Q4', '2024Q1'], dtype='period[Q-DEC]')

In [136]:
p = pd.Period('2011', freq='Y-DEC')

p.asfreq('M', how='start')

Period('2011-01', 'M')

In [137]:
p = pd.Period('2025', freq='Y-JUN')
# For a year ending in June, the first month of "2025" is July 2024.
p.asfreq('M', how='start')

Period('2024-07', 'M')

In [138]:
p.asfreq('M', how='end')

Period('2025-06', 'M')

## 移动窗口函数

In [139]:
# Load the price table; the first CSV column holds the dates and is parsed
# into the index.
close_px_all = pd.read_csv('examples/stock_px.csv', parse_dates=True, index_col=0)

# Keep three tickers and conform them to business-day frequency,
# forward-filling any business day that has no row in the file.
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']].resample('B').ffill()

# Only the last expression of a cell is displayed, so the former
# `close_px.head()` line that preceded this one had no visible effect.
close_px_all.head()

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003-01-02,7.4,21.11,29.22,909.03
2003-01-03,7.45,21.14,29.24,908.59
2003-01-06,7.45,21.52,29.96,929.01
2003-01-07,7.43,21.93,28.95,922.93
2003-01-08,7.28,21.31,28.83,909.93


In [140]:
# Line plot of the AAPL closes. Plotting goes through pandas' default
# "matplotlib" backend, so matplotlib must be importable in this environment.
close_px['AAPL'].plot()


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.4 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Users/luoran/miniconda3/envs/pydata-book/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/luoran/miniconda3/envs/pydata-book/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/luoran/miniconda3/envs/pydata-book/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/luoran/miniconda3/envs/pydata-book/lib/python3.10/site-packages/traitlets/con

AttributeError: _ARRAY_API not found

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

In [None]:
close_px['AAPL'].rolling(250).mean().plot()

In [None]:
# Rolling standard deviation of AAPL daily returns over a 250-observation
# window; min_periods=10 lets results start once 10 observations are
# available instead of waiting for a full window.
std250 = close_px['AAPL'].pct_change().rolling(250, min_periods=10).std()
std250.plot()

In [None]:
expanding_mean = std250.expanding().mean()
expanding_mean.plot()

In [None]:
close_px.rolling(60).mean().plot(logy=True)

In [None]:
# A string window ('20D') makes the window a time span of 20 days rather
# than a fixed number of rows.
close_px.rolling('20D').mean()

In [None]:
# AAPL closes for 2006-2007, compared against a 30-observation simple moving
# average and an exponentially weighted moving average with span=30.
appl_px = close_px['AAPL']['2006':'2007']
ma30 = appl_px.rolling(30, min_periods=1).mean()

ewma30 = appl_px.ewm(span=30).mean()

# Draw all three lines on one explicitly shared Axes. Series.plot does not
# show a legend by default, so the `label` values only become visible once a
# legend is requested.
ax = appl_px.plot(style='k-', label='Price')
ma30.plot(ax=ax, style='k--', label='MA(30)')
ewma30.plot(ax=ax, style='k-.', label='EWMA(30)')
ax.legend();


In [None]:
# Daily percent returns of the three stocks and of the SPX column.
returns = close_px.pct_change()
spx_px = close_px_all['SPX']
spx_rets = spx_px.pct_change()

# Rolling correlation between AAPL returns and SPX returns: 250-observation
# window, reported once at least 100 observations are available.
# NOTE(review): `coor` looks like a typo for `corr`; the name is kept so any
# later reference to it keeps working.
aapl_rolling = returns['AAPL'].rolling(250, min_periods=100)
coor = aapl_rolling.corr(spx_rets)
coor.plot()

In [None]:
# Same idea applied to the whole DataFrame: every column of `returns` is
# correlated with `spx_rets` over a 125-observation rolling window.
corr = returns.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

In [None]:
from scipy.stats import percentileofscore

def score_at_2percent(x):
    """Return the percentile rank of the value 0.02 within the sample ``x``.

    Thin wrapper around ``scipy.stats.percentileofscore`` with the score fixed
    at 0.02 and all other options left at that function's defaults.
    """
    threshold = 0.02
    return percentileofscore(x, threshold)

# Apply the custom statistic to every 250-observation window of AAPL returns;
# each window is passed to score_at_2percent and must reduce to one number.
result = returns['AAPL'].rolling(250).apply(score_at_2percent)

result.plot()