###  date_range方法的使用

In [2]:
import pandas as pd

pd.date_range('2021-1-1', periods=10, freq='W')

DatetimeIndex(['2021-01-03', '2021-01-10', '2021-01-17', '2021-01-24',
               '2021-01-31', '2021-02-07', '2021-02-14', '2021-02-21',
               '2021-02-28', '2021-03-07'],
              dtype='datetime64[ns]', freq='W-SUN')

### resample方法的使用

In [6]:
# 将1分钟的时间序列转换成3分钟
import pandas as pd
index = pd.date_range('2021-1-1', periods=9, freq='min')
# 这代码创建了一个包含9个日期时间索引的 DatetimeIndex 对象，起始日期为 '2021-01-01'，
# 以分钟为频率。每个日期时间点之间相差一分钟。

s = pd.Series(range(9), index=index)
print(s)

s.resample('3min').sum()



2021-01-01 00:00:00    0
2021-01-01 00:01:00    1
2021-01-01 00:02:00    2
2021-01-01 00:03:00    3
2021-01-01 00:04:00    4
2021-01-01 00:05:00    5
2021-01-01 00:06:00    6
2021-01-01 00:07:00    7
2021-01-01 00:08:00    8
Freq: min, dtype: int64


2021-01-01 00:00:00     3
2021-01-01 00:03:00    12
2021-01-01 00:06:00    21
Freq: 3min, dtype: int64

### 降采样处理

In [14]:
import pandas as pd

df = pd.read_excel('data/msb课程销售记录.xlsx')
df = df[['订单付款时间', '实际支付金额']]
df = df.set_index('订单付款时间')

df.resample('W').sum()

Unnamed: 0_level_0,实际支付金额
订单付款时间,Unnamed: 1_level_1
2020-05-10,14480
2020-05-17,26340
2020-05-24,8880
2020-05-31,11860
2020-06-07,30940
2020-06-14,55900
2020-06-21,30960
2020-06-28,0
2020-07-05,0
2020-07-12,24960


In [15]:
import pandas as pd

df = pd.read_excel('data/msb课程销售记录.xlsx')
df = df[['订单付款时间', '实际支付金额']]
df = df.set_index('订单付款时间')

df.resample('W', closed='left').sum()

Unnamed: 0_level_0,实际支付金额
订单付款时间,Unnamed: 1_level_1
2020-05-10,14480
2020-05-17,26340
2020-05-24,0
2020-05-31,20740
2020-06-07,30940
2020-06-14,55900
2020-06-21,0
2020-06-28,30960
2020-07-05,0
2020-07-12,24960


###  升采样处理

In [21]:
import pandas as pd 

index = pd.date_range('2020-1-1', periods=2)
s = pd.Series(range(1,3), index=index)
print(s)

print(s.resample('6h').asfreq())   # asfreq() 是一个用于将时间序列对象按照指定频率重采样的方法。

# 使用前值填充
s.resample('6h').ffill()

# 使用后值填充
s.resample('6h').bfill()

2020-01-01    1
2020-01-02    2
Freq: D, dtype: int64
2020-01-01 00:00:00    1.0
2020-01-01 06:00:00    NaN
2020-01-01 12:00:00    NaN
2020-01-01 18:00:00    NaN
2020-01-02 00:00:00    2.0
Freq: 6h, dtype: float64


2020-01-01 00:00:00    1
2020-01-01 06:00:00    2
2020-01-01 12:00:00    2
2020-01-01 18:00:00    2
2020-01-02 00:00:00    2
Freq: 6h, dtype: int64

###  时间序列数据汇总

In [27]:
# ohlc 表示开盘价(Open)、最高价(High)、最低价(Low)、收盘价(Close)

import pandas as pd 

index = pd.date_range('2020-2-3', periods=12, freq='min')
s =pd.Series(range(12), index=index)
s

2020-02-03 00:00:00     0
2020-02-03 00:01:00     1
2020-02-03 00:02:00     2
2020-02-03 00:03:00     3
2020-02-03 00:04:00     4
2020-02-03 00:05:00     5
2020-02-03 00:06:00     6
2020-02-03 00:07:00     7
2020-02-03 00:08:00     8
2020-02-03 00:09:00     9
2020-02-03 00:10:00    10
2020-02-03 00:11:00    11
Freq: min, dtype: int64

In [26]:
# ohlc 表示开盘价(Open)、最高价(High)、最低价(Low)、收盘价(Close)

import pandas as pd 

index = pd.date_range('2020-2-3', periods=12, freq='min')
s =pd.Series(range(12), index=index)
s.resample('5min').ohlc()

Unnamed: 0,open,high,low,close
2020-02-03 00:00:00,0,4,0,4
2020-02-03 00:05:00,5,9,5,9
2020-02-03 00:10:00,10,11,10,11


### 移动窗口数据计算

In [32]:
# 创建淘宝的每日销量数据

import pandas as pd
index = pd.date_range('2022-1-1', periods=15, freq='D')

s = pd.Series([3, 6, 7, 4, 2, 1, 3, 8, 9, 10, 12, 15, 22, 23, 14], index=index)

# 使用 rolling 函数 计算三天的均值
s.rolling(3, min_periods=1).mean()
# min_periods 参数设置为1，表示均值计算时，即使最初少于 3 个观测值，只要窗口中至少有 1 个观测值，就会计算滚动平均值

2022-01-01     3.000000
2022-01-02     4.500000
2022-01-03     5.333333
2022-01-04     5.666667
2022-01-05     4.333333
2022-01-06     2.333333
2022-01-07     2.000000
2022-01-08     4.000000
2022-01-09     6.666667
2022-01-10     9.000000
2022-01-11    10.333333
2022-01-12    12.333333
2022-01-13    16.333333
2022-01-14    20.000000
2022-01-15    19.666667
Freq: D, dtype: float64