## 時間序列資料整理

In [1]:
import pandas as pd

df = pd.Series([1, 2, 3, 4, 5], index=pd.period_range('2021-01-01', freq='Y', periods=5))
df

2021    1
2022    2
2023    3
2024    4
2025    5
Freq: A-DEC, dtype: int64

In [2]:
# 年轉換為季
df.resample('Q', convention='start').asfreq()

2021Q1    1.0
2021Q2    NaN
2021Q3    NaN
2021Q4    NaN
2022Q1    2.0
2022Q2    NaN
2022Q3    NaN
2022Q4    NaN
2023Q1    3.0
2023Q2    NaN
2023Q3    NaN
2023Q4    NaN
2024Q1    4.0
2024Q2    NaN
2024Q3    NaN
2024Q4    NaN
2025Q1    5.0
2025Q2    NaN
2025Q3    NaN
2025Q4    NaN
Freq: Q-DEC, dtype: float64

In [3]:
# 可以抓一段時間
df['2021':'2023']

2021    1
2022    2
2023    3
Freq: A-DEC, dtype: int64

In [4]:
# 字串轉時間型態
str_date = '2021-01-01'
date = pd.Timestamp(2021,1,1)
(str_date, type(str_date)),(date, type(date))

(('2021-01-01', str),
 (Timestamp('2021-01-01 00:00:00'), pandas._libs.tslibs.timestamps.Timestamp))

In [5]:
# 時間型態轉字串
date_to_str = date.strftime('%Y-%m_%d')
str_to_date = pd.to_datetime(str_date)
(date_to_str, type(date_to_str)), (str_to_date, type(str_to_date))

(('2021-01_01', str),
 (Timestamp('2021-01-01 00:00:00'), pandas._libs.tslibs.timestamps.Timestamp))

In [6]:
date

Timestamp('2021-01-01 00:00:00')

In [7]:
# 年、月、日可分別抓出
date.year, date.month, date.day

(2021, 1, 1)

In [8]:
# 看看該日為星期幾，是該年 (可設定從哪時候開始算) 的第幾個星期
date.day_name(), date.weekofyear

('Friday', 53)

In [9]:
date2 = pd.Timestamp(2021,1,11)
date, date2

(Timestamp('2021-01-01 00:00:00'), Timestamp('2021-01-11 00:00:00'))

In [10]:
# 可看出相隔幾日
date2 - date

Timedelta('10 days 00:00:00')

In [11]:
# 檢查是否真的差十天
date + pd.Timedelta(days=10) == date2

True

In [12]:
# 設定兩天工作日
date += pd.Timedelta(days=1) 
some_b_day = 2 * pd.offsets.BDay()
some_b_day

<2 * BusinessDays>

In [13]:
# 把某日加上兩天工作日
add_some_b_date = date + some_b_day
date.day_name(), add_some_b_date.day_name()

('Saturday', 'Tuesday')

## 補充 : 轉各種期間的函數

In [13]:
import pandas as pd
import numpy as np

In [26]:
date_rng = pd.date_range('2021-01-01', freq='M', periods=12)
print(f'month date_range()：\n{date_rng}')

month date_range()：
DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31'],
              dtype='datetime64[ns]', freq='M')


In [27]:
period_rng = pd.period_range('2021/01/01', freq='M', periods=12)
print(f'month period_range()：\n{period_rng}')

month period_range()：
PeriodIndex(['2021-01', '2021-02', '2021-03', '2021-04', '2021-05', '2021-06',
             '2021-07', '2021-08', '2021-09', '2021-10', '2021-11', '2021-12'],
            dtype='period[M]', freq='M')


In [29]:
date_rng = pd.date_range('2021-01-01', freq='W-SUN', periods=12)
print(f'week date_range()：\n{date_rng}')

week date_range()：
DatetimeIndex(['2021-01-03', '2021-01-10', '2021-01-17', '2021-01-24',
               '2021-01-31', '2021-02-07', '2021-02-14', '2021-02-21',
               '2021-02-28', '2021-03-07', '2021-03-14', '2021-03-21'],
              dtype='datetime64[ns]', freq='W-SUN')


In [30]:
period_rng=pd.period_range('2021-01-01',freq='W-SUN',periods=12)
print(f'week period_range()：\n{period_rng}')

week period_range()：
PeriodIndex(['2020-12-28/2021-01-03', '2021-01-04/2021-01-10',
             '2021-01-11/2021-01-17', '2021-01-18/2021-01-24',
             '2021-01-25/2021-01-31', '2021-02-01/2021-02-07',
             '2021-02-08/2021-02-14', '2021-02-15/2021-02-21',
             '2021-02-22/2021-02-28', '2021-03-01/2021-03-07',
             '2021-03-08/2021-03-14', '2021-03-15/2021-03-21'],
            dtype='period[W-SUN]', freq='W-SUN')


In [31]:
date_rng = pd.date_range('2021-01-01 00:00:00', freq='H', periods=12)
print(f'hour date_range()：\n{date_rng}')

hour date_range()：
DatetimeIndex(['2021-01-01 00:00:00', '2021-01-01 01:00:00',
               '2021-01-01 02:00:00', '2021-01-01 03:00:00',
               '2021-01-01 04:00:00', '2021-01-01 05:00:00',
               '2021-01-01 06:00:00', '2021-01-01 07:00:00',
               '2021-01-01 08:00:00', '2021-01-01 09:00:00',
               '2021-01-01 10:00:00', '2021-01-01 11:00:00'],
              dtype='datetime64[ns]', freq='H')


In [32]:
period_rng=pd.period_range('2021-01-01 00:00:00',freq='H',periods=12)
print(f'hour period_range()：\n{period_rng}')

hour period_range()：
PeriodIndex(['2021-01-01 00:00', '2021-01-01 01:00', '2021-01-01 02:00',
             '2021-01-01 03:00', '2021-01-01 04:00', '2021-01-01 05:00',
             '2021-01-01 06:00', '2021-01-01 07:00', '2021-01-01 08:00',
             '2021-01-01 09:00', '2021-01-01 10:00', '2021-01-01 11:00'],
            dtype='period[H]', freq='H')


In [35]:
print(pd.Timedelta(days=5, minutes=50, seconds=20, milliseconds=10, microseconds=10, nanoseconds=10))

5 days 00:50:20.010010010


In [45]:
now=pd.datetime.now()
dt=now+pd.Timedelta(days=50)
print(f'当前时间是{now}, 50天后时间是{dt}')
#当前时间是2019-06-08 17:59:31.726065, 50天后时间是2019-07-28 17:59:31.726065
#只显示年月日
print(dt.strftime('%Y-%m-%d'))#2019-07-28

当前时间是2021-04-22 12:59:58.061558, 50天后时间是2021-06-11 12:59:58.061558
2021-06-11


  now=pd.datetime.now()


In [79]:
#定义timestamp
t1=pd.Timestamp('2019-01-10')
t2=pd.Timestamp('2018-12-10')
print(f't1= {t1}')
print(f't2= {t2}')
print(f't1与t2时间间隔：{(t1-t2).days}天')

t1= 2019-01-10 00:00:00
t2= 2018-12-10 00:00:00
t1与t2时间间隔：31天


In [58]:
per=pd.Period('2019')
print(f'pd.Period()：{per}')
# pd.Period()：2019
per_del=pd.Period('2019')-pd.Period('2018')
print(f'2019和2018间隔{per_del}年')#可以直接+、-整数（代表年）
#2019和2018间隔1年

#时期转换为时间戳
print(per.to_timestamp(how='end'))#2019-12-31 00:00:00
print(per.to_timestamp(how='start'))#2019-01-01 00:00:00

pd.Period()：2019
2019和2018间隔<YearEnd: month=12>年
2019-12-31 23:59:59.999999999
2019-01-01 00:00:00


In [60]:
date=pd.date_range('1/1/2018', periods=20, freq='D')
tsdat_series=pd.Series(range(20),index=date)
tsp_series=tsdat_series.to_period('D')
print(tsp_series.index.asfreq('Q'))

PeriodIndex(['2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1',
             '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1',
             '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1',
             '2018Q1', '2018Q1'],
            dtype='period[Q-DEC]', freq='Q-DEC')


In [54]:
date=pd.date_range('1/1/2018', periods=20, freq='M')
tsdat_series=pd.Series(range(20),index=date)
tsp_series=tsdat_series.to_period('M')
print(tsp_series.index.asfreq('Q'))

PeriodIndex(['2018Q1', '2018Q1', '2018Q1', '2018Q2', '2018Q2', '2018Q2',
             '2018Q3', '2018Q3', '2018Q3', '2018Q4', '2018Q4', '2018Q4',
             '2019Q1', '2019Q1', '2019Q1', '2019Q2', '2019Q2', '2019Q2',
             '2019Q3', '2019Q3'],
            dtype='period[Q-DEC]', freq='Q-DEC')


In [55]:
date=pd.period_range('1/1/2018', periods=20, freq='D')
tsper_series=pd.Series(range(20),index=date)
print(tsper_series.index.asfreq('Q'))

PeriodIndex(['2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1',
             '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1',
             '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1', '2018Q1',
             '2018Q1', '2018Q1'],
            dtype='period[Q-DEC]', freq='Q-DEC')
