# 시계열 자료 다루기

In [1]:
import numpy as np
import pandas as pd

## DatetimeIndex 인덱스

In [6]:
# Six consecutive dates, each written in a different textual layout.
date_str = ['2021, 8, 10','2021-8-11','20210812','2021.8.13','081421','15/8/21']
# Parse every string on its own. Handing the whole list to a single
# pd.to_datetime call makes pandas >= 2.0 infer one format from the first
# element and raise on the others (unless format='mixed' is given, which
# older pandas versions do not accept). Per-element parsing works on both.
idx = pd.DatetimeIndex([pd.to_datetime(d) for d in date_str])
idx

DatetimeIndex(['2021-08-10', '2021-08-11', '2021-08-12', '2021-08-13',
               '2021-08-14', '2021-08-15'],
              dtype='datetime64[ns]', freq=None)

In [8]:
# Fix the global NumPy seed so the six random draws below are reproducible.
np.random.seed(2021)
# One random value per timestamp; idx becomes the index of the Series.
s = pd.Series(data=np.random.randn(6), index=idx)
s

2021-08-10    1.488609
2021-08-11    0.676011
2021-08-12   -0.418451
2021-08-13   -0.806521
2021-08-14    0.555876
2021-08-15   -0.705504
dtype: float64

In [9]:
# Every calendar day from the start date through the end date (both included).
pd.date_range(start='20210801', end='2021-8-31')

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Daily frequency: 31 consecutive calendar days starting on 2021-08-01.
pd.date_range(start='20210801', periods=31, freq='D')

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Business-day frequency: 31 weekdays counted from 2021-08-01 onward
# (weekends are skipped, so the range runs into September).
pd.date_range(start='20210801', periods=31, freq='B')

DatetimeIndex(['2021-08-02', '2021-08-03', '2021-08-04', '2021-08-05',
               '2021-08-06', '2021-08-09', '2021-08-10', '2021-08-11',
               '2021-08-12', '2021-08-13', '2021-08-16', '2021-08-17',
               '2021-08-18', '2021-08-19', '2021-08-20', '2021-08-23',
               '2021-08-24', '2021-08-25', '2021-08-26', '2021-08-27',
               '2021-08-30', '2021-08-31', '2021-09-01', '2021-09-02',
               '2021-09-03', '2021-09-06', '2021-09-07', '2021-09-08',
               '2021-09-09', '2021-09-10', '2021-09-13'],
              dtype='datetime64[ns]', freq='B')

In [12]:
# Weekly frequency ('W' resolves to W-SUN): every Sunday in August 2021.
pd.date_range(start='20210801', end='20210831', freq='W')

DatetimeIndex(['2021-08-01', '2021-08-08', '2021-08-15', '2021-08-22',
               '2021-08-29'],
              dtype='datetime64[ns]', freq='W-SUN')

## resample 연산


In [13]:
# 100 random values indexed by consecutive days starting on 2021-01-01.
# The draws come from the global NumPy RNG, so the values depend on its
# current state (i.e. on the seed set earlier and on what ran in between).
ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start="2021-1-1", periods=100),
)
# Show only the last 20 observations.
ts.tail(20)

2021-03-22    0.629269
2021-03-23    0.361891
2021-03-24    3.638726
2021-03-25   -1.201221
2021-03-26   -1.330033
2021-03-27    1.261033
2021-03-28   -0.648126
2021-03-29    2.245300
2021-03-30   -1.012394
2021-03-31   -0.125939
2021-04-01   -0.432416
2021-04-02    0.425937
2021-04-03    0.756636
2021-04-04   -0.994854
2021-04-05   -0.740574
2021-04-06    0.458233
2021-04-07    1.257247
2021-04-08   -0.441704
2021-04-09    0.541335
2021-04-10    0.567278
Freq: D, dtype: float64

In [14]:
# Downsample the daily series into weekly bins ('W' resolves to W-SUN)
# and take the mean of each bin.
ts.resample(rule='W').mean()

2021-01-03    0.627425
2021-01-10   -0.083903
2021-01-17    0.068879
2021-01-24    0.361419
2021-01-31    0.370296
2021-02-07   -0.106958
2021-02-14   -0.088102
2021-02-21   -0.304320
2021-02-28    0.451788
2021-03-07    0.300178
2021-03-14   -0.328079
2021-03-21    0.340823
2021-03-28    0.387363
2021-04-04    0.123181
2021-04-11    0.273636
Freq: W-SUN, dtype: float64

In [15]:
# Downsample into month-end-labelled bins and keep the first observation
# of each month.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favour of 'ME' —
# confirm the target pandas version before changing it.
ts.resample(rule='M').first()

2021-01-31    1.130858
2021-02-28   -0.161508
2021-03-31   -0.654448
2021-04-30   -0.432416
Freq: M, dtype: float64