# Handling time series data with pandas

In [1]:
import numpy as np
import pandas as pd

### DatetimeIndex 

In [9]:
# Each string below spells a date in a different layout.  pandas >= 2.0 infers
# one format from the first element and raises ValueError when a later element
# does not match it, so every string is parsed on its own and the results are
# gathered into a single DatetimeIndex.
date_str = ["2021, 8, 10", "2021-8-11", "20210812", "2021.8.13", "081421", '15/8/21']
idx = pd.DatetimeIndex([pd.to_datetime(text) for text in date_str])
idx

DatetimeIndex(['2021-08-10', '2021-08-11', '2021-08-12', '2021-08-13',
               '2021-08-14', '2021-08-15'],
              dtype='datetime64[ns]', freq=None)

In [10]:
# Fix the seed of NumPy's global RNG so the six random values are
# reproducible, then label each value with the matching date from `idx`.
np.random.seed(2021)
s = pd.Series(data=np.random.randn(6), index=idx)
s

2021-08-10    1.488609
2021-08-11    0.676011
2021-08-12   -0.418451
2021-08-13   -0.806521
2021-08-14    0.555876
2021-08-15   -0.705504
dtype: float64

In [8]:
# With only the two endpoints given, date_range steps one calendar day at a
# time and includes both the start and the end date.
pd.date_range(start='20210801', end='2021-8-31')

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Daily frequency (the default when only a start and a count are given):
# 31 consecutive calendar days beginning on 2021-08-01.
pd.date_range(start='20210801', periods=31, freq='D')

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [13]:
# Business-day frequency: Saturdays and Sundays are skipped, so the range
# begins on Monday 2021-08-02 even though the start date is Sunday 2021-08-01.
pd.date_range(start='20210801', periods=22, freq='B')

DatetimeIndex(['2021-08-02', '2021-08-03', '2021-08-04', '2021-08-05',
               '2021-08-06', '2021-08-09', '2021-08-10', '2021-08-11',
               '2021-08-12', '2021-08-13', '2021-08-16', '2021-08-17',
               '2021-08-18', '2021-08-19', '2021-08-20', '2021-08-23',
               '2021-08-24', '2021-08-25', '2021-08-26', '2021-08-27',
               '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='B')

In [14]:
# Weekly frequency: 'W' is anchored on Sundays, so this lists every Sunday
# that falls in August 2021.
pd.date_range(start='20210801', end='20210831', freq='W')

DatetimeIndex(['2021-08-01', '2021-08-08', '2021-08-15', '2021-08-22',
               '2021-08-29'],
              dtype='datetime64[ns]', freq='W-SUN')

### resample operation

In [16]:
# Build a daily series of 100 random values starting on 2021-01-01 and show
# its last 20 entries.  The values are drawn from NumPy's global random
# stream, which this cell does not reseed.
ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start="2021-1-1", periods=100, freq="D"),
)
ts.tail(n=20)

2021-03-22    0.684467
2021-03-23   -1.346683
2021-03-24   -0.977484
2021-03-25   -1.514921
2021-03-26   -1.071030
2021-03-27    0.353427
2021-03-28   -0.827718
2021-03-29   -1.496504
2021-03-30   -1.517695
2021-03-31   -1.037998
2021-04-01    0.662567
2021-04-02   -0.624228
2021-04-03   -0.643762
2021-04-04   -0.686254
2021-04-05    0.441222
2021-04-06    0.855880
2021-04-07    0.147717
2021-04-08   -1.594633
2021-04-09    2.636894
2021-04-10   -0.716528
Freq: D, dtype: float64

In [17]:
# Bucket the daily series into weeks ending on Sunday and total each bucket.
ts.resample(rule='W').sum()

2021-01-03    0.812279
2021-01-10   -1.076052
2021-01-17    2.881679
2021-01-24    2.060777
2021-01-31    3.531142
2021-02-07    2.868890
2021-02-14    1.290510
2021-02-21   -0.462595
2021-02-28   -2.849328
2021-03-07    3.296231
2021-03-14    0.548376
2021-03-21   -0.942030
2021-03-28   -4.699942
2021-04-04   -5.343875
2021-04-11    1.770552
Freq: W-SUN, dtype: float64

In [19]:
# Take the first observation of each calendar month, labelled by month end.
# An explicit MonthEnd offset replaces the 'M' alias, which pandas 2.2
# deprecated in favour of 'ME'; the offset object behaves the same on
# versions before and after that rename.
ts.resample(pd.offsets.MonthEnd()).first()

2021-01-31   -0.435382
2021-02-28    1.900322
2021-03-31    1.425841
2021-04-30    0.662567
Freq: M, dtype: float64