In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.DataFrame({'k1':['a','a','b','b','a'], 'k2':['one','two','one','two','one'],
                  'data1':[2,3,3,2,4],
                   'data2':[5,5,5,5,10]
                  })

In [4]:
df

Unnamed: 0,k1,k2,data1,data2
0,a,one,2,5
1,a,two,3,5
2,b,one,3,5
3,b,two,2,5
4,a,one,4,10


### Iterating over group

In [5]:
gp1 = df['data1'].groupby(df['k1'])

In [6]:
gp1

<pandas.core.groupby.generic.SeriesGroupBy object at 0x000001AF1E9BAA48>

In [7]:
gp1.sum()

k1
a    9
b    5
Name: data1, dtype: int64

In [8]:
gp1.max()

k1
a    4
b    3
Name: data1, dtype: int64

In [9]:
gp1.mean()

k1
a    3.0
b    2.5
Name: data1, dtype: float64

In [10]:
for name, group in gp1:
    print('Group Name : ', name)
    print(group)
    

Group Name :  a
0    2
1    3
4    4
Name: data1, dtype: int64
Group Name :  b
2    3
3    2
Name: data1, dtype: int64


In [11]:
gp2 = df['data1'].groupby([ df['k1'], df['k2']])


In [14]:
for name , group in gp2:
    print('Group name : ', name)
    k1, k2 = name
    print("k1 : ", k1, ", k2 : ", k2)
    print(group)

Group name :  ('a', 'one')
k1 :  a , k2 :  one
0    2
4    4
Name: data1, dtype: int64
Group name :  ('a', 'two')
k1 :  a , k2 :  two
1    3
Name: data1, dtype: int64
Group name :  ('b', 'one')
k1 :  b , k2 :  one
2    3
Name: data1, dtype: int64
Group name :  ('b', 'two')
k1 :  b , k2 :  two
3    2
Name: data1, dtype: int64


In [13]:
for (k1, k2), group in gp2:
    print("k1 : ", k1, ", k2 : ", k2)
    print(group)

k1 :  a , k2 :  one
0    2
4    4
Name: data1, dtype: int64
k1 :  a , k2 :  two
1    3
Name: data1, dtype: int64
k1 :  b , k2 :  one
2    3
Name: data1, dtype: int64
k1 :  b , k2 :  two
3    2
Name: data1, dtype: int64


# Time Series (시계열...)

### Timestamp

In [15]:
# periods는 기간을 의미. periods=10은 10일
rng = pd.date_range('2020-08-03 10:35', periods=10)

In [16]:
rng

DatetimeIndex(['2020-08-03 10:35:00', '2020-08-04 10:35:00',
               '2020-08-05 10:35:00', '2020-08-06 10:35:00',
               '2020-08-07 10:35:00', '2020-08-08 10:35:00',
               '2020-08-09 10:35:00', '2020-08-10 10:35:00',
               '2020-08-11 10:35:00', '2020-08-12 10:35:00'],
              dtype='datetime64[ns]', freq='D')

## freq 인수로 특정한 날짜만 생성되도록 할 수도 있다. 많이 사용되는 freq 인수값은 다음과 같다.

s: 초

T: 분

H: 시간

D: 일(day)

B: 주말이 아닌 평일

W: 주(일요일)

W-MON: 주(월요일)

M: 각 달(month)의 마지막 날

MS: 각 달의 첫날

BM: 주말이 아닌 평일 중에서 각 달의 마지막 날

BMS: 주말이 아닌 평일 중에서 각 달의 첫날

WOM-2THU: 각 달의 두번째 목요일

Q-JAN: 각 분기의 첫달의 마지막 날

Q-DEC: 각 분기의 마지막 달의 마지막 날


출처 : http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases

In [1]:
rng2 = pd.date_range('2020-08-03 10:35', periods=10, freq='B')

NameError: name 'pd' is not defined

In [18]:
rng2

DatetimeIndex(['2020-08-03 10:35:00', '2020-08-04 10:35:00',
               '2020-08-05 10:35:00', '2020-08-06 10:35:00',
               '2020-08-07 10:35:00', '2020-08-10 10:35:00',
               '2020-08-11 10:35:00', '2020-08-12 10:35:00',
               '2020-08-13 10:35:00', '2020-08-14 10:35:00'],
              dtype='datetime64[ns]', freq='B')

In [19]:
rng3 = pd.date_range('2020-08-03 10:35', periods=10, freq='M')

In [20]:
rng3

DatetimeIndex(['2020-08-31 10:35:00', '2020-09-30 10:35:00',
               '2020-10-31 10:35:00', '2020-11-30 10:35:00',
               '2020-12-31 10:35:00', '2021-01-31 10:35:00',
               '2021-02-28 10:35:00', '2021-03-31 10:35:00',
               '2021-04-30 10:35:00', '2021-05-31 10:35:00'],
              dtype='datetime64[ns]', freq='M')

In [21]:
rng4 = pd.date_range('2020-08-03 10:35', periods=10, freq='MS')

In [22]:
rng4

DatetimeIndex(['2020-09-01 10:35:00', '2020-10-01 10:35:00',
               '2020-11-01 10:35:00', '2020-12-01 10:35:00',
               '2021-01-01 10:35:00', '2021-02-01 10:35:00',
               '2021-03-01 10:35:00', '2021-04-01 10:35:00',
               '2021-05-01 10:35:00', '2021-06-01 10:35:00'],
              dtype='datetime64[ns]', freq='MS')

In [23]:
# freq='5H' 는 5시간마다를 표시
rng5 = pd.date_range('2020-08-03 10:35', periods=10, freq='5H')

In [25]:
rng5

DatetimeIndex(['2020-08-03 10:35:00', '2020-08-03 15:35:00',
               '2020-08-03 20:35:00', '2020-08-04 01:35:00',
               '2020-08-04 06:35:00', '2020-08-04 11:35:00',
               '2020-08-04 16:35:00', '2020-08-04 21:35:00',
               '2020-08-05 02:35:00', '2020-08-05 07:35:00'],
              dtype='datetime64[ns]', freq='5H')

In [26]:
# 시작 시간과 종료 시간, 국가 설정 가능
rng6 = pd.date_range('2015 Jul 2 10:15', '2015 July 12', freq = '12H', tz = 'Asia/Seoul' )

In [27]:
rng6

DatetimeIndex(['2015-07-02 10:15:00+09:00', '2015-07-02 22:15:00+09:00',
               '2015-07-03 10:15:00+09:00', '2015-07-03 22:15:00+09:00',
               '2015-07-04 10:15:00+09:00', '2015-07-04 22:15:00+09:00',
               '2015-07-05 10:15:00+09:00', '2015-07-05 22:15:00+09:00',
               '2015-07-06 10:15:00+09:00', '2015-07-06 22:15:00+09:00',
               '2015-07-07 10:15:00+09:00', '2015-07-07 22:15:00+09:00',
               '2015-07-08 10:15:00+09:00', '2015-07-08 22:15:00+09:00',
               '2015-07-09 10:15:00+09:00', '2015-07-09 22:15:00+09:00',
               '2015-07-10 10:15:00+09:00', '2015-07-10 22:15:00+09:00',
               '2015-07-11 10:15:00+09:00', '2015-07-11 22:15:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='12H')

In [28]:
# rng6에 저장된 서울 시간을 시드니 시간으로 변환하여 출력
rng6.tz_convert('Australia/Sydney')

DatetimeIndex(['2015-07-02 11:15:00+10:00', '2015-07-02 23:15:00+10:00',
               '2015-07-03 11:15:00+10:00', '2015-07-03 23:15:00+10:00',
               '2015-07-04 11:15:00+10:00', '2015-07-04 23:15:00+10:00',
               '2015-07-05 11:15:00+10:00', '2015-07-05 23:15:00+10:00',
               '2015-07-06 11:15:00+10:00', '2015-07-06 23:15:00+10:00',
               '2015-07-07 11:15:00+10:00', '2015-07-07 23:15:00+10:00',
               '2015-07-08 11:15:00+10:00', '2015-07-08 23:15:00+10:00',
               '2015-07-09 11:15:00+10:00', '2015-07-09 23:15:00+10:00',
               '2015-07-10 11:15:00+10:00', '2015-07-10 23:15:00+10:00',
               '2015-07-11 11:15:00+10:00', '2015-07-11 23:15:00+10:00'],
              dtype='datetime64[ns, Australia/Sydney]', freq='12H')

In [29]:
rng6

DatetimeIndex(['2015-07-02 10:15:00+09:00', '2015-07-02 22:15:00+09:00',
               '2015-07-03 10:15:00+09:00', '2015-07-03 22:15:00+09:00',
               '2015-07-04 10:15:00+09:00', '2015-07-04 22:15:00+09:00',
               '2015-07-05 10:15:00+09:00', '2015-07-05 22:15:00+09:00',
               '2015-07-06 10:15:00+09:00', '2015-07-06 22:15:00+09:00',
               '2015-07-07 10:15:00+09:00', '2015-07-07 22:15:00+09:00',
               '2015-07-08 10:15:00+09:00', '2015-07-08 22:15:00+09:00',
               '2015-07-09 10:15:00+09:00', '2015-07-09 22:15:00+09:00',
               '2015-07-10 10:15:00+09:00', '2015-07-10 22:15:00+09:00',
               '2015-07-11 10:15:00+09:00', '2015-07-11 22:15:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='12H')

In [30]:
rng6[0]

Timestamp('2015-07-02 10:15:00+0900', tz='Asia/Seoul', freq='12H')

### 문자열 --> 날짜로

In [31]:
dd = ['07/07/2015', '08/12/2015', '12/04/2015']

In [32]:
dd

['07/07/2015', '08/12/2015', '12/04/2015']

In [33]:
type(dd[0])

str

### 미국식으로 변경

월/일/년도

08/12/2015 --> 2015년 8월 12일

In [34]:
pd.to_datetime(dd)  # MM/DD/YYYY

DatetimeIndex(['2015-07-07', '2015-08-12', '2015-12-04'], dtype='datetime64[ns]', freq=None)

### 유럽식으로 변경

일/월/년도

08/12/2015  --> 2015년 12월 8일

In [35]:
pd.to_datetime(dd, dayfirst=True)  # DD/MM/YYYY

DatetimeIndex(['2015-07-07', '2015-12-08', '2015-04-12'], dtype='datetime64[ns]', freq=None)

In [36]:
dd2 = ['2020-07-25', '2020-08-03']

In [37]:
pd.to_datetime(dd2)

DatetimeIndex(['2020-07-25', '2020-08-03'], dtype='datetime64[ns]', freq=None)

In [38]:
pd.to_datetime(dd2, dayfirst=True)

DatetimeIndex(['2020-07-25', '2020-08-03'], dtype='datetime64[ns]', freq=None)

In [39]:
dd3 = ['20/07/12', '20/08/05']

In [40]:
pd.to_datetime(dd3)

DatetimeIndex(['2012-07-20', '2005-08-20'], dtype='datetime64[ns]', freq=None)

In [41]:
dd4 = ['05/07/12', '03/08/05']

In [42]:
pd.to_datetime(dd4)

DatetimeIndex(['2012-05-07', '2005-03-08'], dtype='datetime64[ns]', freq=None)

### Periods

시간의 기간을 나타냄 (time span)

In [43]:
pr = pd.Period('2012', freq = 'M')

In [44]:
pr

Period('2012-01', 'M')

In [45]:
pr.asfreq('D')

Period('2012-01-31', 'D')

In [46]:
pr.asfreq('D', 'start')

Period('2012-01-01', 'D')

In [47]:
pr = pd.Period('2019', freq='A')  #annual

In [48]:
pr

Period('2019', 'A-DEC')

In [49]:
pr + 1

Period('2020', 'A-DEC')

In [50]:
pr = pd.Period('2018', freq='M')

In [51]:
pr

Period('2018-01', 'M')

In [52]:
pr + 2

Period('2018-03', 'M')

In [53]:
pr - 15

Period('2016-10', 'M')

In [54]:
prg = pd.period_range('2010', '2015', freq='A')

In [55]:
prg

PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'], dtype='period[A-DEC]', freq='A-DEC')

In [56]:
prg[0]

Period('2010', 'A-DEC')

In [57]:
data = pd.Series(np.random.rand(len(prg)), index = prg)

In [58]:
data

2010    0.395427
2011    0.806943
2012    0.689588
2013    0.056939
2014    0.630178
2015    0.061685
Freq: A-DEC, dtype: float64

In [59]:
dates = ['2013-02-03', '2013-02-04', '2013-2-4']

In [60]:
d = pd.to_datetime(dates)

In [61]:
d

DatetimeIndex(['2013-02-03', '2013-02-04', '2013-02-04'], dtype='datetime64[ns]', freq=None)

In [62]:
data = pd.Series(np.random.rand(len(d)), index= d)

In [63]:
data

2013-02-03    0.718813
2013-02-04    0.331251
2013-02-04    0.086426
dtype: float64

In [64]:
prd = d.to_period(freq='M')

In [65]:
prd

PeriodIndex(['2013-02', '2013-02', '2013-02'], dtype='period[M]', freq='M')

In [66]:
prd.asfreq('Y')

PeriodIndex(['2013', '2013', '2013'], dtype='period[A-DEC]', freq='A-DEC')

In [67]:
prd.asfreq('D')

PeriodIndex(['2013-02-28', '2013-02-28', '2013-02-28'], dtype='period[D]', freq='D')

In [68]:
prd.to_timestamp()

DatetimeIndex(['2013-02-01', '2013-02-01', '2013-02-01'], dtype='datetime64[ns]', freq=None)

In [69]:
prd.to_timestamp(how='end')

DatetimeIndex(['2013-02-28 23:59:59.999999999',
               '2013-02-28 23:59:59.999999999',
               '2013-02-28 23:59:59.999999999'],
              dtype='datetime64[ns]', freq=None)

In [70]:
#time offset

pd.Timedelta('3 days')

Timedelta('3 days 00:00:00')

In [72]:
pd.Timestamp('9 July 2016 12:00') + pd.Timedelta('105day 3min')

Timestamp('2016-10-22 12:03:00')

In [73]:
rng

DatetimeIndex(['2020-08-03 10:35:00', '2020-08-04 10:35:00',
               '2020-08-05 10:35:00', '2020-08-06 10:35:00',
               '2020-08-07 10:35:00', '2020-08-08 10:35:00',
               '2020-08-09 10:35:00', '2020-08-10 10:35:00',
               '2020-08-11 10:35:00', '2020-08-12 10:35:00'],
              dtype='datetime64[ns]', freq='D')

In [74]:
rng + pd.Timedelta('1day 7h')

DatetimeIndex(['2020-08-04 17:35:00', '2020-08-05 17:35:00',
               '2020-08-06 17:35:00', '2020-08-07 17:35:00',
               '2020-08-08 17:35:00', '2020-08-09 17:35:00',
               '2020-08-10 17:35:00', '2020-08-11 17:35:00',
               '2020-08-12 17:35:00', '2020-08-13 17:35:00'],
              dtype='datetime64[ns]', freq='D')

### Indexing with timestamps

In [75]:
dates = pd.date_range('2015-01-12', '2015-06-14', freq='M')

In [76]:
dates

DatetimeIndex(['2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30',
               '2015-05-31'],
              dtype='datetime64[ns]', freq='M')

In [77]:
atemp = pd.Series([35.3, 28, 29.5, 26, 32.6], index = dates)

In [78]:
atemp

2015-01-31    35.3
2015-02-28    28.0
2015-03-31    29.5
2015-04-30    26.0
2015-05-31    32.6
Freq: M, dtype: float64

In [79]:
idx = atemp.index[3]

In [80]:
idx

Timestamp('2015-04-30 00:00:00', freq='M')

In [81]:
atemp[idx]

26.0

In [82]:
stemp = pd.Series([34.2, 29.2, 28.5, 31.2, 33.2], index = dates)

In [83]:
stemp

2015-01-31    34.2
2015-02-28    29.2
2015-03-31    28.5
2015-04-30    31.2
2015-05-31    33.2
Freq: M, dtype: float64

In [84]:
temps = pd.DataFrame({'대구': atemp, '경산':stemp})

In [85]:
temps

Unnamed: 0,대구,경산
2015-01-31,35.3,34.2
2015-02-28,28.0,29.2
2015-03-31,29.5,28.5
2015-04-30,26.0,31.2
2015-05-31,32.6,33.2


In [86]:
temps['대구']

2015-01-31    35.3
2015-02-28    28.0
2015-03-31    29.5
2015-04-30    26.0
2015-05-31    32.6
Freq: M, Name: 대구, dtype: float64

In [87]:
temps['경산']

2015-01-31    34.2
2015-02-28    29.2
2015-03-31    28.5
2015-04-30    31.2
2015-05-31    33.2
Freq: M, Name: 경산, dtype: float64

In [88]:
temps.대구

2015-01-31    35.3
2015-02-28    28.0
2015-03-31    29.5
2015-04-30    26.0
2015-05-31    32.6
Freq: M, Name: 대구, dtype: float64

In [89]:
temps.경산

2015-01-31    34.2
2015-02-28    29.2
2015-03-31    28.5
2015-04-30    31.2
2015-05-31    33.2
Freq: M, Name: 경산, dtype: float64

In [90]:
temps['온도차'] = temps['대구'] - temps['경산']

In [91]:
temps

Unnamed: 0,대구,경산,온도차
2015-01-31,35.3,34.2,1.1
2015-02-28,28.0,29.2,-1.2
2015-03-31,29.5,28.5,1.0
2015-04-30,26.0,31.2,-5.2
2015-05-31,32.6,33.2,-0.6


In [93]:
temps.loc['2015-01-31']

대구     35.3
경산     34.2
온도차     1.1
Name: 2015-01-31 00:00:00, dtype: float64

In [94]:
temps.loc['01/31/15']

대구     35.3
경산     34.2
온도차     1.1
Name: 2015-01-31 00:00:00, dtype: float64

# 응용 예제

In [97]:
df = pd.read_csv('stocks.csv', index_col= 0)

In [98]:
df.head()

Unnamed: 0,date,AA,GE,IBM,MSFT
0,1990-02-01 00:00:00,4.98,2.87,16.79,0.51
1,1990-02-02 00:00:00,5.04,2.87,16.89,0.51
2,1990-02-05 00:00:00,5.07,2.87,17.32,0.51
3,1990-02-06 00:00:00,5.01,2.88,17.56,0.51
4,1990-02-07 00:00:00,5.04,2.91,17.93,0.51


In [99]:
df.date[0]

'1990-02-01 00:00:00'

In [100]:
type(df.date[0])

str

In [101]:
df = pd.read_csv('stocks.csv', index_col='date')

In [102]:
df.head()

Unnamed: 0_level_0,Unnamed: 0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-02-01 00:00:00,0,4.98,2.87,16.79,0.51
1990-02-02 00:00:00,1,5.04,2.87,16.89,0.51
1990-02-05 00:00:00,2,5.07,2.87,17.32,0.51
1990-02-06 00:00:00,3,5.01,2.88,17.56,0.51
1990-02-07 00:00:00,4,5.04,2.91,17.93,0.51


In [104]:
df.loc['1990-02-01 00:00:00']

Unnamed: 0     0.00
AA             4.98
GE             2.87
IBM           16.79
MSFT           0.51
Name: 1990-02-01 00:00:00, dtype: float64

In [105]:
# 'date' 열을 timepstamp 형태로 파일에서 읽어오자!

df = pd.read_csv('stocks.csv', parse_dates=['date'], index_col='date')

In [106]:
df.head()

Unnamed: 0_level_0,Unnamed: 0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-02-01,0,4.98,2.87,16.79,0.51
1990-02-02,1,5.04,2.87,16.89,0.51
1990-02-05,2,5.07,2.87,17.32,0.51
1990-02-06,3,5.01,2.88,17.56,0.51
1990-02-07,4,5.04,2.91,17.93,0.51


In [108]:
df.loc['1990-02-01 00:00:00']

Unnamed: 0     0.00
AA             4.98
GE             2.87
IBM           16.79
MSFT           0.51
Name: 1990-02-01 00:00:00, dtype: float64

In [109]:
del df['Unnamed: 0']

In [110]:
df.head()

Unnamed: 0_level_0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1990-02-01,4.98,2.87,16.79,0.51
1990-02-02,5.04,2.87,16.89,0.51
1990-02-05,5.07,2.87,17.32,0.51
1990-02-06,5.01,2.88,17.56,0.51
1990-02-07,5.04,2.91,17.93,0.51


In [111]:
# 'date' 열을 timepstamp 형태로 파일에서 읽어오자!

df = pd.read_csv('stocks.csv', parse_dates=['date'], index_col='date')

In [112]:
df.head()

Unnamed: 0_level_0,Unnamed: 0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-02-01,0,4.98,2.87,16.79,0.51
1990-02-02,1,5.04,2.87,16.89,0.51
1990-02-05,2,5.07,2.87,17.32,0.51
1990-02-06,3,5.01,2.88,17.56,0.51
1990-02-07,4,5.04,2.91,17.93,0.51


In [113]:
df.drop('Unnamed: 0', axis=1).head()

Unnamed: 0_level_0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1990-02-01,4.98,2.87,16.79,0.51
1990-02-02,5.04,2.87,16.89,0.51
1990-02-05,5.07,2.87,17.32,0.51
1990-02-06,5.01,2.88,17.56,0.51
1990-02-07,5.04,2.91,17.93,0.51


In [114]:
df.head()

Unnamed: 0_level_0,Unnamed: 0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-02-01,0,4.98,2.87,16.79,0.51
1990-02-02,1,5.04,2.87,16.89,0.51
1990-02-05,2,5.07,2.87,17.32,0.51
1990-02-06,3,5.01,2.88,17.56,0.51
1990-02-07,4,5.04,2.91,17.93,0.51


In [115]:
df.drop('Unnamed: 0', axis=1, inplace=True)

In [116]:
df.head()

Unnamed: 0_level_0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1990-02-01,4.98,2.87,16.79,0.51
1990-02-02,5.04,2.87,16.89,0.51
1990-02-05,5.07,2.87,17.32,0.51
1990-02-06,5.01,2.88,17.56,0.51
1990-02-07,5.04,2.91,17.93,0.51


In [117]:
df.loc['02/07/1990']

AA       5.04
GE       2.91
IBM     17.93
MSFT     0.51
Name: 1990-02-07 00:00:00, dtype: float64

In [118]:
df.loc['1990 Feb. 7']

AA       5.04
GE       2.91
IBM     17.93
MSFT     0.51
Name: 1990-02-07 00:00:00, dtype: float64

In [119]:
df.loc['1990-Feb-01':'1990-Feb-06'] #특정 기간을 선택

Unnamed: 0_level_0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1990-02-01,4.98,2.87,16.79,0.51
1990-02-02,5.04,2.87,16.89,0.51
1990-02-05,5.07,2.87,17.32,0.51
1990-02-06,5.01,2.88,17.56,0.51


In [120]:
df.loc['1990-02'] #특정 월을 선택

Unnamed: 0_level_0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1990-02-01,4.98,2.87,16.79,0.51
1990-02-02,5.04,2.87,16.89,0.51
1990-02-05,5.07,2.87,17.32,0.51
1990-02-06,5.01,2.88,17.56,0.51
1990-02-07,5.04,2.91,17.93,0.51
1990-02-08,5.04,2.92,17.86,0.51
1990-02-09,5.06,2.94,17.82,0.52
1990-02-12,4.96,2.89,17.58,0.52
1990-02-13,4.91,2.88,17.95,0.52
1990-02-14,4.94,2.89,18.04,0.52


In [121]:
df['1990'].tail()

Unnamed: 0_level_0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1990-12-24,4.86,2.76,20.47,0.82
1990-12-26,4.87,2.75,20.41,0.82
1990-12-27,4.8,2.74,20.43,0.82
1990-12-28,4.76,2.75,20.38,0.82
1990-12-31,4.8,2.75,20.32,0.82


## Resampling

downsampling : high freq --> low freq  
upsampling : low freq --> high freq



In [122]:
df.loc[pd.date_range(df.index[0], df.index[-1], freq='M')].head()

KeyError: 'Passing list-likes to .loc or [] with any missing labels is no longer supported, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'

In [123]:
df['1990-03']

Unnamed: 0_level_0,AA,GE,IBM,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1990-03-01,5.26,2.88,18.08,0.55
1990-03-02,5.41,2.92,18.23,0.57
1990-03-05,5.39,2.89,18.25,0.57
1990-03-06,5.4,2.92,18.39,0.57
1990-03-07,5.36,2.93,18.34,0.56
1990-03-08,5.34,2.95,18.6,0.58
1990-03-09,5.33,2.93,18.52,0.58
1990-03-12,5.34,2.92,18.73,0.59
1990-03-13,5.29,2.9,18.47,0.58
1990-03-14,5.28,2.95,18.47,0.59
