In [1]:
import numpy as np
import pandas as pd

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one
# (several cells in this notebook rely on this to show more than one result).
InteractiveShell.ast_node_interactivity = 'all'

### 참고. 난수 생성 : np.random 모듈


##### np.random.seed(seed값)

- seed : 난수 알고리즘에서 사용하는 기본 값
    - seed 값이 같으면 동일한 난수 발생
    - 예. np.random.seed(10) 


- 계속 변경되는 난수를 생성하려면 시드값이 매번 변하도록 지정
    - 예. np.random.seed(int(time.time()))
    

#### 난수 생성 함수

- random.rand(d0, d1, ...) : [0, 1) 구간의 균등분포에서 주어진 형태의 난수 배열 생성
- random.randint(최소값, 최대값, size=n) 
    - [최소값, 최대값)의 범위에서 임의의 정수 생성
    
- random.randn() : 표준정규분포(Standard normal distribution)로부터 샘플링된 난수 생성

- random.standard_normal() : 표준정규분포 난수 발생

- random.normal([loc, scale, size]) : 정규분포 난수 생성

- random.random_sample(size) : [0,1)사이의 난수 생성

- random.choice(a[, size, replace, p]) : 주어진 배열로 부터 표본추출

In [3]:
# Fix the seed so the draw below is reproducible across runs.
np.random.seed(20)
# Four integers from [0, 5), with the lower bound spelled out explicitly.
np.random.randint(0, 5, size=4)

array([3, 2, 4, 2], dtype=int32)

In [5]:
import time
# Seed from the current epoch second so that runs started in different seconds
# produce different sequences.
np.random.seed(int(time.time()))
np.random.randint(5, size=4)

array([3, 0, 2, 3], dtype=int32)

In [6]:
# No reseed here: this draw continues the generator state left by the previous cell.
np.random.randint(5, size=4)

array([2, 0, 3, 1], dtype=int32)

In [11]:
fruits = ['사과', '배', '바나나', '멜론', '망고', '딸기', '포도']
# Draw all five picks (with replacement) in one vectorised call; tolist() turns
# the NumPy strings into plain Python str, so no per-element str() is needed.
np.random.choice(fruits, size=5).tolist()

['포도', '딸기', '멜론', '포도', '딸기']

### 참고. 파이썬의 random 모듈

https://docs.python.org/ko/3/library/random.html

#### 정수 난수 발생 함수

##### random.randrange(start, stop[, step])
- range(start, stop, step)에서 임의로 선택된 요소를 반환
- choice(range(start, stop, step))와 동등하지만 실제로 range 객체를 만들지는 않음

##### random.randint(a, b)
- `a <= N <= b` 를 만족하는 임의의 정수 N을 반환
- randrange(a, b+1)의 별칭

#### 시퀀스 난수 발생 함수
##### random.choice(seq)
- 비어 있지 않은 시퀀스 seq에서 임의의 요소를 반환
- seq가 비어 있으면, IndexError를 발생


##### random.choices(population, weights=None, *, cum_weights=None, k=1)
- population에서 중복을 허락하면서(with replacement) 선택한 k 크기의 요소 리스트를 반환
- population이 비어 있으면 IndexError 발생
- weights 시퀀스가 지정되면 상대 가중치에 따라 선택됨
- weights나 cum_weights를 지정하지 않으면 같은 확률로 선택
- weights 시퀀스가 제공되면, population 시퀀스와 길이가 같아야 함
- weights와 cum_weights를 모두 지정하는 것은 TypeError

##### random.sample(population, k, *, counts=None)
- population 시퀀스로부터 추출한 k개 길이의 새 리스트를 반환
- random sampling without replacement

#### 실수 난수 발생 함수
##### random.random()
- `0.0 <= X < 1.0` 사이의 실수 반환

##### random.uniform(a, b)
- `a <= b` 일 때 `a <= N <= b`, `b < a` 일 때 `b <= N <= a`를 만족하는 임의의 부동 소수점 숫자 N을 반환
- 종단 값 b는 방정식 a + (b-a) * random()의 부동 소수점 자리 올림에 따라 범위에 포함되거나 포함되지 않을 수 있음

----

# pandas 다중 인덱스(multi index)

- 행이나 열 인덱스가 계층으로 구성된 인덱스(Hierarchical indexing)

In [19]:
# Four rows of data labelled by a two-level row index built from two
# parallel label lists (outer: a/a/b/b, inner: 1/2/1/3).
df = pd.DataFrame(
    data=[[1, 2, 3, 4], [2, 3, 4, 6]] * 2,
    index=pd.MultiIndex.from_arrays([list('aabb'), [1, 2, 1, 3]]),
)
df
df.index

Unnamed: 0,Unnamed: 1,0,1,2,3
a,1,1,2,3,4
a,2,2,3,4,6
b,1,1,2,3,4
b,3,2,3,4,6


MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 3)],
           )

[학습 내용]

1. 다중 인덱스를 갖는 Series
2. 다중 인덱스를 갖는 DataFrame
3. MultiIndex 객체
4. 다중인덱스의 특정 레벨 제거 : droplevel()
5. 행인덱스 레벨 해제 : unstack()
6. 열인덱스 레벨 해제 : stack()
7. 다중인덱스의 레벨 교환 : swaplevel()
8. 다중인덱스의 행/열 추가
9. 다중인덱스 정렬

### 1. 다중인덱스를 갖는 Series

#### 예1. 난수 데이터를 갖는 Series

In [20]:
# Two parallel label arrays, one per index level: the first names the group
# (each repeated twice), the second alternates one/two inside each group.
arrays = [np.array("bar bar baz baz foo foo qux qux".split()),
          np.array(["one", "two"] * 4)]
arrays

[array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
       dtype='<U3'),
 array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
       dtype='<U3')]

In [21]:
# Eight standard-normal values labelled by the two label arrays in `arrays`,
# which pandas turns into a two-level index.
pd.Series(np.random.randn(8), index=arrays)

bar  one   -0.215775
     two   -0.437889
baz  one   -0.668968
     two    1.031410
foo  one   -0.369272
     two    1.330956
qux  one    0.723965
     two    0.052571
dtype: float64

#### 예2. 키를 튜플로 갖는 딕셔너리의 데이터로 Series 생성

In [22]:
# Dictionary keyed by (student, subject) tuples, built student by student
# so the insertion order is James, Ted, Adam with Eng before Math.
data = {
    (student, subject): score
    for student, scores in [('James', (100, 90)), ('Ted', (90, 70)), ('Adam', (85, 90))]
    for subject, score in zip(('Eng', 'Math'), scores)
}
data

{('James', 'Eng'): 100,
 ('James', 'Math'): 90,
 ('Ted', 'Eng'): 90,
 ('Ted', 'Math'): 70,
 ('Adam', 'Eng'): 85,
 ('Adam', 'Math'): 90}

In [24]:
# The tuple keys of the dict become the two levels of the Series index.
s2 = pd.Series(data)
s2

James  Eng     100
       Math     90
Ted    Eng      90
       Math     70
Adam   Eng      85
       Math     90
dtype: int64

In [25]:
# Inspect the resulting MultiIndex: one (outer, inner) tuple per row.
s2.index

MultiIndex([('James',  'Eng'),
            ('James', 'Math'),
            (  'Ted',  'Eng'),
            (  'Ted', 'Math'),
            ( 'Adam',  'Eng'),
            ( 'Adam', 'Math')],
           )

#### 인덱스의 이름 지정 : 시리즈.index.names = [ , ]

In [27]:
# Name the two index levels; the names are shown as a header above the index.
s2.index.names = ['name','course']
s2

name   course
James  Eng       100
       Math       90
Ted    Eng        90
       Math       70
Adam   Eng        85
       Math       90
dtype: int64

#### 다중인덱스를 갖는 Series의 인덱싱

- 시리즈[상위인덱스]
- 시리즈.상위인덱스
- 시리즈[(상위인덱스, 하위인덱스)]
- 시리즈[상위인덱스, 하위인덱스]
- 시리즈.상위인덱스.하위인덱스
- 시리즈[:, 하위인덱스]

In [30]:
# Select by the outer level only: item access and attribute access return
# the same sub-Series, indexed by the remaining 'course' level.
s2['James']
s2.James

course
Eng     100
Math     90
dtype: int64

course
Eng     100
Math     90
dtype: int64

In [31]:
# A full (outer, inner) tuple key addresses a single value.
s2[('James','Eng')]

np.int64(100)

In [32]:
# A one-element tuple key: selects on the outer level only, returning the same
# rows as s2['James'].
# NOTE(review): the recorded output echoes this source line before the result,
# which looks like the tail of a warning message (possibly a lexsort-depth
# PerformanceWarning) — confirm; plain s2['James'] shows no such echo.
s2['James',]

  s2['James',]


course
Eng     100
Math     90
dtype: int64

In [33]:
# ':' keeps every outer label while the inner level is fixed to 'Eng'.
s2[: , 'Eng']

name
James    100
Ted       90
Adam      85
dtype: int64

In [34]:
# A list of outer labels selects several groups; both index levels are kept.
s2[['James', 'Ted']]

name   course
James  Eng       100
       Math       90
Ted    Eng        90
       Math       70
dtype: int64

In [35]:
# Chained access: outer label by item access, then inner label as an attribute.
s2['James'].Eng

np.int64(100)

In [37]:
# Chained access: outer label as an attribute, then inner label by item access.
s2.James['Math']

np.int64(90)

In [38]:
# Both levels reached through attribute access.
s2.James.Eng

np.int64(100)

In [40]:
# Comma-separated labels form a tuple, so this is the same key as s2[('James', 'Eng')].
s2['James','Eng']

np.int64(100)

### 2. 다중 인덱스를 갖는 DataFrame

- 데이터 프레임 생성 시 생성자에서 columns인수나 index 인수를  2차원 리스트(행렬) 형태로 지정할 경우

#### 1) column인덱스를 다중 인덱스로 갖는 DataFrame

In [41]:
# Reseed so the 5x4 sample below is reproducible.
np.random.seed(0)
# Standard-normal draws, rounded to two decimals through the array method.
data = np.random.randn(5, 4).round(2)
data

array([[ 1.76,  0.4 ,  0.98,  2.24],
       [ 1.87, -0.98,  0.95, -0.15],
       [-0.1 ,  0.41,  0.14,  1.45],
       [ 0.76,  0.12,  0.44,  0.33],
       [ 1.49, -0.21,  0.31, -0.85]])

In [43]:
# Two parallel label lists give a two-level column index:
# upper level A/A/B/B over lower level C1..C4.
df = pd.DataFrame(data, columns=[list('AABB'), ['C1', 'C2', 'C3', 'C4']])
df

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,C1,C2,C3,C4
0,1.76,0.4,0.98,2.24
1,1.87,-0.98,0.95,-0.15
2,-0.1,0.41,0.14,1.45
3,0.76,0.12,0.44,0.33
4,1.49,-0.21,0.31,-0.85


In [44]:
# The column labels are (upper, lower) tuples.
df.columns

MultiIndex([('A', 'C1'),
            ('A', 'C2'),
            ('B', 'C3'),
            ('B', 'C4')],
           )

#### 열 인덱싱(1): df[상위인덱스]
- 상위 인덱스의 모든 열에 대한 데이터프레임 반환

In [45]:
# An upper-level label alone returns all of its lower-level columns as a DataFrame.
df['A']

Unnamed: 0,C1,C2
0,1.76,0.4
1,1.87,-0.98
2,-0.1,0.41
3,0.76,0.12
4,1.49,-0.21


#### 열 인덱싱(2): df[(상위인덱스, 하위인덱스)]
- 인덱스가 하나가 아니므로 묶어서(튜플로) 전달해야 함 (괄호를 생략한 df[상위인덱스, 하위인덱스]도 튜플로 전달됨)
- 시리즈로 반환

In [52]:
# Both spellings pass the same tuple key and return that single column as a Series.
df['A','C2']
df[('A','C2')]

0    0.40
1   -0.98
2    0.41
3    0.12
4   -0.21
Name: (A, C2), dtype: float64

0    0.40
1   -0.98
2    0.41
3    0.12
4   -0.21
Name: (A, C2), dtype: float64

#### 열인덱싱(3) :  . 연산자로 확장

- df.상위인덱스
- df.상위인덱스.하위인덱스

In [49]:
# Attribute access on the upper level; same result as df['A'].
df.A

Unnamed: 0,C1,C2
0,1.76,0.4
1,1.87,-0.98
2,-0.1,0.41
3,0.76,0.12
4,1.49,-0.21


In [51]:
# Chained attribute access: upper level first, then the lower level.
df.A.C1

0    1.76
1    1.87
2   -0.10
3    0.76
4    1.49
Name: C1, dtype: float64

#### 열인덱스 이름 지정 : df.columns.names = []

In [62]:
# Name the two column levels.
df.columns.names = ['upper', 'lower']
df

upper,A,A,B,B
lower,C1,C2,C3,C4
aaaa,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,1.76,0.4,0.98,2.24
1,1.87,-0.98,0.95,-0.15
2,-0.1,0.41,0.14,1.45
3,0.76,0.12,0.44,0.33
4,1.49,-0.21,0.31,-0.85


#### 2) 행인덱스를 다중 인덱스로 갖는 DataFrame

In [63]:
# 4x4 integers drawn from [1, 10); bounds passed by keyword.
data = np.random.randint(low=1, high=10, size=(4, 4))
data

array([[1, 5, 8, 4],
       [3, 8, 3, 1],
       [1, 5, 6, 6],
       [7, 9, 5, 2]], dtype=int32)

In [66]:
# Two-level row index from two parallel label lists. Note that the inner
# labels are the strings '1' and '2', not integers.
df2 = pd.DataFrame(
    data,
    index=[['a', 'a', 'b', 'b'], ['1', '2', '1', '2']],
    columns=['A', 'B', 'C', 'D'],
)
df2

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,1,5,8,4
a,2,3,8,3,1
b,1,1,5,6,6
b,2,7,9,5,2


#### 행인덱싱(1) : df.loc[상위인덱스]

In [69]:
# An outer label alone returns all rows under 'a'; the outer level is dropped from the result.
df2.loc['a']

Unnamed: 0,A,B,C,D
1,1,5,8,4
2,3,8,3,1


In [70]:
# Label slices include both endpoints, so the 'b' rows are part of the result.
df2.loc['a':'b']

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,1,5,8,4
a,2,3,8,3,1
b,1,1,5,6,6
b,2,7,9,5,2


In [72]:
# The stop label 'c' is not in the index; the slice simply runs to the end,
# giving the same rows as 'a':'b'.
df2.loc['a':'c']

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,1,5,8,4
a,2,3,8,3,1
b,1,1,5,6,6
b,2,7,9,5,2


#### 행인덱싱(2) : df.loc[(상위인덱스, 하위인덱스)]
- 상위인덱스와 하위인덱스를 튜플로 전달

In [79]:
# Both spellings pass the same (outer, inner) tuple and return one row as a Series.
# The inner label is the string '1' because the index was built from split strings.
df2.loc['a','1']
df2.loc[('a','1')]

A    1
B    5
C    8
D    4
Name: (a, 1), dtype: int32

A    1
B    5
C    8
D    4
Name: (a, 1), dtype: int32

#### 인덱서 iloc : df.iloc[  ]
- iloc인덱서는 행이름, 열이름 기반이 아님

In [81]:
# iloc is purely positional: the first row, whatever its labels are.
df2.iloc[0]

A    1
B    5
C    8
D    4
Name: (a, 1), dtype: int32

In [82]:
# Second row by position.
df2.iloc[1]

A    3
B    8
C    3
D    1
Name: (a, 2), dtype: int32

In [83]:
# Single value at row position 1, column position 1.
df2.iloc[1,1]

np.int32(8)

In [85]:
# Row position 1, columns from position 1 onward.
df2.iloc[1,1:]

B    8
C    3
D    1
Name: (a, 2), dtype: int32

#### 3)  행과 열에 모두 다중인덱스를 갖는 DataFrame

In [88]:
# Values: a 6x4 standard-normal sample rounded to two decimals.
data3 = np.random.randn(6, 4).round(2)
data3
# Column labels: upper level A/B, with C1/C2 under each.
col1 = ['A'] * 2 + ['B'] * 2
col2 = ['C' + str(i) for i in range(1, 3)] * 2
# Row labels: group M/F, with id1..id3 under each.
idx1 = ['M'] * 3 + ['F'] * 3
idx2 = ['id' + str(i) for i in range(1, 4)] * 2
columns = [col1, col2]
index = [idx1, idx2]
columns
index

array([[-0.31,  0.06, -1.17,  0.9 ],
       [ 0.47, -1.54,  1.49,  1.9 ],
       [ 1.18, -0.18, -1.07,  1.05],
       [-0.4 ,  1.22,  0.21,  0.98],
       [ 0.36,  0.71,  0.01,  1.79],
       [ 0.13,  0.4 ,  1.88, -1.35]])

[['A', 'A', 'B', 'B'], ['C1', 'C2', 'C1', 'C2']]

[['M', 'M', 'M', 'F', 'F', 'F'], ['id1', 'id2', 'id3', 'id1', 'id2', 'id3']]

In [89]:
# Passing lists of label lists for both index and columns gives a MultiIndex on each axis.
df3 = pd.DataFrame(data3, index=index, columns=columns)
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,A,A,B,B
Unnamed: 0_level_1,Unnamed: 1_level_1,C1,C2,C1,C2
M,id1,-0.31,0.06,-1.17,0.9
M,id2,0.47,-1.54,1.49,1.9
M,id3,1.18,-0.18,-1.07,1.05
F,id1,-0.4,1.22,0.21,0.98
F,id2,0.36,0.71,0.01,1.79
F,id3,0.13,0.4,1.88,-1.35


#### 행/열 각 인덱스에 이름(names) 설정

- 이름을 지정하면 직관성이 높아지고 편리하게 사용할 수 있음
- 열이름/행이름 구분하는데 용이
- 문법
    - df.columns.names = 값 또는 리스트
    - df.index.names = 값 또는 리스트

In [92]:
# Give every level on both axes a name.
df3.columns.names = ['Cidx1', 'Cidx2']
df3.index.names = ['Ridx1', 'Ridx2']
df3

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,-0.31,0.06,-1.17,0.9
M,id2,0.47,-1.54,1.49,1.9
M,id3,1.18,-0.18,-1.07,1.05
F,id1,-0.4,1.22,0.21,0.98
F,id2,0.36,0.71,0.01,1.79
F,id3,0.13,0.4,1.88,-1.35


In [93]:
# Column ('A', 'C1') through chained attribute access; the row MultiIndex is kept.
df3.A.C1

Ridx1  Ridx2
M      id1     -0.31
       id2      0.47
       id3      1.18
F      id1     -0.40
       id2      0.36
       id3      0.13
Name: C1, dtype: float64

In [96]:
# One row selected by its (outer, inner) labels; the result is indexed by the column MultiIndex.
df3.loc['M','id2']

Cidx1  Cidx2
A      C1       0.47
       C2      -1.54
B      C1       1.49
       C2       1.90
Name: (M, id2), dtype: float64

### 3. MultiIndex 객체
- https://pandas.pydata.org/docs/user_guide/advanced.html

- 생성 방법
1. MultiIndex.from_arrays() 사용 : 배열(array)의 리스트
2. MultiIndex.from_tuples() : 튜플들(tuples)의 리스트
3. MultiIndex.from_product() : 리스트의 cross product
4. MultiIndex.from_frame() : DataFrame

In [99]:
# 1. Build a MultiIndex with MultiIndex.from_arrays()
# Each row of the 2-D array supplies the labels of one level.
arrays = np.array([['one', 'two', 'one', 'two'],
                   ['bar', 'baz', 'foo', 'qux']])
arrays

idx = pd.MultiIndex.from_arrays(arrays, names=['first', 'second'])
idx

array([['one', 'two', 'one', 'two'],
       ['bar', 'baz', 'foo', 'qux']], dtype='<U3')

MultiIndex([('one', 'bar'),
            ('two', 'baz'),
            ('one', 'foo'),
            ('two', 'qux')],
           names=['first', 'second'])

In [104]:
# 2. Build a MultiIndex with MultiIndex.from_tuples()
arrays = [['bar'] * 2 + ['baz'] * 2 + ['foo'] * 2 + ['qtx'] * 2,
          ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
arrays
# Pair the two label lists element-wise into (first, second) tuples.
tuples = [(first, second) for first, second in zip(arrays[0], arrays[1])]
idx = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
idx

[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qtx', 'qtx'],
 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qtx', 'one'),
            ('qtx', 'two')],
           names=['first', 'second'])

In [106]:
# 3. Build a MultiIndex with MultiIndex.from_product()
# Every combination of a first-level label with a second-level label.
data = ['bar baz foo qux'.split(), 'one two'.split()]
idx = pd.MultiIndex.from_product(iterables=data, names=['first', 'second'])
idx

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [108]:
# 4. Build a MultiIndex with MultiIndex.from_frame()
# Each DataFrame column becomes one index level, named after the column.
idx = pd.DataFrame({'first': ['bar', 'bar', 'foo', 'foo'],
                    'second': ['one', 'two', 'one', 'two']})
idx
pd.MultiIndex.from_frame(idx)

Unnamed: 0,first,second
0,bar,one
1,bar,two
2,foo,one
3,foo,two


MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

#### 예제. MultiIndex객체 생성하여 다중인덱스 설정

In [109]:
# 4x9 standard-normal sample, rounded to one decimal through the array method.
data4 = np.random.randn(4, 9).round(1)
data4

array([[-1.3,  1. , -1.2,  1.9, -0.4, -0.7,  1.9,  1.5,  1.9],
       [ 0.9, -0.9,  1.9, -0.3,  0.8,  0.9, -0.2,  0.6,  0.9],
       [ 0.4, -1.1,  0.3,  1.3, -0.7, -0.1, -0.4,  1.8,  0.7],
       [ 0.4, -0.8,  0.5, -0.7,  0. , -0.6,  0.7,  0.6, -0.2]])

In [110]:
# Row index: every (Year, Month) combination, with named levels.
index = pd.MultiIndex.from_product([[1995,2000],['May','Dec']],
                                  names=['Year','Month'])
# Column index: every (name, count) combination, with named levels.
columns = pd.MultiIndex.from_product([['A','B','C'],[1,2,3]],
                                  names=['name','count'])
# 2x2 row labels and 3x3 column labels match the 4x9 shape of data4.
df4 = pd.DataFrame(data4, index=index, columns=columns)
df4

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
Year,Month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
1995,Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
2000,May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
2000,Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


### 4. 다중인덱스의 특정 레벨 제거 : droplevel(level, axis)

#### 1) 시리즈의 다중인덱스 레벨 제거

In [112]:
s2

name   course
James  Eng       100
       Math       90
Ted    Eng        90
       Math       70
Adam   Eng        85
       Math       90
dtype: int64

In [113]:
# Drop the outer level (position 0); only 'course' remains as the index.
s2.droplevel(0)

course
Eng     100
Math     90
Eng      90
Math     70
Eng      85
Math     90
dtype: int64

In [117]:
# Positional and keyword forms are equivalent: drop the inner level, keep 'name'.
s2.droplevel(1)
s2.droplevel(level=1)

name
James    100
James     90
Ted       90
Ted       70
Adam      85
Adam      90
dtype: int64

name
James    100
James     90
Ted       90
Ted       70
Adam      85
Adam      90
dtype: int64

#### 2) 데이터프레임에서 다중인덱스 레벨 제거

In [116]:
df4

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
Year,Month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
1995,Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
2000,May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
2000,Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


#### 행인덱스 레벨 제거

In [120]:
# The axis defaults to the row index, so these three calls are equivalent:
# each drops the 'Year' level and keeps 'Month'.
df4.droplevel(level=0)
df4.droplevel(level=0, axis = 0)
df4.droplevel(level=0, axis = 'index')

name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
Month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
Month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
Month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


In [126]:
# Drop the inner row level ('Month'), keep 'Year'; both spellings are equivalent.
df4.droplevel(1)
df4.droplevel(level=1, axis=0)

name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1995,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
1995,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
2000,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
2000,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1995,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
1995,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
2000,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
2000,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


#### 열인덱스 레벨 제거

In [123]:
# axis=1 targets the column MultiIndex: drop its upper level ('name'), keep 'count'.
df4.droplevel(level=0, axis=1)

Unnamed: 0_level_0,count,1,2,3,1,2,3,1,2,3
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
1995,Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
2000,May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
2000,Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


In [125]:
# axis=1 and axis='columns' are synonyms: drop the lower column level ('count').
df4.droplevel(level=1, axis=1)
df4.droplevel(level=1, axis='columns')

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
1995,Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
2000,May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
2000,Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
1995,Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
2000,May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
2000,Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


### 5. 행인덱스 레벨 해제 : unstack()

: 행인덱스 -> 열인덱스로 변환

#### 1) 시리즈의 다중인덱스 레벨 해제

- 해제된 레벨 인덱스는 열인덱스로 변경되며, 데이터프레임으로 반환됨

https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.unstack.html

In [127]:
s2

name   course
James  Eng       100
       Math       90
Ted    Eng        90
       Math       70
Adam   Eng        85
       Math       90
dtype: int64

#### 마지막 레벨 해제 : unstack(level=-1)

In [129]:
# The default level=-1 is the last level, which is level 1 ('course') here,
# so all three calls give the same name-by-course table.
s2.unstack()
s2.unstack(level=-1)
s2.unstack(level=1)

course,Eng,Math
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Adam,85,90
James,100,90
Ted,90,70


course,Eng,Math
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Adam,85,90
James,100,90
Ted,90,70


course,Eng,Math
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Adam,85,90
James,100,90
Ted,90,70


#### 첫번째 레벨 해제 : unstack(level=0)

In [130]:
# Move the first level ('name') to the columns instead; 'course' stays as the row index.
s2.unstack(level=0)

name,Adam,James,Ted
course,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Eng,85,100,90
Math,90,90,70


#### 2) 데이터프레임의 다중 행인덱스 레벨 해제

- 데이터프레임의 인덱스 레벨 해제
- 해제된 레벨은 열인덱스 중 가장 마지막 레벨이 됨

[형식] DataFrame.unstack(level=-1, fill_value=None)


- level : int, str, or list of these, default= -1 (last level)
    - Level(s) of index to unstack, can pass level name.

- fill_value : int, str or dict
    - Replace NaN with this value if the unstack produces missing values.


- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.unstack.html

In [131]:
df4

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
Year,Month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,-1.3,1.0,-1.2,1.9,-0.4,-0.7,1.9,1.5,1.9
1995,Dec,0.9,-0.9,1.9,-0.3,0.8,0.9,-0.2,0.6,0.9
2000,May,0.4,-1.1,0.3,1.3,-0.7,-0.1,-0.4,1.8,0.7
2000,Dec,0.4,-0.8,0.5,-0.7,0.0,-0.6,0.7,0.6,-0.2


In [134]:
# Default: the last row level ('Month') becomes the innermost column level.
df4.unstack()
df4.unstack(level=-1)

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
Month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
1995,0.9,-1.3,-0.9,1.0,1.9,-1.2,-0.3,1.9,0.8,-0.4,0.9,-0.7,-0.2,1.9,0.6,1.5,0.9,1.9
2000,0.4,0.4,-0.8,-1.1,0.5,0.3,-0.7,1.3,0.0,-0.7,-0.6,-0.1,0.7,-0.4,0.6,1.8,-0.2,0.7


name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
Month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
1995,0.9,-1.3,-0.9,1.0,1.9,-1.2,-0.3,1.9,0.8,-0.4,0.9,-0.7,-0.2,1.9,0.6,1.5,0.9,1.9
2000,0.4,0.4,-0.8,-1.1,0.5,0.3,-0.7,1.3,0.0,-0.7,-0.6,-0.1,0.7,-0.4,0.6,1.8,-0.2,0.7


In [135]:
# Positional level 0: 'Year' moves to the columns, 'Month' stays as the row index.
df4.unstack(0)

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
Year,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000
Month,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
Dec,0.9,0.4,-0.9,-0.8,1.9,0.5,-0.3,-0.7,0.8,0.0,0.9,-0.6,-0.2,0.7,0.6,0.6,0.9,-0.2
May,-1.3,0.4,1.0,-1.1,-1.2,0.3,1.9,1.3,-0.4,-0.7,-0.7,-0.1,1.9,-0.4,1.5,1.8,1.9,0.7


In [136]:
# Same call with the level passed by keyword.
df4.unstack(level=0)

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
Year,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000
Month,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
Dec,0.9,0.4,-0.9,-0.8,1.9,0.5,-0.3,-0.7,0.8,0.0,0.9,-0.6,-0.2,0.7,0.6,0.6,0.9,-0.2
May,-1.3,0.4,1.0,-1.1,-1.2,0.3,1.9,1.3,-0.4,-0.7,-0.7,-0.1,1.9,-0.4,1.5,1.8,1.9,0.7


In [137]:
# Levels can also be selected by name; 'Month' is the last level, so this matches the default.
df4.unstack(level='Month')

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
Month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
Year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
1995,0.9,-1.3,-0.9,1.0,1.9,-1.2,-0.3,1.9,0.8,-0.4,0.9,-0.7,-0.2,1.9,0.6,1.5,0.9,1.9
2000,0.4,0.4,-0.8,-1.1,0.5,0.3,-0.7,1.3,0.0,-0.7,-0.6,-0.1,0.7,-0.4,0.6,1.8,-0.2,0.7


In [138]:
# By name: same result as level=0.
df4.unstack(level='Year')

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
Year,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000
Month,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
Dec,0.9,0.4,-0.9,-0.8,1.9,0.5,-0.3,-0.7,0.8,0.0,0.9,-0.6,-0.2,0.7,0.6,0.6,0.9,-0.2
May,-1.3,0.4,1.0,-1.1,-1.2,0.3,1.9,1.3,-0.4,-0.7,-0.7,-0.1,1.9,-0.4,1.5,1.8,1.9,0.7


### 6. 열인덱스 레벨 해제 : stack()

: 열인덱스 -> 행인덱스로 변환

#### 데이터프레임에서 열인덱스 레벨 해제

- 지정한 열인덱스가 행인덱스의 마지막 레벨로 변환 추가됨
- single level 열인덱스를 갖는 경우 시리즈로 반환
- multi level 열인덱스를 갖는 경우 데이터프레임 반환

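[형식] DataFrame.stack(level=-1, dropna=True, future_stack=False)

- level 위치 또는 열이름 지정
- level : int, str, list, default= -1
- dropna : bool, default True
- future_stack : bool, default False (pandas 2.1 이상)
    - True이면 새로운 stack 구현을 사용하며, 이때 dropna는 지정하지 않아야 함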

- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.stack.html

#### 예1. 1차 레벨을 가진 데이터프레임

In [142]:
# Same 2x2 table written column by column: weight and height for cat and dog.
df = pd.DataFrame({'weight': [10, 12], 'height': [11, 13]},
                  index=['cat', 'dog'])
df

Unnamed: 0,weight,height
cat,10,11
dog,12,13


In [143]:
# With single-level rows, unstack() returns a Series indexed by (column, row).
df.unstack()

weight  cat    10
        dog    12
height  cat    11
        dog    13
dtype: int64

In [144]:
# stack() moves the columns into the row index: a Series indexed by (row, column).
df.stack()

cat  weight    10
     height    11
dog  weight    12
     height    13
dtype: int64

#### 예2. 다중레벨을 갖는 데이터프레임

In [145]:
# Two-level columns: a single upper label 'weight' over the units kg and pound.
multicol = pd.MultiIndex.from_product([['weight'], ['kg', 'pound']])
df2 = pd.DataFrame(data=[[10, 11], [12, 13]],
                   columns=multicol,
                   index=['cat', 'dog'])
df2

Unnamed: 0_level_0,weight,weight
Unnamed: 0_level_1,kg,pound
cat,10,11
dog,12,13


In [157]:
# Default level=-1: the lower column level (kg/pound) moves into the row index.
# future_stack=True opts in to pandas' newer stack implementation (see the pandas docs).
df2.stack(future_stack=True)

Unnamed: 0,Unnamed: 1,weight
cat,kg,10
cat,pound,11
dog,kg,12
dog,pound,13


In [155]:
# level=0: the upper column level ('weight') moves into the row index instead.
df2.stack(level=0, future_stack=True)

Unnamed: 0,Unnamed: 1,kg,pound
cat,weight,10,11
dog,weight,12,13


In [153]:
# Stacking both column levels leaves no columns, so the result is a Series.
df2.stack(level=[0,1], future_stack=True)

cat  weight  kg       10
             pound    11
dog  weight  kg       12
             pound    13
dtype: int64

#### 예3. 열인덱스 이름을 갖는 데이터프레임

In [158]:
# Name the column levels so they can be selected by name.
df2.columns.names = ['C1', 'C2']
df2

C1,weight,weight
C2,kg,pound
cat,10,11
dog,12,13


In [159]:
# Same as level=0, selected by level name.
df2.stack('C1',future_stack=True)

Unnamed: 0_level_0,C2,kg,pound
Unnamed: 0_level_1,C1,Unnamed: 2_level_1,Unnamed: 3_level_1
cat,weight,10,11
dog,weight,12,13


In [160]:
# Both levels by name; the level names carry over to the resulting row index.
df2.stack(['C1','C2'], future_stack=True)

     C1      C2   
cat  weight  kg       10
             pound    11
dog  weight  kg       12
             pound    13
dtype: int64

### 7. 다중인덱스의 레벨 교환 : swaplevel()

[형식] DataFrame.swaplevel(i=-2, j=-1, axis=0)

- i, j : int or str
    - Levels of the indices to be swapped. Can pass level name as string.

- axis : 0 or 'index', 1 or 'columns', default=0
    - The axis to swap levels on
    - 0 or 'index' for row-wise
    - 1 or 'columns' for column-wise

- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.swaplevel.html

In [161]:
# One 'Grade' column labelled by a three-level row index:
# assessment type, subject, month.
df = pd.DataFrame(
    data={"Grade": list("ABAC")},
    index=pd.MultiIndex.from_arrays([
        ["Final exam"] * 2 + ["Coursework"] * 2,
        ["History", "Geography"] * 2,
        ["January", "February", "March", "April"],
    ]),
)
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
Final exam,History,January,A
Final exam,Geography,February,B
Coursework,History,March,A
Coursework,Geography,April,C


In [162]:
# Default arguments swap the two innermost row levels (subject and month here).
df.swaplevel()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
Final exam,January,History,A
Final exam,February,Geography,B
Coursework,March,History,A
Coursework,April,Geography,C


In [165]:
# Swap the first and second row levels.
df.swaplevel(0,1)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
History,Final exam,January,A
Geography,Final exam,February,B
History,Coursework,March,A
Geography,Coursework,April,C


In [166]:
# Swap the first and third row levels.
df.swaplevel(0,2)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
January,History,Final exam,A
February,Geography,Final exam,B
March,History,Coursework,A
April,Geography,Coursework,C


In [163]:
df2

C1,weight,weight
C2,kg,pound
cat,10,11
dog,12,13


In [164]:
# axis=1 swaps the column levels (C1 and C2) instead of the row levels.
df2.swaplevel(axis=1)

C2,kg,pound
C1,weight,weight
cat,10,11
dog,12,13


### [정리] 다중인덱스의 접근 방법

- 인덱스가 하나가 아니므로 묶어서(튜플로) 전달
- 열 접근 : df[(튜플)]
- 행 접근 : df.loc[(튜플)]
- 참고. df.iloc[]은 정수위치로 접근하여 다중인덱스에 구애받지 않음

-----

### 8. 다중인덱스의 행/열 추가

In [167]:
# 6x4 uniform [0, 1) sample, rounded to two decimals.
data = np.random.rand(6, 4).round(2)
# Named column levels: A/B over C1/C2.
columns = pd.MultiIndex.from_product(
    iterables=[list('AB'), ['C1', 'C2']],
    names=['cidx1', 'cidx2'],
)
# Named row levels: M/F over id1..id3.
index = pd.MultiIndex.from_product(
    iterables=[list('MF'), ['id1', 'id2', 'id3']],
    names=['ridx1', 'ridx2'],
)
df = pd.DataFrame(data, columns=columns, index=index)
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.7,0.03,0.16,0.62
M,id2,0.58,0.24,0.93,0.61
M,id3,0.54,0.59,0.73,0.31
F,id1,0.4,0.21,0.19,0.94
F,id2,0.74,0.49,0.23,0.25
F,id3,0.06,0.43,0.31,0.7


In [168]:
# Work on a copy so that the edits below leave the original df untouched.
df2 = df.copy()
df2

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.7,0.03,0.16,0.62
M,id2,0.58,0.24,0.93,0.61
M,id3,0.54,0.59,0.73,0.31
F,id1,0.4,0.21,0.19,0.94
F,id2,0.74,0.49,0.23,0.25
F,id3,0.06,0.43,0.31,0.7


#### 값 입력 시 위치(레이블) 지정 실수 주의 : 존재하지 않는 레이블을 지정하면 새 열이 추가됨

In [171]:
# Overwrite one cell: the row tuple and the column tuple both name existing labels.
df2.loc[('F','id1'),('B','C1')] = 20
df2

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.7,0.03,0.16,0.62
M,id2,0.58,0.24,0.93,0.61
M,id3,0.54,0.59,0.73,0.31
F,id1,0.4,0.21,20.0,0.94
F,id2,0.74,0.49,0.23,0.25
F,id3,0.06,0.43,0.31,0.7


In [172]:
# Deliberate mistake: lower-case 'c1' is not an existing column label, so .loc
# silently adds a new ('B', 'c1') column (empty in every other row) instead of raising.
df2.loc[('F','id1'),('B','c1')] = 20
df2

Unnamed: 0_level_0,cidx1,A,A,B,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,c1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M,id1,0.7,0.03,0.16,0.62,
M,id2,0.58,0.24,0.93,0.61,
M,id3,0.54,0.59,0.73,0.31,
F,id1,0.4,0.21,20.0,0.94,20.0
F,id2,0.74,0.49,0.23,0.25,
F,id3,0.06,0.43,0.31,0.7,


In [174]:
# Remove the ('B', 'c1') column that the mistyped label created.
df2.drop(columns=('B','c1'), inplace=True)

In [175]:
df2

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.7,0.03,0.16,0.62
M,id2,0.58,0.24,0.93,0.61
M,id3,0.54,0.59,0.73,0.31
F,id1,0.4,0.21,20.0,0.94
F,id2,0.74,0.49,0.23,0.25
F,id3,0.06,0.43,0.31,0.7


#### 각 행의 총합을 마지막 열로 추가

In [176]:
# Append each row's total as a new ('Row', 'Sum') column.
# Only columns whose upper label is not 'Row' are summed, so re-running the
# cell cannot fold a previously added total back into the new sum.
df2[('Row','Sum')] = df2.loc[:, df2.columns.get_level_values(0) != 'Row'].sum(axis=1)
df2

Unnamed: 0_level_0,cidx1,A,A,B,B,Row
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,Sum
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M,id1,0.7,0.03,0.16,0.62,1.51
M,id2,0.58,0.24,0.93,0.61,2.36
M,id3,0.54,0.59,0.73,0.31,2.17
F,id1,0.4,0.21,20.0,0.94,21.55
F,id2,0.74,0.49,0.23,0.25,1.71
F,id3,0.06,0.43,0.31,0.7,1.5


In [177]:
# Per-column totals (summing down the rows is the default).
df2.sum()

cidx1  cidx2
A      C1        3.02
       C2        1.99
B      C1       22.36
       C2        3.43
Row    Sum      30.80
dtype: float64

#### 각 열의 총합을 마지막 행으로 추가

In [190]:
# Append each column's total as a new ('Col', 'Sum') row.
# Both the row key and ':' for the columns must be given to add a row by label.
# Rows whose outer label is 'Col' are excluded from the sum, so re-running the
# cell does not double the totals by adding the existing total row to itself.
df2.loc[('Col', 'Sum'),:] = df2[df2.index.get_level_values(0) != 'Col'].sum(axis=0)
df2

Unnamed: 0_level_0,cidx1,A,A,B,B,Row
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,Sum
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M,id1,0.7,0.03,0.16,0.62,1.51
M,id2,0.58,0.24,0.93,0.61,2.36
M,id3,0.54,0.59,0.73,0.31,2.17
F,id1,0.4,0.21,20.0,0.94,21.55
F,id2,0.74,0.49,0.23,0.25,1.71
F,id3,0.06,0.43,0.31,0.7,1.5
Col,Sum,3.02,1.99,22.36,3.43,30.8


### 9. 다중인덱스 정렬

### 1) sort_index()

[형식] sort_index(*, axis=0, level=None, ascending=True, inplace=False, ...)

- 행/열 인덱스 기준으로 정렬
- 기본 정렬 방식 : 오름차순 정렬
- 내림차순 : ascending=False 설정

####  행인덱스 정렬

In [191]:
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.7,0.03,0.16,0.62
M,id2,0.58,0.24,0.93,0.61
M,id3,0.54,0.59,0.73,0.31
F,id1,0.4,0.21,0.19,0.94
F,id2,0.74,0.49,0.23,0.25
F,id3,0.06,0.43,0.31,0.7


In [194]:
# Sort rows by their index labels: ascending (default), then descending.
df.sort_index()
df.sort_index(ascending=False)

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.4,0.21,0.19,0.94
F,id2,0.74,0.49,0.23,0.25
F,id3,0.06,0.43,0.31,0.7
M,id1,0.7,0.03,0.16,0.62
M,id2,0.58,0.24,0.93,0.61
M,id3,0.54,0.59,0.73,0.31


Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id3,0.54,0.59,0.73,0.31
M,id2,0.58,0.24,0.93,0.61
M,id1,0.7,0.03,0.16,0.62
F,id3,0.06,0.43,0.31,0.7
F,id2,0.74,0.49,0.23,0.25
F,id1,0.4,0.21,0.19,0.94


In [195]:
# Sort rows primarily by the inner level (level=1): ascending, then descending.
df.sort_index(level=1)
df.sort_index(level=1, ascending=False)

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.4,0.21,0.19,0.94
M,id1,0.7,0.03,0.16,0.62
F,id2,0.74,0.49,0.23,0.25
M,id2,0.58,0.24,0.93,0.61
F,id3,0.06,0.43,0.31,0.7
M,id3,0.54,0.59,0.73,0.31


Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id3,0.54,0.59,0.73,0.31
F,id3,0.06,0.43,0.31,0.7
M,id2,0.58,0.24,0.93,0.61
F,id2,0.74,0.49,0.23,0.25
M,id1,0.7,0.03,0.16,0.62
F,id1,0.4,0.21,0.19,0.94


#### 열인덱스 기준으로 정렬

In [197]:
# axis=1 sorts the column labels instead of the rows; here in descending order.
df.sort_index(axis=1, ascending=False)

Unnamed: 0_level_0,cidx1,B,B,A,A
Unnamed: 0_level_1,cidx2,C2,C1,C2,C1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.62,0.16,0.03,0.7
M,id2,0.61,0.93,0.24,0.58
M,id3,0.31,0.73,0.59,0.54
F,id1,0.94,0.19,0.21,0.4
F,id2,0.25,0.23,0.49,0.74
F,id3,0.7,0.31,0.43,0.06


In [202]:
# Sort columns primarily by the lower level (level=1): descending, then ascending.
df.sort_index(axis=1, level=1, ascending=False)
df.sort_index(axis=1, level=1, ascending=True)

Unnamed: 0_level_0,cidx1,B,A,B,A
Unnamed: 0_level_1,cidx2,C2,C2,C1,C1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.62,0.03,0.16,0.7
M,id2,0.61,0.24,0.93,0.58
M,id3,0.31,0.59,0.73,0.54
F,id1,0.94,0.21,0.19,0.4
F,id2,0.25,0.49,0.23,0.74
F,id3,0.7,0.43,0.31,0.06


Unnamed: 0_level_0,cidx1,A,B,A,B
Unnamed: 0_level_1,cidx2,C1,C1,C2,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.7,0.16,0.03,0.62
M,id2,0.58,0.93,0.24,0.61
M,id3,0.54,0.73,0.59,0.31
F,id1,0.4,0.19,0.21,0.94
F,id2,0.74,0.23,0.49,0.25
F,id3,0.06,0.31,0.43,0.7


### 2) sort_values()

[형식] sort_values(by, *, axis=0, ascending=True, inplace=False, ...)

- 특정 컬럼 값을 기준으로 정렬
- by = 특정컬럼
    - 특정컬럼이 다중인덱스 일 경우 컬럼명을 튜플로 전달

#### df의 A.C2 컬럼을 기준으로 정렬

In [204]:
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.7,0.03,0.16,0.62
M,id2,0.58,0.24,0.93,0.61
M,id3,0.54,0.59,0.73,0.31
F,id1,0.4,0.21,0.19,0.94
F,id2,0.74,0.49,0.23,0.25
F,id3,0.06,0.43,0.31,0.7


In [205]:
# A column under a MultiIndex is named by its (upper, lower) tuple;
# rows are ordered by ascending values of column ('A', 'C2').
df.sort_values(by=('A','C2'))

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.7,0.03,0.16,0.62
F,id1,0.4,0.21,0.19,0.94
M,id2,0.58,0.24,0.93,0.61
F,id3,0.06,0.43,0.31,0.7
F,id2,0.74,0.49,0.23,0.25
M,id3,0.54,0.59,0.73,0.31


-----------