In [3]:
import numpy as np
import pandas as pd

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="all"

# pandas 다중 인덱스(multi index)

- 행이나 열 인덱스가 계층으로 구성된 인덱스(Hierarchical indexing)

In [5]:
# Two-row demo frame: rows are labelled 'a' and 'b'; no column labels are
# given, so the columns get the default integer labels.
df = pd.DataFrame(
    data=[[1, 2, 3, 4], [2, 3, 4, 5]],
    index=list('ab'),
)
df

Unnamed: 0,0,1,2,3
a,1,2,3,4
b,2,3,4,5


In [6]:
df.index

Index(['a', 'b'], dtype='object')

In [8]:
df.loc[['a']]

Unnamed: 0,0,1,2,3
a,1,2,3,4


### 참고. 난수 생성 : np.random 모듈

: https://numpy.org/doc/stable/reference/random/index.html

##### np.random.seed(seed값)

- seed : 난수 알고리즘에서 사용하는 기본 값
    - seed 값이 같으면 동일한 난수 발생
    - 예. np.random.seed(10) 


- 계속 변경되는 난수를 생성하려면 시드값이 매번 변하도록 지정
    - 예. np.random.seed(int(time.time()))
    

#### 난수 생성 함수

- random.rand(d0, d1, ...) : [0,1) 범위의 균등분포 난수를 주어진 형태(shape)의 배열로 생성
- random.randint(최소값, 최대값, size=n) 
    - [최소값, 최대값)의 범위에서 임의의 정수 생성
    
- random.randn() : 표준정규분포(Standard normal distribution)로부터 샘플링된 난수 생성

- random.standard_normal() : 표준정규분포 난수 발생

- random.normal([loc, scale, size]) : 정규분포 난수 생성

- random.random_sample(size) : [0,1)사이의 난수 생성

- random.choice(a[, size, replace, p]) : 주어진 배열로 부터 표본추출

In [11]:
np.random.seed(10)
np.random.randint(5,size=4)

array([1, 4, 0, 1])

In [12]:
np.random.randint(5,size=4)

array([3, 4, 1, 0])

In [13]:
import time

int(time.time())

1675298930

In [16]:
time.time

<function time.time>

In [18]:
np.random.seed(int(time.time()))
np.random.randint(5,size=4)

array([2, 0, 0, 1])

### 참고. 파이썬의 random 모듈

#### 정수 난수 발생 함수

##### random.randrange(start, stop[, step])
- range(start, stop, step)에서 임의로 선택된 요소를 반환
- choice(range(start, stop, step))와 동등하지만 실제로 range 객체를 만들지는 않음

##### random.randint(a, b)
- `a <= N <= b` 를 만족하는 임의의 정수 N을 반환
- randrange(a, b+1)의 별칭

#### 시퀀스 난수 발생 함수
##### random.choice(seq)
- 비어 있지 않은 시퀀스 seq에서 임의의 요소를 반환
- seq가 비어 있으면, IndexError를 발생


##### random.choices(population, weights=None, *, cum_weights=None, k=1)
- population에서 중복을 허락하면서(with replacement) 선택한 k 크기의 요소 리스트를 반환
- population이 비어 있으면 IndexError 발생
- weights 시퀀스가 지정되면 상대 가중치에 따라 선택됨
- weights나 cum_weights를 지정하지 않으면 같은 확률로 선택
- weights 시퀀스가 제공되면, population 시퀀스와 길이가 같아야 함
- weights와 cum_weights를 모두 지정하는 것은 TypeError

##### random.sample(population, k, *, counts=None)
- population 시퀀스로부터 추출한 k개 길이의 새 리스트를 반환
- random sampling without replacement

#### 실수 난수 발생 함수
##### random.random()
- `0.0 <= X < 1.0` 사이의 실수 반환

##### random.uniform(a, b)
- `a <= b` 일 때 `a <= N <= b`, `b < a` 일 때 `b <= N <= a`를 만족하는 임의의 부동 소수점 숫자 N을 반환
- 종단 값 b는 방정식 a + (b-a) * random()의 부동 소수점 자리 올림에 따라 범위에 포함되거나 포함되지 않을 수 있음

### 1. 다중인덱스를 갖는 Series

#### 예1. 난수 데이터를 갖는 Series

In [23]:
# Two parallel label arrays: the first holds the outer-level labels (each
# one appearing twice), the second the alternating inner-level labels.
index = [
    np.array([outer for outer in ("bar", "baz", "foo", "qux") for _ in range(2)]),
    np.array(["one", "two"] * 4),
]
index

[array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
       dtype='<U3'),
 array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
       dtype='<U3')]

In [28]:
s1=pd.Series(np.random.randn(8),
            index=index)
s1

bar  one   -0.349919
     two   -0.614027
baz  one    1.223247
     two   -0.887361
foo  one   -0.907272
     two   -0.235782
qux  one   -0.893656
     two   -0.016419
dtype: float64

In [29]:
s1.index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

#### 예2. 키를 튜플로 갖는 딕셔너리의 데이터로 Series 생성

In [22]:
# Dictionary whose keys are (name, subject) tuples and whose values are scores.
data = {
    (name, subject): score
    for name, subject, score in (
        ('James', 'Eng', 100),
        ('James', 'Math', 90),
        ('Ted', 'Eng', 90),
        ('Ted', 'Math', 70),
        ('Adam', 'Eng', 85),
        ('Adam', 'Math', 90),
    )
}


In [32]:
s2=pd.Series(data)
s2

James  Eng     100
       Math     90
Ted    Eng      90
       Math     70
Adam   Eng      85
       Math     90
dtype: int64

In [33]:
s2.index

MultiIndex([('James',  'Eng'),
            ('James', 'Math'),
            (  'Ted',  'Eng'),
            (  'Ted', 'Math'),
            ( 'Adam',  'Eng'),
            ( 'Adam', 'Math')],
           )

#### 인덱스의 이름 지정 : 시리즈.index.names = [ , ]

In [35]:
s2.index.names=['Name','class']
s2.index

MultiIndex([('James',  'Eng'),
            ('James', 'Math'),
            (  'Ted',  'Eng'),
            (  'Ted', 'Math'),
            ( 'Adam',  'Eng'),
            ( 'Adam', 'Math')],
           names=['Name', 'class'])

#### 다중인덱스를 갖는 Series의 인덱싱

- 시리즈[상위인덱스]
- 시리즈.상위인덱스
- 시리즈[(상위인덱스, 하위인덱스)]
- 시리즈.상위인덱스.하위인덱스
- 시리즈[:, 하위인덱스]

In [36]:
s2['James']

class
Eng     100
Math     90
dtype: int64

In [37]:
s2.James

class
Eng     100
Math     90
dtype: int64

In [38]:
s2[('Ted','Eng')]

90

In [40]:
s2.Ted.Eng

90

In [41]:
s2[:,'Eng']

Name
James    100
Ted       90
Adam      85
dtype: int64

In [42]:
s2[['James','Ted']]

Name   class
James  Eng      100
       Math      90
Ted    Eng       90
       Math      70
dtype: int64

In [44]:
s2.loc[['James','Ted']]

Name   class
James  Eng      100
       Math      90
Ted    Eng       90
       Math      70
dtype: int64

In [45]:
s2[:,'Math']

Name
James    90
Ted      70
Adam     90
dtype: int64

### 2. 다중 인덱스를 갖는 DataFrame

- 데이터 프레임 생성 시 생성자에서 columns인수나 index 인수를  2차원 리스트(행렬) 형태로 지정할 경우

#### 1) column인덱스를 다중 인덱스로 갖는 DataFrame

In [47]:
# Seed the generator so the 5x4 standard-normal sample is reproducible,
# then keep two decimals for a compact display.
np.random.seed(0)
data = np.random.randn(5, 4).round(2)
data

array([[ 1.76,  0.4 ,  0.98,  2.24],
       [ 1.87, -0.98,  0.95, -0.15],
       [-0.1 ,  0.41,  0.14,  1.45],
       [ 0.76,  0.12,  0.44,  0.33],
       [ 1.49, -0.21,  0.31, -0.85]])

In [48]:
df=pd.DataFrame(data=data,
               columns=[['A','A','B','B'],
                       ['C1','C2','C3','C4']])
df

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,C1,C2,C3,C4
0,1.76,0.4,0.98,2.24
1,1.87,-0.98,0.95,-0.15
2,-0.1,0.41,0.14,1.45
3,0.76,0.12,0.44,0.33
4,1.49,-0.21,0.31,-0.85


In [50]:
df.columns

MultiIndex([('A', 'C1'),
            ('A', 'C2'),
            ('B', 'C3'),
            ('B', 'C4')],
           )

#### 열 인덱싱(1): df[상위인덱스]
- 상위 인덱스의 모든 열에 대한 데이터프레임 반환

In [51]:
df['A']

Unnamed: 0,C1,C2
0,1.76,0.4
1,1.87,-0.98
2,-0.1,0.41
3,0.76,0.12
4,1.49,-0.21


#### 열 인덱싱(2): df[(상위인덱스, 하위인덱스)]
- 인덱스가 하나가 아니므로 묶어서(튜플로) 전달해야 함
- 시리즈로 반환

In [52]:
df[('A','C1')]

0    1.76
1    1.87
2   -0.10
3    0.76
4    1.49
Name: (A, C1), dtype: float64

In [53]:
df[('B','C3')]

0    0.98
1    0.95
2    0.14
3    0.44
4    0.31
Name: (B, C3), dtype: float64

#### 열인덱싱(3) :  . 연산자로 확장

- df.상위인덱스
- df.상위인덱스.하위인덱스

In [54]:
df.A

Unnamed: 0,C1,C2
0,1.76,0.4
1,1.87,-0.98
2,-0.1,0.41
3,0.76,0.12
4,1.49,-0.21


In [55]:
df.A.C2

0    0.40
1   -0.98
2    0.41
3    0.12
4   -0.21
Name: C2, dtype: float64

#### 열인덱스 이름 지정 : df.columns.names = []

In [56]:
df.columns.names=['upper','lower']
df

upper,A,A,B,B
lower,C1,C2,C3,C4
0,1.76,0.4,0.98,2.24
1,1.87,-0.98,0.95,-0.15
2,-0.1,0.41,0.14,1.45
3,0.76,0.12,0.44,0.33
4,1.49,-0.21,0.31,-0.85


#### 2) 행인덱스를 다중 인덱스로 갖는 DataFrame

In [70]:
# 4x4 frame of random integers drawn from [1, 10). The row index has two
# levels: outer 'a'/'b' and inner '1'/'2' (strings, not ints).
data2 = np.random.randint(low=1, high=10, size=(4, 4))
df2 = pd.DataFrame(
    data2,
    index=[list('aabb'), list('1212')],
    columns=list('ABCD'),
)
df2

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,5,9,2,2
a,2,8,4,7,8
b,1,3,1,4,6
b,2,5,5,7,5


In [71]:
df2.index

MultiIndex([('a', '1'),
            ('a', '2'),
            ('b', '1'),
            ('b', '2')],
           )

#### 행인덱싱(1) : df.loc[상위인덱스]

In [72]:
df2.loc['a']

Unnamed: 0,A,B,C,D
1,5,9,2,2
2,8,4,7,8


In [73]:
df2.loc['a':]

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,5,9,2,2
a,2,8,4,7,8
b,1,3,1,4,6
b,2,5,5,7,5


In [74]:
df2.loc[:,'A']

a  1    5
   2    8
b  1    3
   2    5
Name: A, dtype: int32

#### 행인덱싱(2) : df.loc[(상위인덱스, 하위인덱스)]
- 상위인덱스와 하위인덱스를 튜플로 전달

In [75]:
df2.loc[('a','1')]

A    5
B    9
C    2
D    2
Name: (a, 1), dtype: int32

In [76]:
df2.loc['a','1']

A    5
B    9
C    2
D    2
Name: (a, 1), dtype: int32

#### 인덱서 iloc : df.iloc[  ]
- iloc인덱서는 행이름, 열이름 기반이 아님

In [77]:
df2.iloc[0]

A    5
B    9
C    2
D    2
Name: (a, 1), dtype: int32

In [78]:
df2.iloc[1]

A    8
B    4
C    7
D    8
Name: (a, 2), dtype: int32

In [79]:
df2.iloc[1,1:]

B    4
C    7
D    8
Name: (a, 2), dtype: int32

#### 3)  행과 열에 모두 다중인덱스를 갖는 DataFrame

In [81]:
data3=np.round(np.random.randn(6,4),2)
data3

array([[ 1.25,  1.42, -0.74, -2.52],
       [-1.51,  1.15, -1.19,  1.14],
       [ 1.51,  1.07, -0.69,  0.01],
       [-0.38, -0.04,  0.37, -0.04],
       [-0.3 , -2.22,  0.72,  0.36],
       [ 1.08,  0.19,  0.85,  0.02]])

In [82]:
# Upper column level: 'A' and 'B', each repeated twice -> ['A', 'A', 'B', 'B'].
col1 = [label for label in 'AB' for _ in range(2)]
# Lower column level: 'C1' through 'C4'.
col2 = [f'C{n}' for n in range(1, 5)]
col1
col2

['A', 'A', 'B', 'B']

['C1', 'C2', 'C3', 'C4']

In [83]:
# Outer row level: three 'M' labels followed by three 'F' labels.
idx1 = [label for label in 'MF' for _ in range(3)]
# Inner row level: 'id1'..'id3', repeated once per outer label.
idx2 = [f'id{n}' for _ in range(2) for n in range(1, 4)]
idx1
idx2

['M', 'M', 'M', 'F', 'F', 'F']

['id1', 'id2', 'id3', 'id1', 'id2', 'id3']

In [85]:
df3=pd.DataFrame(data=data3,
                index=[idx1,idx2],
                columns=[col1,col2])
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,A,A,B,B
Unnamed: 0_level_1,Unnamed: 1_level_1,C1,C2,C3,C4
M,id1,1.25,1.42,-0.74,-2.52
M,id2,-1.51,1.15,-1.19,1.14
M,id3,1.51,1.07,-0.69,0.01
F,id1,-0.38,-0.04,0.37,-0.04
F,id2,-0.3,-2.22,0.72,0.36
F,id3,1.08,0.19,0.85,0.02


#### 행/열 각 인덱스에 이름(names) 설정

- 이름을 지정하면 직관성이 높아지고 편리하게 사용할 수 있음
- 열이름/행이름 구분하는데 용이
- 문법
    - df.columns.names = 값 또는 리스트
    - df.index.names = 값 또는 리스트

In [86]:
df3.columns.names=['Cidx1','Cidx2']
df3.index.names=['Ridx1','Ridx2']
df3

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C3,C4
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,1.25,1.42,-0.74,-2.52
M,id2,-1.51,1.15,-1.19,1.14
M,id3,1.51,1.07,-0.69,0.01
F,id1,-0.38,-0.04,0.37,-0.04
F,id2,-0.3,-2.22,0.72,0.36
F,id3,1.08,0.19,0.85,0.02


In [88]:
df3.columns

MultiIndex([('A', 'C1'),
            ('A', 'C2'),
            ('B', 'C3'),
            ('B', 'C4')],
           names=['Cidx1', 'Cidx2'])

In [87]:
df3.index

MultiIndex([('M', 'id1'),
            ('M', 'id2'),
            ('M', 'id3'),
            ('F', 'id1'),
            ('F', 'id2'),
            ('F', 'id3')],
           names=['Ridx1', 'Ridx2'])

### 3. MultiIndex 객체
- https://pandas.pydata.org/docs/user_guide/advanced.html

- 생성 방법
1. MultiIndex.from_arrays() 사용 : 배열(array)의 리스트
2. MultiIndex.from_tuples() : 튜플들(tuples)의 리스트
3. MultiIndex.from_product() : 리스트의 cross product
4. MultiIndex.from_frame() : DataFrame

In [89]:
# 1. Build a MultiIndex with MultiIndex.from_arrays(): one array per level.
arrays = np.array([["one", "two"] * 2,
                   ["bar", "baz", "foo", "qux"]])
arrays

index = pd.MultiIndex.from_arrays(
    arrays=arrays,
    names=["first", "second"],
)
index

array([['one', 'two', 'one', 'two'],
       ['bar', 'baz', 'foo', 'qux']], dtype='<U3')

MultiIndex([('one', 'bar'),
            ('two', 'baz'),
            ('one', 'foo'),
            ('two', 'qux')],
           names=['first', 'second'])

In [90]:
# 2. Build a MultiIndex with MultiIndex.from_tuples(): a list of per-row tuples.
arrays = [
    [outer for outer in ("bar", "baz", "foo", "qux") for _ in range(2)],
    ["one", "two"] * 4,
]

# Pair the labels position by position: (outer label, inner label).
tuples = list(zip(arrays[0], arrays[1]))
tuples

index = pd.MultiIndex.from_tuples(
    tuples,
    names=["first", "second"],
)
index



[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [91]:
# 3. Build a MultiIndex with MultiIndex.from_product(): every combination of
#    the first list's labels with the second list's labels, in order.
iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]]
pd.MultiIndex.from_product(iterables,
                           names=["first", "second"])


MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [94]:
# 4. Build a MultiIndex with MultiIndex.from_frame(): each column of the
#    frame becomes one level, and the column names become the level names.
df = pd.DataFrame({
    "first": ["bar", "bar", "foo", "foo"],
    "second": ["one", "two", "one", "two"],
})
df
pd.MultiIndex.from_frame(df)

Unnamed: 0,first,second
0,bar,one
1,bar,two
2,foo,one
3,foo,two


MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

#### 예제. MultiIndex객체 생성하여 다중인덱스 설정

In [96]:
# 4x9 frame of standard-normal values rounded to two decimals.
# Rows are (year, month) pairs and columns are (name, count) pairs; both
# indexes are built as products of their level values.
data4 = np.random.randn(4, 9).round(2)
index = pd.MultiIndex.from_product(
    [[1995, 2000], ['May', 'Dec']],
    names=['year', 'month'],
)
index
columns = pd.MultiIndex.from_product(
    [list('ABC'), [1, 2, 3]],
    names=['name', 'count'],
)
columns
df4 = pd.DataFrame(data4, index=index, columns=columns)
df4

MultiIndex([(1995, 'May'),
            (1995, 'Dec'),
            (2000, 'May'),
            (2000, 'Dec')],
           names=['year', 'month'])

MultiIndex([('A', 1),
            ('A', 2),
            ('A', 3),
            ('B', 1),
            ('B', 2),
            ('B', 3),
            ('C', 1),
            ('C', 2),
            ('C', 3)],
           names=['name', 'count'])

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,-0.26,-0.71,-0.5,0.06,-0.49,-0.38,0.26,-1.68,-0.92
1995,Dec,-0.12,-0.45,-2.14,0.01,-0.56,0.48,-0.57,-0.25,1.04
2000,May,-0.02,0.09,-0.59,-0.11,0.74,-0.75,-0.14,-2.12,-1.03
2000,Dec,-0.52,-0.14,-1.28,1.48,1.66,0.0,1.51,0.1,-0.24


In [97]:
df4.reset_index()

name,year,month,A,A,A,B,B,B,C,C,C
count,Unnamed: 1_level_1,Unnamed: 2_level_1,1,2,3,1,2,3,1,2,3
0,1995,May,-0.26,-0.71,-0.5,0.06,-0.49,-0.38,0.26,-1.68,-0.92
1,1995,Dec,-0.12,-0.45,-2.14,0.01,-0.56,0.48,-0.57,-0.25,1.04
2,2000,May,-0.02,0.09,-0.59,-0.11,0.74,-0.75,-0.14,-2.12,-1.03
3,2000,Dec,-0.52,-0.14,-1.28,1.48,1.66,0.0,1.51,0.1,-0.24


### 4. 다중인덱스의 특정 레벨 제거 : droplevel(level, axis)

#### 1) 시리즈의 다중인덱스 레벨 제거

In [98]:
s2

Name   class
James  Eng      100
       Math      90
Ted    Eng       90
       Math      70
Adam   Eng       85
       Math      90
dtype: int64

In [99]:
s2.droplevel(0)

class
Eng     100
Math     90
Eng      90
Math     70
Eng      85
Math     90
dtype: int64

In [101]:
s2.droplevel(1)

Name
James    100
James     90
Ted       90
Ted       70
Adam      85
Adam      90
dtype: int64

#### 2) 데이터프레임에서 다중인덱스 레벨 제거

In [100]:
df4

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,-0.26,-0.71,-0.5,0.06,-0.49,-0.38,0.26,-1.68,-0.92
1995,Dec,-0.12,-0.45,-2.14,0.01,-0.56,0.48,-0.57,-0.25,1.04
2000,May,-0.02,0.09,-0.59,-0.11,0.74,-0.75,-0.14,-2.12,-1.03
2000,Dec,-0.52,-0.14,-1.28,1.48,1.66,0.0,1.51,0.1,-0.24


#### 행인덱스 레벨 제거

In [105]:
df4.droplevel(level=0,axis=0)

name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
May,-0.26,-0.71,-0.5,0.06,-0.49,-0.38,0.26,-1.68,-0.92
Dec,-0.12,-0.45,-2.14,0.01,-0.56,0.48,-0.57,-0.25,1.04
May,-0.02,0.09,-0.59,-0.11,0.74,-0.75,-0.14,-2.12,-1.03
Dec,-0.52,-0.14,-1.28,1.48,1.66,0.0,1.51,0.1,-0.24


In [106]:
df4.droplevel(level=1,axis=0)

name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1995,-0.26,-0.71,-0.5,0.06,-0.49,-0.38,0.26,-1.68,-0.92
1995,-0.12,-0.45,-2.14,0.01,-0.56,0.48,-0.57,-0.25,1.04
2000,-0.02,0.09,-0.59,-0.11,0.74,-0.75,-0.14,-2.12,-1.03
2000,-0.52,-0.14,-1.28,1.48,1.66,0.0,1.51,0.1,-0.24


#### 열인덱스 레벨 제거

In [107]:
df4.droplevel(level=0,axis=1)

Unnamed: 0_level_0,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,-0.26,-0.71,-0.5,0.06,-0.49,-0.38,0.26,-1.68,-0.92
1995,Dec,-0.12,-0.45,-2.14,0.01,-0.56,0.48,-0.57,-0.25,1.04
2000,May,-0.02,0.09,-0.59,-0.11,0.74,-0.75,-0.14,-2.12,-1.03
2000,Dec,-0.52,-0.14,-1.28,1.48,1.66,0.0,1.51,0.1,-0.24


In [108]:
df4.droplevel(level=1,axis=1)

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,-0.26,-0.71,-0.5,0.06,-0.49,-0.38,0.26,-1.68,-0.92
1995,Dec,-0.12,-0.45,-2.14,0.01,-0.56,0.48,-0.57,-0.25,1.04
2000,May,-0.02,0.09,-0.59,-0.11,0.74,-0.75,-0.14,-2.12,-1.03
2000,Dec,-0.52,-0.14,-1.28,1.48,1.66,0.0,1.51,0.1,-0.24


### 5. 행인덱스 레벨 해제 : unstack()

: 행인덱스 -> 열인덱스로 변환

#### 1) 시리즈의 다중인덱스 레벨 해제

- 해제된 레벨 인덱스는 열인덱스로 변경되며, 데이터프레임으로 반환됨

In [109]:
# Four-element Series whose two-level row index is the product
# {'one', 'two'} x {'a', 'b'}.
s = pd.Series(
    data=[1, 2, 3, 4],
    index=pd.MultiIndex.from_product(iterables=[['one', 'two'], list('ab')]),
)
s

one  a    1
     b    2
two  a    3
     b    4
dtype: int64

#### 마지막 레벨 해제 : unstack(level=-1)

In [110]:
s.unstack(level=-1)

Unnamed: 0,a,b
one,1,2
two,3,4


#### 첫번째 레벨 해제 : unstack(level=0)

In [112]:
s.unstack(level=0)

Unnamed: 0,one,two
a,1,3
b,2,4


#### 2) 데이터프레임의 다중 행인덱스 레벨 해제

- 데이터프레임의 인덱스 레벨 해제
- 해제된 레벨은 열인덱스 중 가장 마지막 레벨이 됨

[형식] DataFrame.unstack(level=-1, fill_value=None)


- level : int, str, or list of these, default= -1 (last level)
    - Level(s) of index to unstack, can pass level name.

- fill_value : int, str or dict
    - Replace NaN with this value if the unstack produces missing values.


- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.unstack.html

In [113]:
# Frame used for the unstack() examples: rows are (year, month) pairs,
# columns are (name, count) pairs, values are standard-normal draws rounded
# to one decimal.
index = pd.MultiIndex.from_product(
    [[1995, 2000], ['May', 'Dec']],
    names=['year', 'month'],
)
columns = pd.MultiIndex.from_product(
    [list('ABC'), [1, 2]],
    names=['name', 'count'],
)

df = pd.DataFrame(
    np.random.randn(4, 6).round(1),
    index=index,
    columns=columns,
)
df

Unnamed: 0_level_0,name,A,A,B,B,C,C
Unnamed: 0_level_1,count,1,2,1,2,1,2
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1995,May,0.8,0.4,-1.4,0.3,-0.7,-0.7
1995,Dec,-1.5,2.2,1.0,-0.0,0.1,0.3
2000,May,1.2,0.1,-0.1,1.7,1.0,1.6
2000,Dec,-0.3,1.8,0.9,-0.1,-0.4,-0.5


In [114]:
df.unstack(level=0)

name,A,A,A,A,B,B,B,B,C,C,C,C
count,1,1,2,2,1,1,2,2,1,1,2,2
year,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000
month,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
Dec,-1.5,-0.3,2.2,1.8,1.0,0.9,-0.0,-0.1,0.1,-0.4,0.3,-0.5
May,0.8,1.2,0.4,0.1,-1.4,-0.1,0.3,1.7,-0.7,1.0,-0.7,1.6


In [115]:
df.unstack(level=-1)

name,A,A,A,A,B,B,B,B,C,C,C,C
count,1,1,2,2,1,1,2,2,1,1,2,2
month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
1995,-1.5,0.8,2.2,0.4,1.0,-1.4,-0.0,0.3,0.1,-0.7,0.3,-0.7
2000,-0.3,1.2,1.8,0.1,0.9,-0.1,-0.1,1.7,-0.4,1.0,-0.5,1.6


In [116]:
df.unstack(level='month')

name,A,A,A,A,B,B,B,B,C,C,C,C
count,1,1,2,2,1,1,2,2,1,1,2,2
month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
1995,-1.5,0.8,2.2,0.4,1.0,-1.4,-0.0,0.3,0.1,-0.7,0.3,-0.7
2000,-0.3,1.2,1.8,0.1,0.9,-0.1,-0.1,1.7,-0.4,1.0,-0.5,1.6


In [117]:
df.unstack(level=1)

name,A,A,A,A,B,B,B,B,C,C,C,C
count,1,1,2,2,1,1,2,2,1,1,2,2
month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
1995,-1.5,0.8,2.2,0.4,1.0,-1.4,-0.0,0.3,0.1,-0.7,0.3,-0.7
2000,-0.3,1.2,1.8,0.1,0.9,-0.1,-0.1,1.7,-0.4,1.0,-0.5,1.6


In [118]:
df.unstack(level=0)

name,A,A,A,A,B,B,B,B,C,C,C,C
count,1,1,2,2,1,1,2,2,1,1,2,2
year,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000
month,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
Dec,-1.5,-0.3,2.2,1.8,1.0,0.9,-0.0,-0.1,0.1,-0.4,0.3,-0.5
May,0.8,1.2,0.4,0.1,-1.4,-0.1,0.3,1.7,-0.7,1.0,-0.7,1.6


### 6. 열인덱스 레벨 해제 : stack()

: 열인덱스 -> 행인덱스로 변환

#### 데이터프레임에서 열인덱스 레벨 해제

- 지정한 열인덱스가 행인덱스의 마지막 레벨로 변환 추가됨
- single level 열인덱스를 갖는 경우 시리즈로 반환
- multi level 열인덱스를 갖는 경우 데이터프레임 반환

[형식] DataFrame.stack(level=-1, dropna=True)

- 해제할 열인덱스 레벨의 위치(정수) 또는 레벨 이름 지정
- level : int, str, list, default= -1
- dropna : bool, default True

- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.stack.html

#### 예1. 1차 레벨을 가진 데이터프레임

In [120]:
# Frame with ordinary single-level columns, used for the first stack() example.
df = pd.DataFrame(
    data={'weight': [0, 2], 'height': [1, 3]},
    index=['cat', 'dog'],
)
df

Unnamed: 0,weight,height
cat,0,1
dog,2,3


In [121]:
df.stack()

cat  weight    0
     height    1
dog  weight    2
     height    3
dtype: int64

In [122]:
df.stack(level=0)

cat  weight    0
     height    1
dog  weight    2
     height    3
dtype: int64

#### 예2. 다중레벨을 갖는 데이터프레임

In [123]:
# Two-level column index: a single top label 'weight' over the units
# 'kg' and 'pounds'.
multicol1 = pd.MultiIndex.from_tuples(
    [('weight', unit) for unit in ('kg', 'pounds')]
)
df2 = pd.DataFrame(
    data=[[1, 2], [2, 4]],
    index=['cat', 'dog'],
    columns=multicol1,
)
df2

Unnamed: 0_level_0,weight,weight
Unnamed: 0_level_1,kg,pounds
cat,1,2
dog,2,4


In [124]:
df2.stack()

Unnamed: 0,Unnamed: 1,weight
cat,kg,1
cat,pounds,2
dog,kg,2
dog,pounds,4


In [125]:
df2.stack(level=0)

Unnamed: 0,Unnamed: 1,kg,pounds
cat,weight,1,2
dog,weight,2,4


In [127]:
df2.stack(level=[1,0])

cat  kg      weight    1
     pounds  weight    2
dog  kg      weight    2
     pounds  weight    4
dtype: int64

#### 예3. 열인덱스 이름을 갖는 데이터프레임

In [128]:
df2.columns.names=['C1','C2']
df2

C1,weight,weight
C2,kg,pounds
cat,1,2
dog,2,4


In [129]:
df2.stack(level='C1') 

Unnamed: 0_level_0,C2,kg,pounds
Unnamed: 0_level_1,C1,Unnamed: 2_level_1,Unnamed: 3_level_1
cat,weight,1,2
dog,weight,2,4


In [130]:
df2.stack(level=['C2','C1']) 

     C2      C1    
cat  kg      weight    1
     pounds  weight    2
dog  kg      weight    2
     pounds  weight    4
dtype: int64

### 7. 다중인덱스의 레벨 교환 : swaplevel()

[형식] DataFrame.swaplevel(i=-2, j=-1, axis=0)

- i, j : int or str
    - Levels of the indices to be swapped. Can pass level name as string.

- axis : 0 or 'index', 1 or 'columns', default=0
    - The axis to swap levels on
    - 0 or 'index' for row-wise
    - 1 or 'columns' for column-wise

- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.swaplevel.html

In [131]:
# One 'Grade' column under a three-level row index:
# assessment type / subject / month.
df = pd.DataFrame(
    data={"Grade": list("ABAC")},
    index=[
        ["Final exam"] * 2 + ["Coursework"] * 2,
        ["History", "Geography"] * 2,
        ["January", "February", "March", "April"],
    ],
)
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
Final exam,History,January,A
Final exam,Geography,February,B
Coursework,History,March,A
Coursework,Geography,April,C


In [132]:
df.swaplevel()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
Final exam,January,History,A
Final exam,February,Geography,B
Coursework,March,History,A
Coursework,April,Geography,C


In [133]:
df.swaplevel(0,1)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
History,Final exam,January,A
Geography,Final exam,February,B
History,Coursework,March,A
Geography,Coursework,April,C


In [134]:
df.swaplevel(0,2)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
January,History,Final exam,A
February,Geography,Final exam,B
March,History,Coursework,A
April,Geography,Coursework,C


### [정리] 다중인덱스의 접근 방법

- 인덱스가 하나가 아니므로 묶어서(튜플로) 전달
- 열 접근 : df[(튜플)]
- 행 접근 : df.loc[(튜플)]
- 참고. df.iloc[]은 정수위치로 접근하여 다중인덱스에 구애받지 않음

### 문제.

#### 1. 다음과 같이 다중인덱스를 갖는 데이터프레임을 생성하시오.

- 행인덱스 레벨 = 2
- 열인덱스 레벨 = 2
- 데이터 크기는 (6,4)이며 값은 (0,1) 사이의 난수를 가짐
- 인덱스는 MultiIndex를 생성하는 방법을 사용

In [4]:
# Exercise 1: 6x4 frame of uniform random values rounded to two decimals,
# with a two-level MultiIndex on both the rows and the columns.
index = pd.MultiIndex.from_product(
    [['M', 'F'], [f'id{n}' for n in range(1, 4)]],
    names=['ridx1', 'ridx2'],
)
columns = pd.MultiIndex.from_product(
    [['A', 'B'], ['C1', 'C2']],
    names=['cidx1', 'cidx2'],
)
data = np.random.rand(6, 4).round(2)
df = pd.DataFrame(data, index=index, columns=columns)
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.24,0.64,0.41,0.55
M,id2,0.8,0.46,0.92,0.04
M,id3,0.46,0.4,0.28,0.63
F,id1,0.12,0.4,0.88,0.68
F,id2,0.12,0.77,0.42,0.64
F,id3,0.73,0.91,0.77,0.08


#### 2. df의 'A'열을 출력하시오.

In [5]:
df['A']

Unnamed: 0_level_0,cidx2,C1,C2
ridx1,ridx2,Unnamed: 2_level_1,Unnamed: 3_level_1
M,id1,0.24,0.64
M,id2,0.8,0.46
M,id3,0.46,0.4
F,id1,0.12,0.4
F,id2,0.12,0.77
F,id3,0.73,0.91


In [6]:
df.A

Unnamed: 0_level_0,cidx2,C1,C2
ridx1,ridx2,Unnamed: 2_level_1,Unnamed: 3_level_1
M,id1,0.24,0.64
M,id2,0.8,0.46
M,id3,0.46,0.4
F,id1,0.12,0.4
F,id2,0.12,0.77
F,id3,0.73,0.91


#### 3. df의 'F'행을 출력하시오.

In [15]:
df.loc[['F']]

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.12,0.4,0.88,0.68
F,id2,0.12,0.77,0.42,0.64
F,id3,0.73,0.91,0.77,0.08


#### 4. df의 'A'열의 'C1'열을 출력하시오.

In [19]:
df['A']['C1']

ridx1  ridx2
M      id1      0.24
       id2      0.80
       id3      0.46
F      id1      0.12
       id2      0.12
       id3      0.73
Name: C1, dtype: float64

#### 5. df의 'M'행의  'id1'행을 출력하시오.

In [20]:
df.loc['M'].loc['id1']

cidx1  cidx2
A      C1       0.24
       C2       0.64
B      C1       0.41
       C2       0.55
Name: id1, dtype: float64

#### 6. df의 ('M', 'id1')행 'A'열을 출력하시오.

In [24]:
df.loc['M'].loc['id1']['A']

cidx2
C1    0.24
C2    0.64
Name: id1, dtype: float64

#### 7. df의 ('M', 'id1')행 ('A','C1')열 요소를 출력하시오.

In [25]:
df.loc['M'].loc['id1']['A']['C1']

0.24

#### 8. df의 4행 4열의 요소를 출력하시오.

In [29]:
df.loc[('F','id1'),('B','C2')]

0.68

#### 9. df의 4행 4열의 요소를 10으로 변경하시오.

In [30]:
df.loc[('F','id1'),('B','C2')]=10
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.24,0.64,0.41,0.55
M,id2,0.8,0.46,0.92,0.04
M,id3,0.46,0.4,0.28,0.63
F,id1,0.12,0.4,0.88,10.0
F,id2,0.12,0.77,0.42,0.64
F,id3,0.73,0.91,0.77,0.08


-----

### 8. 다중인덱스의 행/열 추가

In [151]:
# Fresh 6x4 sample frame (uniform random values, two decimals) with
# two-level MultiIndex objects on both axes.
index = pd.MultiIndex.from_product(
    [['M', 'F'], [f'id{n}' for n in range(1, 4)]],
    names=['ridx1', 'ridx2'],
)
columns = pd.MultiIndex.from_product(
    [['A', 'B'], ['C1', 'C2']],
    names=['cidx1', 'cidx2'],
)
data = np.random.rand(6, 4).round(2)
df = pd.DataFrame(data, index=index, columns=columns)
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.75,0.0,0.27,0.41
M,id2,0.43,0.3,0.4,0.12
M,id3,0.98,0.41,0.57,0.34
F,id1,0.79,0.41,0.36,0.4
F,id2,0.3,0.78,0.93,0.33
F,id3,0.95,0.01,0.53,0.3


In [152]:
df2=df.copy()

#### 값입력을 위한 위치 지정 실수?

In [153]:
df2.loc[('F','id1'),('B','c1')]=20
df2

Unnamed: 0_level_0,cidx1,A,A,B,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,c1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M,id1,0.75,0.0,0.27,0.41,
M,id2,0.43,0.3,0.4,0.12,
M,id3,0.98,0.41,0.57,0.34,
F,id1,0.79,0.41,0.36,0.4,20.0
F,id2,0.3,0.78,0.93,0.33,
F,id3,0.95,0.01,0.53,0.3,


In [154]:
# NOTE: DataFrame.drop() returns a new frame and leaves df2 itself unchanged.
# The first display therefore lacks the ('B','c1') column, while df2 (second
# display) still contains it. To remove the column permanently, assign the
# result back: df2 = df2.drop(columns=('B','c1')).
df2.drop(columns=('B','c1'))
df2

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.75,0.0,0.27,0.41
M,id2,0.43,0.3,0.4,0.12
M,id3,0.98,0.41,0.57,0.34
F,id1,0.79,0.41,0.36,0.4
F,id2,0.3,0.78,0.93,0.33
F,id3,0.95,0.01,0.53,0.3


Unnamed: 0_level_0,cidx1,A,A,B,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,c1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M,id1,0.75,0.0,0.27,0.41,
M,id2,0.43,0.3,0.4,0.12,
M,id3,0.98,0.41,0.57,0.34,
F,id1,0.79,0.41,0.36,0.4,20.0
F,id2,0.3,0.78,0.93,0.33,
F,id3,0.95,0.01,0.53,0.3,


#### 각 행의 총합을 마지막 열로 추가

In [155]:
df2[('Row','Sum')]=df2.sum(axis=1)
df2

Unnamed: 0_level_0,cidx1,A,A,B,B,B,Row
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,c1,Sum
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
M,id1,0.75,0.0,0.27,0.41,,1.43
M,id2,0.43,0.3,0.4,0.12,,1.25
M,id3,0.98,0.41,0.57,0.34,,2.3
F,id1,0.79,0.41,0.36,0.4,20.0,21.96
F,id2,0.3,0.78,0.93,0.33,,2.34
F,id3,0.95,0.01,0.53,0.3,,1.79


#### 각 열의 총합을 마지막 행으로 추가

In [156]:
#행과 열을 모두 지정해야 함
df2.loc[('Co1','Sum'),:]=df2.sum(axis=0)
df2

Unnamed: 0_level_0,cidx1,A,A,B,B,B,Row
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,c1,Sum
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
M,id1,0.75,0.0,0.27,0.41,,1.43
M,id2,0.43,0.3,0.4,0.12,,1.25
M,id3,0.98,0.41,0.57,0.34,,2.3
F,id1,0.79,0.41,0.36,0.4,20.0,21.96
F,id2,0.3,0.78,0.93,0.33,,2.34
F,id3,0.95,0.01,0.53,0.3,,1.79
Co1,Sum,4.2,1.91,3.06,1.9,20.0,31.07


### 9. 다중인덱스 정렬

### 1) sort_index()

[형식] sort_index(*, axis=0, level=None, ascending=True, inplace=False,)

- 행/열 인덱스 기준으로 정렬
- 기본 정렬 방식 : 오름차순 정렬
- 내림차순 : ascending=False 설정

####  행인덱스 정렬

In [157]:
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.75,0.0,0.27,0.41
M,id2,0.43,0.3,0.4,0.12
M,id3,0.98,0.41,0.57,0.34
F,id1,0.79,0.41,0.36,0.4
F,id2,0.3,0.78,0.93,0.33
F,id3,0.95,0.01,0.53,0.3


In [158]:
df.sort_index()

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.79,0.41,0.36,0.4
F,id2,0.3,0.78,0.93,0.33
F,id3,0.95,0.01,0.53,0.3
M,id1,0.75,0.0,0.27,0.41
M,id2,0.43,0.3,0.4,0.12
M,id3,0.98,0.41,0.57,0.34


In [160]:
df.sort_index(ascending=False)

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id3,0.98,0.41,0.57,0.34
M,id2,0.43,0.3,0.4,0.12
M,id1,0.75,0.0,0.27,0.41
F,id3,0.95,0.01,0.53,0.3
F,id2,0.3,0.78,0.93,0.33
F,id1,0.79,0.41,0.36,0.4


In [161]:
df.sort_index(level=1)

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.79,0.41,0.36,0.4
M,id1,0.75,0.0,0.27,0.41
F,id2,0.3,0.78,0.93,0.33
M,id2,0.43,0.3,0.4,0.12
F,id3,0.95,0.01,0.53,0.3
M,id3,0.98,0.41,0.57,0.34


#### 열인덱스 기준으로 정렬

In [163]:
df.sort_index(axis=1,ascending=False)

Unnamed: 0_level_0,cidx1,B,B,A,A
Unnamed: 0_level_1,cidx2,C2,C1,C2,C1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.41,0.27,0.0,0.75
M,id2,0.12,0.4,0.3,0.43
M,id3,0.34,0.57,0.41,0.98
F,id1,0.4,0.36,0.41,0.79
F,id2,0.33,0.93,0.78,0.3
F,id3,0.3,0.53,0.01,0.95


In [164]:
df.sort_index(axis=1,level=1)

Unnamed: 0_level_0,cidx1,A,B,A,B
Unnamed: 0_level_1,cidx2,C1,C1,C2,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.75,0.27,0.0,0.41
M,id2,0.43,0.4,0.3,0.12
M,id3,0.98,0.57,0.41,0.34
F,id1,0.79,0.36,0.41,0.4
F,id2,0.3,0.93,0.78,0.33
F,id3,0.95,0.53,0.01,0.3


### 2) sort_values()

[형식] sort_values(by, *, axis=0, ascending=True, inplace=False, )

- 특정 컬럼 값을 기준으로 정렬
- by = 특정컬럼
    - 특정컬럼이 다중인덱스 일 경우 컬럼명을 튜플로 전달

#### df의 A.C1 컬럼을 기준으로 정렬

In [166]:
df.sort_values(by=('A','C1'))

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id2,0.3,0.78,0.93,0.33
M,id2,0.43,0.3,0.4,0.12
M,id1,0.75,0.0,0.27,0.41
F,id1,0.79,0.41,0.36,0.4
F,id3,0.95,0.01,0.53,0.3
M,id3,0.98,0.41,0.57,0.34


In [168]:
# The column used as the sort key must be unique.
# ('A') is just the string 'A' (no trailing comma, so not a tuple); in this
# frame it matches both ('A','C1') and ('A','C2'), so the call is left disabled.
# df.sort_values(by=('A'))

-----------