## Pandas 인덱서

- loc : label 기반 복수 인덱싱
- iloc : 숫자 기반 복수 인덱싱

In [1]:
import pandas as pd
import numpy as np

### loc 인덱서
loc 인덱서는 아래와 같은 조건에서 row 인덱싱 또는 column 인덱싱을 할 수 있다.
- 정수 인덱스가 아닌 label 값(정수로 인덱스를 직접 지정한 경우는 예외)
- label 값의 List
- label 값의 slicing
- boolean list, 1차원 array, Series

In [2]:
# Build a 3x4 demo frame holding the integers 10..21, with string labels on
# both axes so that label-based (.loc) and position-based (.iloc) lookups can
# be told apart.
df = pd.DataFrame(
    np.arange(10, 22).reshape(3, 4),
    index=list('abc'),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
a,10,11,12,13
b,14,15,16,17
c,18,19,20,21


In [3]:
# A (row label, column label) pair selects a single scalar value.
df.loc['a', 'A']

10

In [4]:
# Label slice on the rows ('b' through the last row) combined with a single
# column label; the result is a Series named after the column.
df.loc['b':, 'A']

b    14
c    18
Name: A, dtype: int64

In [5]:
# Lists of labels on both axes select a sub-table (rows a, b x columns B, D).
df.loc[['a', 'b'], ['B', 'D']]

Unnamed: 0,B,D
a,11,13
b,15,17


In [6]:
# Label slices on both axes: every row from 'a' on, every column from 'B' on.
df.loc['a':, 'B':]

Unnamed: 0,B,C,D
a,11,12,13
b,15,16,17
c,19,20,21


In [7]:
df.loc[df.A > 10, :]    # boolean mask: show only the rows where column A is greater than 10

Unnamed: 0,A,B,C,D
b,14,15,16,17
c,18,19,20,21


In [8]:
# Attribute-style access returns column 'A' as a Series.
df.A

a    10
b    14
c    18
Name: A, dtype: int64

In [9]:
# Compare the result types of two ways of taking the first row:
# a single row label through .loc yields a Series, while a positional slice
# through plain [] keeps the result a DataFrame.
print(type(df.loc['a', :]))
print(type(df[:1]))

<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>


In [10]:
# Assigning through .loc to a row label that does not exist yet ('e')
# appends a new row holding the given values.
df.loc['e']  = [90, 91, 92, 93]
df

Unnamed: 0,A,B,C,D
a,10,11,12,13
b,14,15,16,17
c,18,19,20,21
e,90,91,92,93


### iloc 인덱서
label 이 아닌 정수 위치(0부터 시작)만 사용할 수 있다. slicing 을 할 때 끝 위치는 결과에 포함되지 않는다.

In [11]:
# Integer positions: first row, second column -> a single scalar.
df.iloc[0, 1]

11

In [12]:
# First two rows of the column at position 2 ('C'); returned as a Series.
df.iloc[:2, 2]

a    12
b    16
Name: C, dtype: int64

In [13]:
# Negative positions count from the end: last two columns of the first row.
df.iloc[0, -2:]

C    12
D    13
Name: a, dtype: int64

In [14]:
# Positional slices exclude the stop position, so this yields only row 2 ('c')
# and columns 1..2 ('B', 'C'); slicing both axes keeps the result a DataFrame.
df.iloc[2:3, 1:3]

Unnamed: 0,B,C
c,19,20


In [15]:
# A single position selects a whole row; -1 is the last row (the 'e' row
# added earlier), returned as a Series.
df.iloc[-1]

A    90
B    91
C    92
D    93
Name: e, dtype: int64

### DataFrame 인덱스 설정 및 제거
- set_index : 기존의 row 인덱스를 제거하고 데이터 column 중 하나를 인덱스로 설정
- reset_index : 기존의 row 인덱스를 제거하고, 그 인덱스를 데이터 column 으로 맨 앞에 추가 (drop=True 를 주면 column 으로 추가하지 않고 버린다)

#### column 다중 인덱스

In [16]:
# Fix the seed so the random demo values are reproducible, then build a
# 10x4 frame of integers drawn from 1..9 with columns c1..c4.
np.random.seed(0)
data = pd.DataFrame(
    np.random.randint(1, 10, size=(10, 4)),
    columns='c1 c2 c3 c4'.split(),
)
data

Unnamed: 0,c1,c2,c3,c4
0,6,1,4,4
1,8,4,6,3
2,5,8,7,9
3,9,2,7,8
4,8,9,2,6
5,9,5,4,1
6,4,6,1,3
7,4,9,2,4
8,4,4,8,1
9,2,1,5,8


In [17]:
# Promote column 'c1' to the row index; it no longer appears among the data
# columns of the resulting frame, which is bound to data2.
data2 = data.set_index('c1')
data2

Unnamed: 0_level_0,c2,c3,c4
c1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6,1,4,4
8,4,6,3
5,8,7,9
9,2,7,8
8,9,2,6
9,5,4,1
4,6,1,3
4,9,2,4
4,4,8,1
2,1,5,8


In [18]:
# Turn the 'c1' index back into an ordinary (first) column and fall back to a
# 0..n-1 integer row index.
data2.reset_index()

Unnamed: 0,c1,c2,c3,c4
0,6,1,4,4
1,8,4,6,3
2,5,8,7,9
3,9,2,7,8
4,8,9,2,6
5,9,5,4,1
6,4,6,1,3
7,4,9,2,4
8,4,4,8,1
9,2,1,5,8


In [19]:
# drop=True discards the old 'c1' index instead of re-inserting it as a column.
data2.reset_index(drop=True)

Unnamed: 0,c2,c3,c4
0,1,4,4
1,4,6,3
2,8,7,9
3,2,7,8
4,9,2,6
5,5,4,1
6,6,1,3
7,9,2,4
8,4,8,1
9,1,5,8


In [20]:
# A list of two label lists passed as `columns` produces a two-level column
# index: the first list is the outer level, the second the inner level.
# The RNG is not re-seeded in this cell, so the values continue from whatever
# state earlier cells left behind.
multiIdx = pd.DataFrame(
    np.random.randint(1, 10, size=(10, 4)),
    columns=[
        ['A', 'A', 'B', 'B'],
        ['c1', 'c2', 'c3', 'c4'],
    ],
)
multiIdx

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,c1,c2,c3,c4
0,4,3,8,3
1,1,1,5,6
2,6,7,9,5
3,2,5,9,2
4,2,8,4,7
5,8,3,1,4
6,6,5,5,7
7,5,5,4,5
8,5,9,5,4
9,8,6,6,1


In [21]:
# Name the two column levels (outer: 'Cdx1', inner: 'Cdx2') so they can be
# referred to by name later on.
multiIdx.columns.names = ['Cdx1', 'Cdx2']
multiIdx

Cdx1,A,A,B,B
Cdx2,c1,c2,c3,c4
0,4,3,8,3
1,1,1,5,6
2,6,7,9,5
3,2,5,9,2
4,2,8,4,7
5,8,3,1,4
6,6,5,5,7
7,5,5,4,5
8,5,9,5,4
9,8,6,6,1


#### row 다중 인덱스

In [22]:
# Re-seed so the values below are reproducible, then build an 8x4 frame that
# has a two-level index on BOTH axes:
#   rows    -> (Rdx1: M/F, Rdx2: ID0..ID3)
#   columns -> (Cdx1: A/B, Cdx2: C/D)
np.random.seed(0)
multiIdx2 = pd.DataFrame(
    np.random.randint(1, 10, size=(8, 4)),
    index=[
        ["M"] * 4 + ["F"] * 4,
        ["ID%d" % i for i in range(4)] * 2,
    ],
    columns=[
        ["A", "A", "B", "B"],
        ["C", "D", "C", "D"],
    ],
)
# Give every level a name so stack/unstack/swaplevel can address it by name.
multiIdx2.index.names = ["Rdx1", "Rdx2"]
multiIdx2.columns.names = ["Cdx1", "Cdx2"]
multiIdx2

Unnamed: 0_level_0,Cdx1,A,A,B,B
Unnamed: 0_level_1,Cdx2,C,D,C,D
Rdx1,Rdx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,ID0,6,1,4,4
M,ID1,8,4,6,3
M,ID2,5,8,7,9
M,ID3,9,2,7,8
F,ID0,8,9,2,6
F,ID1,9,5,4,1
F,ID2,4,6,1,3
F,ID3,4,9,2,4


### row index와 col index 교환
- stack : col index 를 row index 로 변환한다.
- unstack : row index 를 col index 로 변환한다.

In [23]:
# Move the column level named 'Cdx1' down into the row index, where it
# becomes the innermost row level (Rdx1, Rdx2, Cdx1).
multiIdx2.stack('Cdx1')

Unnamed: 0_level_0,Unnamed: 1_level_0,Cdx2,C,D
Rdx1,Rdx2,Cdx1,Unnamed: 3_level_1,Unnamed: 4_level_1
M,ID0,A,6,1
M,ID0,B,4,4
M,ID1,A,8,4
M,ID1,B,6,3
M,ID2,A,5,8
M,ID2,B,7,9
M,ID3,A,9,2
M,ID3,B,7,8
F,ID0,A,8,9
F,ID0,B,2,6


In [24]:
# Same operation as stacking 'Cdx1', but the column level is addressed by
# its position (0 = outermost column level) instead of by name.
multiIdx2.stack(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Cdx2,C,D
Rdx1,Rdx2,Cdx1,Unnamed: 3_level_1,Unnamed: 4_level_1
M,ID0,A,6,1
M,ID0,B,4,4
M,ID1,A,8,4
M,ID1,B,6,3
M,ID2,A,5,8
M,ID2,B,7,9
M,ID3,A,9,2
M,ID3,B,7,8
F,ID0,A,8,9
F,ID0,B,2,6


In [25]:
# Move the row level named 'Rdx2' up into the columns, where it becomes the
# innermost column level; only 'Rdx1' remains as the row index.
multiIdx2.unstack('Rdx2')

Cdx1,A,A,A,A,A,A,A,A,B,B,B,B,B,B,B,B
Cdx2,C,C,C,C,D,D,D,D,C,C,C,C,D,D,D,D
Rdx2,ID0,ID1,ID2,ID3,ID0,ID1,ID2,ID3,ID0,ID1,ID2,ID3,ID0,ID1,ID2,ID3
Rdx1,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
F,8,9,4,4,9,5,6,9,2,4,1,2,6,1,3,4
M,6,8,5,9,1,4,8,2,4,6,7,7,4,3,9,8


In [26]:
# Same operation as unstacking 'Rdx2', but the row level is addressed by its
# position (1 = second row level) instead of by name.
multiIdx2.unstack(1)

Cdx1,A,A,A,A,A,A,A,A,B,B,B,B,B,B,B,B
Cdx2,C,C,C,C,D,D,D,D,C,C,C,C,D,D,D,D
Rdx2,ID0,ID1,ID2,ID3,ID0,ID1,ID2,ID3,ID0,ID1,ID2,ID3,ID0,ID1,ID2,ID3
Rdx1,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
F,8,9,4,4,9,5,6,9,2,4,1,2,6,1,3,4
M,6,8,5,9,1,4,8,2,4,6,7,7,4,3,9,8


### 다중 인덱스에서 인덱싱
다중 인덱스를 가진 DataFrame에서 인덱싱을 할 경우, tuple로 인덱싱을 해야 한다. 만약 단일 label 값을 넣으면 다중 인덱스들 중에서 최상위 값을 지정한 것으로 본다. tuple 대신 list를 넣으면 하나의 다중 인덱스 키가 아니라 여러 label의 목록으로 해석되므로 주의한다.

In [27]:
# Display the frame with two-level columns as a reference for the lookups below.
multiIdx

Cdx1,A,A,B,B
Cdx2,c1,c2,c3,c4
0,4,3,8,3
1,1,1,5,6
2,6,7,9,5
3,2,5,9,2
4,2,8,4,7
5,8,3,1,4
6,6,5,5,7
7,5,5,4,5
8,5,9,5,4
9,8,6,6,1


In [28]:
# A single label is matched against the outermost column level, so this
# returns every column under 'A' (c1 and c2) as a DataFrame.
multiIdx['A']

Cdx2,c1,c2
0,4,3
1,1,1
2,6,7
3,2,5
4,2,8
5,8,3
6,6,5
7,5,5
8,5,9
9,8,6


In [29]:
# A tuple addresses one exact column across both levels; the result is a
# Series whose name is the tuple (A, c1).
multiIdx[('A', 'c1')]

0    4
1    1
2    6
3    2
4    2
5    8
6    6
7    5
8    5
9    8
Name: (A, c1), dtype: int64

In [30]:
# Display the frame again before the .loc lookup that follows.
multiIdx

Cdx1,A,A,B,B
Cdx2,c1,c2,c3,c4
0,4,3,8,3
1,1,1,5,6
2,6,7,9,5
3,2,5,9,2
4,2,8,4,7
5,8,3,1,4
6,6,5,5,7
7,5,5,4,5
8,5,9,5,4
9,8,6,6,1


In [31]:
# NOTE(review): the column key here is a *list* ["A", 'c1'], not a tuple.
# Presumably the list is read as a collection of labels rather than as one
# multi-level key, which would explain why the recorded result holds every
# column under 'A' (c1 and c2) instead of the single ('A', 'c1') value.
# To select exactly one column, pass a tuple: multiIdx.loc[2, ("A", "c1")].
# Newer pandas releases may raise KeyError for the unmatched 'c1' label in
# the list form -- TODO confirm against the installed pandas version.
multiIdx.loc[2, ["A", 'c1']]

Cdx1  Cdx2
A     c1      6
      c2      7
Name: 2, dtype: int64

### 다중 인덱스 순서교환
- swaplevel(i, j, axis)
- axis=0 이면 row를, axis=1이면 col 인덱스를 교환한다.

In [32]:
# Display the frame with two-level rows and columns before swapping levels.
multiIdx2

Unnamed: 0_level_0,Cdx1,A,A,B,B
Unnamed: 0_level_1,Cdx2,C,D,C,D
Rdx1,Rdx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,ID0,6,1,4,4
M,ID1,8,4,6,3
M,ID2,5,8,7,9
M,ID3,9,2,7,8
F,ID0,8,9,2,6
F,ID1,9,5,4,1
F,ID2,4,6,1,3
F,ID3,4,9,2,4


In [33]:
# Swap the two row-index levels (axis=0): 'Rdx2' becomes the outer level and
# 'Rdx1' the inner one; the row order and the data are unchanged.
multiIdx2.swaplevel("Rdx1", "Rdx2", axis=0)

Unnamed: 0_level_0,Cdx1,A,A,B,B
Unnamed: 0_level_1,Cdx2,C,D,C,D
Rdx2,Rdx1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
ID0,M,6,1,4,4
ID1,M,8,4,6,3
ID2,M,5,8,7,9
ID3,M,9,2,7,8
ID0,F,8,9,2,6
ID1,F,9,5,4,1
ID2,F,4,6,1,3
ID3,F,4,9,2,4


In [34]:
# Swap the two column-index levels (axis=1): 'Cdx2' becomes the outer level
# and 'Cdx1' the inner one; the column order and the data are unchanged.
multiIdx2.swaplevel("Cdx1", "Cdx2", axis=1)

Unnamed: 0_level_0,Cdx2,C,D,C,D
Unnamed: 0_level_1,Cdx1,A,A,B,B
Rdx1,Rdx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,ID0,6,1,4,4
M,ID1,8,4,6,3
M,ID2,5,8,7,9
M,ID3,9,2,7,8
F,ID0,8,9,2,6
F,ID1,9,5,4,1
F,ID2,4,6,1,3
F,ID3,4,9,2,4
