# 데이터프레임의 인덱스 조작

## 인덱스 설정 및 제거

* 데이터프레임에서 인덱스로 들어가 있어야 할 데이터가 일반 데이터 열에 들어가 있거나, 반대로 일반 데이터 열이어야 할 데이터가 인덱스로 되어 있을 수 있다. 이때는 `set_index` 메서드나 `reset_index` 메서드로 인덱스와 일반 데이터 열을 교환할 수 있다.

* `set_index` : 기존의 행 인덱스를 제거하고 데이터 열 중 하나를 인덱스로 설정한다. 기본적으로 원본은 그대로 두고 새 데이터프레임을 반환하며, `inplace=True`를 지정하면 원본을 직접 수정한다.

* `reset_index` : 기존의 행 인덱스를 일반 데이터 열로 옮기고, 0부터 시작하는 기본 정수 인덱스를 새로 설정한다. 기본적으로 원본은 그대로 두고 새 데이터프레임을 반환하며, `inplace=True`를 지정하면 원본을 직접 수정한다.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a small demo frame: one label column (C1) plus three columns of
# random numbers rounded to two decimals.
np.random.seed(2021)  # fixed seed so the table is reproducible
label_row = list('ABCDE')
value_rows = np.round(np.random.rand(3, 5), 2)
# Stacking a row of strings on top of float rows makes NumPy store every
# entry as a string, so C2-C4 hold text such as '0.61', not floats.
stacked_rows = np.vstack([label_row, value_rows])
# Transpose so that each stacked row becomes one column of the frame.
df1 = pd.DataFrame(stacked_rows.T, columns=["C1", "C2", "C3", "C4"])
df1

Unnamed: 0,C1,C2,C3,C4
0,A,0.61,0.13,0.1
1,B,0.73,0.18,0.06
2,C,0.14,0.75,0.96
3,D,0.31,0.66,0.62
4,E,1.0,0.78,0.09


In [3]:
# Inspect the raw array that results from stacking the label row on top of
# three rows of rounded random numbers. The seed is not reset in this cell,
# so the numbers differ from the ones stored in df1; mixing strings with
# floats yields an all-string array (see the dtype in the output).
np.vstack([list('ABCDE'), np.round(np.random.rand(3, 5), 2)])

array([['A', 'B', 'C', 'D', 'E'],
       ['0.56', '0.62', '0.96', '0.57', '0.37'],
       ['0.45', '0.2', '0.57', '0.2', '0.58'],
       ['0.48', '0.52', '0.82', '0.73', '0.07']], dtype='<U32')

In [5]:
# Promote the 'C1' column to the row index; the resulting frame is stored
# under a new name, df2.
df2 = df1.set_index('C1')
df2

Unnamed: 0_level_0,C2,C3,C4
C1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,0.61,0.13,0.1
B,0.73,0.18,0.06
C,0.14,0.75,0.96
D,0.31,0.66,0.62
E,1.0,0.78,0.09


In [6]:
# Move the 'C1' index back into an ordinary column and fall back to the
# default integer index. The result is only displayed here, not assigned.
df2.reset_index()

Unnamed: 0,C1,C2,C3,C4
0,A,0.61,0.13,0.1
1,B,0.73,0.18,0.06
2,C,0.14,0.75,0.96
3,D,0.31,0.66,0.62
4,E,1.0,0.78,0.09


In [7]:
# Display df2 again: it still has 'C1' as its index, because the preceding
# reset_index() call was neither assigned nor run with inplace=True.
df2

Unnamed: 0_level_0,C2,C3,C4
C1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,0.61,0.13,0.1
B,0.73,0.18,0.06
C,0.14,0.75,0.96
D,0.31,0.66,0.62
E,1.0,0.78,0.09


In [8]:
# With inplace=True, reset_index modifies df2 itself, so df2 now carries
# 'C1' as a regular column and has the default integer index.
df2.reset_index(inplace=True)
df2

Unnamed: 0,C1,C2,C3,C4
0,A,0.61,0.13,0.1
1,B,0.73,0.18,0.06
2,C,0.14,0.75,0.96
3,D,0.31,0.66,0.62
4,E,1.0,0.78,0.09


In [9]:
# Exercise 4.5.1
# Create a reproducible 5x3 table of random integer scores, one column per
# subject (Korean, English, math).

subject_names = ['국어', '영어', '수학']
np.random.seed(2021)  # fixed seed so the scores are reproducible
data = np.random.randint(low=0, high=100, size=(5, len(subject_names)))
df_score1 = pd.DataFrame(data=data, columns=subject_names)
df_score1

Unnamed: 0,국어,영어,수학
0,85,57,0
1,94,86,44
2,62,91,29
3,21,93,24
4,12,70,70


In [20]:
# Label the five rows A-E instead of using the default integer index.
# NOTE(review): the recorded output for this cell is a NameError, presumably
# because df_score1 was not defined in the session when the cell ran --
# run the cell that creates df_score1 first.
df_score1.index = ['A', 'B', 'C', 'D', 'E']
df_score1

NameError: ignored

In [12]:
# Label the five rows A-E and name the index '이름', so the name appears as
# the index header and is used as the column name once the index is reset.
df_score1.index = ['A', 'B', 'C', 'D', 'E']
df_score1.index.name = '이름'
df_score1

Unnamed: 0_level_0,국어,영어,수학
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,85,57,0
B,94,86,44
C,62,91,29
D,21,93,24
E,12,70,70


In [13]:
# Turn the '이름' index back into an ordinary column; the result gets the
# default integer index and is stored under a new name, df_score2.
df_score2 = df_score1.reset_index()
df_score2

Unnamed: 0,이름,국어,영어,수학
0,A,85,57,0
1,B,94,86,44
2,C,62,91,29
3,D,21,93,24
4,E,12,70,70


In [16]:
# Make '이름' the index again, this time modifying df_score2 in place.
# Non-inplace alternative: df_score2 = df_score2.set_index('이름')
df_score2.set_index('이름', inplace=True)
df_score2

Unnamed: 0_level_0,국어,영어,수학
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,85,57,0
B,94,86,44
C,62,91,29
D,21,93,24
E,12,70,70


## 다중 인덱스

In [19]:
# Build a 6x4 frame whose rows and columns both use a two-level
# (multi-level) index.
np.random.seed(2021)  # fixed seed so the values are reproducible
cell_values = np.round(np.random.randn(6, 4), 2)

# Column levels: outer level A/B, inner level C/D.
column_levels = [["A", "A", "B", "B"],
                 ["C", "D", "C", "D"]]

# Row levels: outer level M/F, inner level id_1..id_3 repeated per group.
member_ids = ["id_" + str(n) for n in range(1, 4)]
row_levels = [["M"] * 3 + ["F"] * 3, member_ids * 2]

df4 = pd.DataFrame(cell_values, columns=column_levels, index=row_levels)
# Name each level so the levels can be referred to by name.
df4.columns.names = ["Cidx1", "Cidx2"]
df4.index.names = ["Ridx1", "Ridx2"]
df4

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C,D,C,D
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,1.49,0.68,-0.42,-0.81
M,id_2,0.56,-0.71,1.13,0.65
M,id_3,0.11,0.42,0.12,-0.84
F,id_1,0.41,0.1,-1.91,1.1
F,id_2,-1.4,-0.23,-1.34,0.3
F,id_3,-0.72,2.54,1.32,0.07
