# pandas 데이터분석.pdf
# **Pandas**
- SQL 테이블 또는 Excel 스프레드 시트에서와 같은 열과 행으로 이루어진 테이블 형식 데이터
- 정렬되었거나 정렬되지 않은 시계열 데이터
- 다른 형태의 관찰 / 통계 데이터 세트

<br>

---
> ## **alias(별칭)와 버전**
---

In [1]:
import pandas as pd

In [2]:
pd.__version__

'1.5.2'


---
> # **_Series_**
> - 1차원 배열
> - 인덱스 사용 가능
> - **데이터 타입** 가짐(dtype)
---

In [3]:
import numpy as np
arr = np.arange(100,105)
arr

array([100, 101, 102, 103, 104])

#### dtype 지정한 경우

In [4]:
s = pd.Series(arr,dtype ='int32')
s

0    100
1    101
2    102
3    103
4    104
dtype: int32

#### list 지정한 경우

In [5]:
s = pd.Series(['부장','차장','대리','사원','인턴'])
s

0    부장
1    차장
2    대리
3    사원
4    인턴
dtype: object

---
> ### **indexing**
---

In [6]:
s = pd.Series(['손흥민','김연아','박세리','박찬호','김연경'],
              index = ['a','b','c','d','e'])
s

a    손흥민
b    김연아
c    박세리
d    박찬호
e    김연경
dtype: object

In [7]:
# Select the second element by position explicitly. `s` has string labels, so
# a bare integer key (s[1]) only works through a positional fallback that
# pandas deprecated in 2.1; .iloc states the intent and works on all versions.
s.iloc[1]

'김연아'

---
> ### **fancy indexing**
> - index를 선택하여 list로 정의하고, 선택한 index list로 indexing 하는 방법
---

In [8]:
s[['a','c']]

a    손흥민
c    박세리
dtype: object

In [9]:
i = ['a','c']
s[i]

a    손흥민
c    박세리
dtype: object

---
> ### **boolean indexing**
> - index list 에서 **True인 index만 선택**합니다.
> - boolean index list의 개수와 Series의 개수가 맞아야 함
---

In [10]:
s[[True,True,False,False,True]]

a    손흥민
b    김연아
e    김연경
dtype: object

In [11]:
s = pd.Series([29,29,np.nan,11,56], index = ['a','b','c','d','e'])
s

a    29.0
b    29.0
c     NaN
d    11.0
e    56.0
dtype: float64

In [12]:
s > 50

a    False
b    False
c    False
d    False
e     True
dtype: bool

In [13]:
s[s>50]

e    56.0
dtype: float64

---
> ### **결측치 (NaN) 값 처리**
> - isnull()과 isna()는 NaN을 찾는 함수
> - isnull()과 isna()는 결과가 동일
---

In [14]:
s.isnull()

a    False
b    False
c     True
d    False
e    False
dtype: bool

In [15]:
s.isna()

a    False
b    False
c     True
d    False
e    False
dtype: bool

In [16]:
s[s.isnull()]

c   NaN
dtype: float64

In [17]:
s[s.isna()]

c   NaN
dtype: float64

In [18]:
s.notnull()

a     True
b     True
c    False
d     True
e     True
dtype: bool

---
> ### **slicing**
> - (주의) 숫자형(위치) index로 slicing할 때는 뒷 index가 포함되지 않음
> - 반대로 label index(예: 'b':'d')로 slicing할 때는 뒷 index까지 포함됨
---

In [19]:
s[1:3]

b    29.0
c     NaN
dtype: float64

In [20]:
s['b':'d']

b    29.0
c     NaN
d    11.0
dtype: float64

---
> # **_DataFrame_**
> - 2차원 데이터 구조
> - 행, 열로 구성
> - 각 열은 각각의 데이터 타입을 가짐
---

In [21]:
import pandas as pd

In [22]:
pd.DataFrame([[1,2,3],
              [4,5,6],
              [7,8,9]])

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [23]:
pd.DataFrame([[1,2,3],
              [4,5,6],
              [7,8,9]], columns=['가','나','다'])

Unnamed: 0,가,나,다
0,1,2,3
1,4,5,6
2,7,8,9


#### **dictionary를 통한 생성**

In [24]:
# Column-oriented input: each key becomes a column, each list its values.
data = dict(
    name=['Kim', 'Lee', 'Park'],
    age=[24, 27, 34],
    children=[2, 1, 3],
)
df = pd.DataFrame(data)
df  # last expression of the cell, so the notebook renders the frame

Unnamed: 0,name,age,children
0,Kim,24,2
1,Lee,27,1
2,Park,34,3


--- 
> ### **속성**
> - index : 행
> - columns : 열
> - values : numpy array 형식의 데이터 값
> - T : 전치
---

In [25]:
df.index

RangeIndex(start=0, stop=3, step=1)

In [26]:
df.columns

Index(['name', 'age', 'children'], dtype='object')

In [27]:
df.dtypes

name        object
age          int64
children     int64
dtype: object

In [28]:
df.T

Unnamed: 0,0,1,2
name,Kim,Lee,Park
age,24,27,34
children,2,1,3


#### **index 지정**

In [29]:
df

Unnamed: 0,name,age,children
0,Kim,24,2
1,Lee,27,1
2,Park,34,3


In [30]:
df.index = list('abc')
df

Unnamed: 0,name,age,children
a,Kim,24,2
b,Lee,27,1
c,Park,34,3


#### **column 다루기**

In [31]:
df['name']

a     Kim
b     Lee
c    Park
Name: name, dtype: object

In [32]:
type(df['name'])

pandas.core.series.Series

In [33]:
df[['name','children']]

Unnamed: 0,name,children
a,Kim,2
b,Lee,1
c,Park,3


In [34]:
df.rename(columns={'name':'이름'})

Unnamed: 0,이름,age,children
a,Kim,24,2
b,Lee,27,1
c,Park,34,3


In [35]:
df.rename({'name':'이름'},axis =1)

Unnamed: 0,이름,age,children
a,Kim,24,2
b,Lee,27,1
c,Park,34,3


---
> # **_Pandas_**
> - 색인이 다른 객체간의 산술 연산
> - 객체를 더할 때 짝이 맞지 않는 색인이 있으면 결과에 색인이 통합됨
---

In [36]:
import pandas as pd
import numpy as np

In [37]:
s1 = pd.Series([7.3,-2.5,3.4,1.5],index=['a','c','d','e'])
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [38]:
s2 = pd.Series([-2.1,3.6,-1.5,4,3.1],index=['a','c','e','f','g'])
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [39]:
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [40]:
df1 = pd.DataFrame(np.arange(9.).reshape(3,3),columns=list('bcd'),
                   index=['Ohio','Texas','Colorado'])
df1

Unnamed: 0,b,c,d
Ohio,0.0,1.0,2.0
Texas,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


In [41]:
df2 = pd.DataFrame(np.arange(12.).reshape(4,3),columns=list('bde'),
                   index=['Utah','Ohio','Texas','Oregon'])
df2

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [42]:
df1 + df2

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


In [43]:
df1.add(df2)

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


---
> ### **DataFrame과 Series간의 연산**
> #### 1. Series의 색인을 DataFrame의 칼럼에 맞추고 계산
---

In [44]:
# 4x3 float grid (0.0 .. 11.0) with state names as rows and b/d/e as columns.
frame = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
series = frame.loc['Utah']  # the 'Utah' row as a Series indexed by column label
print(frame)
print(series)
# The Series index is matched against the frame's columns, so the 'Utah' row
# is subtracted from every row of the frame.
frame - series

          b     d     e
Utah    0.0   1.0   2.0
Ohio    3.0   4.0   5.0
Texas   6.0   7.0   8.0
Oregon  9.0  10.0  11.0
b    0.0
d    1.0
e    2.0
Name: Utah, dtype: float64


Unnamed: 0,b,d,e
Utah,0.0,0.0,0.0
Ohio,3.0,3.0,3.0
Texas,6.0,6.0,6.0
Oregon,9.0,9.0,9.0


> #### 2. 색인값이 없다면, 형식을 맞추기 위해 재색인

In [45]:
series2 = pd.Series(range(3),index=['b','e','f'])
print(series2)
frame + series2

b    0
e    1
f    2
dtype: int64


Unnamed: 0,b,d,e,f
Utah,0.0,,3.0,
Ohio,3.0,,6.0,
Texas,6.0,,9.0,
Oregon,9.0,,12.0,


---
> ### **함수 적용과 매핑**

---

In [46]:
frame = pd.DataFrame(np.random.randn(4,3), columns=list('bde'),
                     index = ['Utah','Ohio','Texas','Oregon'])
frame

Unnamed: 0,b,d,e
Utah,-0.297258,1.035234,0.394226
Ohio,0.847369,-0.576185,1.46784
Texas,0.168765,-0.312553,-0.536442
Oregon,0.124916,0.028613,0.860533


In [47]:
np.abs(frame)

Unnamed: 0,b,d,e
Utah,0.297258,1.035234,0.394226
Ohio,0.847369,0.576185,1.46784
Texas,0.168765,0.312553,0.536442
Oregon,0.124916,0.028613,0.860533


#### **apply**
- 각 로우나 칼럼의 1차원 배열에 함수적용
- Series, Df 모두 가능

In [48]:
def f(x):
    """Return the spread of *x*: its maximum minus its minimum."""
    return x.max() - x.min()
frame.apply(f) # axis=0 (default): f is called once per column, giving one value per column (b, d, e)

b    1.144628
d    1.611419
e    2.004282
dtype: float64

In [49]:
frame.apply(f,axis=1) # axis=1: f is called once per row, giving one value per row label

Utah      1.332492
Ohio      2.044025
Texas     0.705207
Oregon    0.831920
dtype: float64

In [50]:
def f(x):
    """Summarize *x* as a two-element Series labelled 'min' and 'max'."""
    lowest, highest = x.min(), x.max()
    return pd.Series([lowest, highest], index=['min', 'max'])
frame.apply(f)

Unnamed: 0,b,d,e
min,-0.297258,-0.576185,-0.536442
max,0.847369,1.035234,1.46784


#### **applymap**

In [51]:
def fmt(x):
    """Render the number *x* as a string with two decimal places."""
    return f'{x:.2f}'


# Named `fmt` rather than `format` so the builtin format() is not shadowed for
# the rest of the notebook session.
# applymap calls fmt on every element of the frame. (pandas >= 2.1 offers
# DataFrame.map for the same purpose; this notebook runs on 1.5.2.)
frame.applymap(fmt)

Unnamed: 0,b,d,e
Utah,-0.3,1.04,0.39
Ohio,0.85,-0.58,1.47
Texas,0.17,-0.31,-0.54
Oregon,0.12,0.03,0.86


---
> # **_데이터 정렬, 순위_**
> - 색인을 기준으로 정렬
> - 값을 기준으로 정렬
> - 특정 로우나 컬럼만 정렬
---

#### **sort_index()**

In [52]:
obj = pd.Series(range(4),index = ['d','a','b','c'])
obj

d    0
a    1
b    2
c    3
dtype: int64

In [53]:
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [54]:
frame = pd.DataFrame(np.arange(8).reshape(2,4),index=['three','one'],
                     columns = ['d','a','b','c'])
frame

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [55]:
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [56]:
frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [57]:
frame.sort_index(axis=1,ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


#### **rank**

In [58]:
obj = pd.Series([7,-5,7,4,2,0,4])
obj

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [59]:
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [60]:
obj.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

---
> ### **동점을 가진 데이터의 순위를 정하는 방법**
> - average(평균) : 그룹의 평균순위 부여
>     - 두 명이 공동 1등이라면 둘다 1.5등 처리
> - min(최소값) : 그룹에서 가장 낮은 순위 부여
>     - 두 명이 공동 1등이라면 둘다 1등 처리
> - max(최댓값) : 그룹에서 가장 높은 순위 부여
>     - 두 명이 공동 1등이라면 둘다 2등 처리
> - first(첫 번째) : 그룹에서 표시되는 순서대로 순위 부여
>     - 두 명이 공동 1등이라면 순서가 빠른 사람을 1등으로
> - dense(밀도) : min처럼 동점에 같은 순위를 부여하지만, 그룹 간 순위는 건너뛰지 않고 항상 1씩 증가
---

In [61]:
obj.rank(ascending =False,method='max')

0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64

In [62]:
frame = pd.DataFrame({'b':[4.3,7,-3,2],
                      'a':[0,1,0,1],
                      'c':[-2,5,8,-2.5]})
frame

Unnamed: 0,b,a,c
0,4.3,0,-2.0
1,7.0,1,5.0
2,-3.0,0,8.0
3,2.0,1,-2.5


In [63]:
frame.rank()

Unnamed: 0,b,a,c
0,3.0,1.5,2.0
1,4.0,3.5,3.0
2,1.0,1.5,4.0
3,2.0,3.5,1.0


In [64]:
frame.rank(axis=1)

Unnamed: 0,b,a,c
0,3.0,2.0,1.0
1,3.0,1.0,2.0
2,1.0,2.0,3.0
3,3.0,2.0,1.0


#### **중복 색인**

In [65]:
obj = pd.Series(range(5), index=['a','a','b','b','c'])
obj

a    0
a    1
b    2
b    3
c    4
dtype: int64

In [66]:
obj['a']

a    0
a    1
dtype: int64

In [67]:
obj['c']

4

In [68]:
df = pd.DataFrame(np.random.rand(4,3), index=['a','a','b','b'])
df

Unnamed: 0,0,1,2
a,0.281122,0.988615,0.195437
a,0.896894,0.669023,0.159405
b,0.395493,0.050853,0.462716
b,0.316328,0.23783,0.652184


In [69]:
df.loc['b']

Unnamed: 0,0,1,2
b,0.395493,0.050853,0.462716
b,0.316328,0.23783,0.652184


---
> ### **기본 자료 특성 탐색**
> - 자료특성 요약
> - 유일 값, 카운트, 멤버십
---

#### 자료특성 요약

In [70]:
import numpy as np
df = pd.DataFrame([[1.4,np.nan],[7.1,-4.5],
                   [np.nan,np.nan],[0.75,-1.3]],
                  index = ['a','b','c','d'],
                  columns = ['one','two'])
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [71]:
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


In [72]:
df.sum()

one    9.25
two   -5.80
dtype: float64

In [73]:
df.mean(axis=0,skipna=True)

one    3.083333
two   -2.900000
dtype: float64

In [74]:
df.sum(axis=1)

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [75]:
df.mean(axis=1,skipna=False)

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [76]:
df.mean(axis=1,skipna=True)

a    1.400
b    1.300
c      NaN
d   -0.275
dtype: float64

In [77]:
df.idxmax()

one    b
two    d
dtype: object

##### 수치 데이터가 아니면?

In [78]:
obj = pd.Series(['a','a','b','c']*4)
obj

0     a
1     a
2     b
3     c
4     a
5     a
6     b
7     c
8     a
9     a
10    b
11    c
12    a
13    a
14    b
15    c
dtype: object

In [79]:
obj.describe()

count     16
unique     3
top        a
freq       8
dtype: object

#### 유일 값, 카운트, 멤버십

In [80]:
obj = pd.Series(['c','a','d','a','a','b','b','c','c'])
obj

0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object

In [81]:
uniques = obj.unique()
uniques

array(['c', 'a', 'd', 'b'], dtype=object)

In [82]:
obj.value_counts()

c    3
a    3
b    2
d    1
dtype: int64

In [83]:
# Count occurrences in the raw value array. The top-level pd.value_counts
# function is deprecated as of pandas 2.1, so wrap the array in a Series and
# use the method instead; sort=True orders the result by descending count.
pd.Series(obj.values).value_counts(sort=True)

c    3
a    3
b    2
d    1
dtype: int64

In [84]:
obj

0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object

In [85]:
mask = obj.isin(['b','c'])
mask

0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool

---
> # **_누락값 처리_**
> - 누락값 찾기 : isnull
> - 누락값 제거 : dropna
> - 누락값 채우기 : fillna
---

---
> ### **누락값 찾기 : isnull**
> - pandas는 누락된 데이터를 모두 NaN으로 취급
---

In [86]:
string_data = pd.Series(['aardvark','artichoke',np.nan,'avocado'])
string_data

0     aardvark
1    artichoke
2          NaN
3      avocado
dtype: object

In [87]:
string_data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [88]:
string_data[0] =None
string_data

0         None
1    artichoke
2          NaN
3      avocado
dtype: object

In [89]:
string_data.isnull()

0     True
1    False
2     True
3    False
dtype: bool

---
> ### **누락값 제거 : dropna**
---

In [90]:
data = pd.Series([1,np.nan,3.5,np.nan,7])
data

0    1.0
1    NaN
2    3.5
3    NaN
4    7.0
dtype: float64

In [91]:
data.dropna()

0    1.0
2    3.5
4    7.0
dtype: float64

In [92]:
data = pd.DataFrame([[1.,7.5,3.],[1.,np.nan,np.nan],
                     [np.nan,np.nan,np.nan],[np.nan,9,2]])
data

Unnamed: 0,0,1,2
0,1.0,7.5,3.0
1,1.0,,
2,,,
3,,9.0,2.0


In [93]:
cleaned = data.dropna()
cleaned

Unnamed: 0,0,1,2
0,1.0,7.5,3.0


In [94]:
data.dropna(how='all')

Unnamed: 0,0,1,2
0,1.0,7.5,3.0
1,1.0,,
3,,9.0,2.0


In [95]:
data.dropna(thresh=2)

Unnamed: 0,0,1,2
0,1.0,7.5,3.0
3,,9.0,2.0


---
> ### **누락값 채우기 : fillna**
---

In [96]:
df = pd.DataFrame(np.random.randn(7,3))
df.loc[:4,1] = np.nan
df.loc[:2,2] = np.nan
df

Unnamed: 0,0,1,2
0,-0.097827,,
1,0.378866,,
2,0.075183,,
3,-0.318241,,-0.005219
4,0.010806,,0.129015
5,0.750739,0.114358,-0.102532
6,1.153261,-0.134278,0.509729


In [97]:
df.fillna(0)

Unnamed: 0,0,1,2
0,-0.097827,0.0,0.0
1,0.378866,0.0,0.0
2,0.075183,0.0,0.0
3,-0.318241,0.0,-0.005219
4,0.010806,0.0,0.129015
5,0.750739,0.114358,-0.102532
6,1.153261,-0.134278,0.509729


In [98]:
df.fillna({1:0.5,2:-1})

Unnamed: 0,0,1,2
0,-0.097827,0.5,-1.0
1,0.378866,0.5,-1.0
2,0.075183,0.5,-1.0
3,-0.318241,0.5,-0.005219
4,0.010806,0.5,0.129015
5,0.750739,0.114358,-0.102532
6,1.153261,-0.134278,0.509729


In [99]:
df =pd.DataFrame(np.random.randn(6,3))
df.loc[2:,1] = np.nan
df.loc[4:,2] = np.nan
df

Unnamed: 0,0,1,2
0,0.567532,0.101723,-0.534234
1,1.460447,0.014847,-0.800111
2,-0.241112,,-0.148786
3,1.831397,,-1.340711
4,-0.428947,,
5,1.461781,,


In [100]:
# Forward fill: each NaN takes the last non-missing value above it in the same
# column. fillna(method='ffill') is deprecated as of pandas 2.1; ffill() is the
# equivalent dedicated method and is also available on 1.5.2.
df.ffill()

Unnamed: 0,0,1,2
0,0.567532,0.101723,-0.534234
1,1.460447,0.014847,-0.800111
2,-0.241112,0.014847,-0.148786
3,1.831397,0.014847,-1.340711
4,-0.428947,0.014847,-1.340711
5,1.461781,0.014847,-1.340711


In [101]:
df.fillna(df.mean(0))

Unnamed: 0,0,1,2
0,0.567532,0.101723,-0.534234
1,1.460447,0.014847,-0.800111
2,-0.241112,0.058285,-0.148786
3,1.831397,0.058285,-1.340711
4,-0.428947,0.058285,-0.70596
5,1.461781,0.058285,-0.70596


---
> # **_계층적 색인_**
---

In [102]:
data =pd.Series(np.random.randn(10),
                index= [['a','a','a','b','b','b','c','c','d','d'],
                        [1,2,3,1,2,3,1,2,2,3]])
data

a  1    0.017808
   2   -1.033680
   3    0.035477
b  1   -0.641870
   2   -0.214899
   3    0.232527
c  1    0.836396
   2    1.038473
d  2    1.312113
   3   -1.523416
dtype: float64

In [103]:
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 2),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [104]:
data['b']

1   -0.641870
2   -0.214899
3    0.232527
dtype: float64

In [105]:
data['b':'c']

b  1   -0.641870
   2   -0.214899
   3    0.232527
c  1    0.836396
   2    1.038473
dtype: float64

In [106]:
data.unstack()

Unnamed: 0,1,2,3
a,0.017808,-1.03368,0.035477
b,-0.64187,-0.214899,0.232527
c,0.836396,1.038473,
d,,1.312113,-1.523416


In [107]:
data.unstack().stack()

a  1    0.017808
   2   -1.033680
   3    0.035477
b  1   -0.641870
   2   -0.214899
   3    0.232527
c  1    0.836396
   2    1.038473
d  2    1.312113
   3   -1.523416
dtype: float64

In [108]:
frame = pd.DataFrame(np.arange(12).reshape(4,3),
                     index =[['a','a','b','b'],[1,2,1,2]],
                     columns=[['Ohio','Ohio','Colorado'],
                              ['Green','Red','Green']])
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [109]:
frame.index.names = ['key1','key2']
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [110]:
frame.columns.names=['state','color']
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [111]:
frame['Ohio']

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [112]:
frame['Ohio'].loc['a']

color,Green,Red
key2,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0,1
2,3,4


In [113]:
frame.groupby(level='key2').sum()

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,6,8,10
2,12,14,16


In [114]:
# Sum columns that share the same 'color' level. groupby(axis=1) is deprecated
# as of pandas 2.1, so transpose to make the column levels the row index,
# group on 'color', then transpose back to the original orientation.
frame.T.groupby(level='color').sum().T

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,2,1
a,2,8,4
b,1,14,7
b,2,20,10


---
> ### **로우 색인과 컬럼 색인 교환**
---

In [115]:
# Seven rows: 'a' counts up, 'b' counts down, 'c'/'d' form a (group, position)
# pair within each group.
frame = pd.DataFrame(dict(
    a=range(7),
    b=range(7, 0, -1),
    c=['one'] * 3 + ['two'] * 4,
    d=[*range(3), *range(4)],
))
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [116]:
frame2 = frame.set_index(['c','d'])
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [117]:
frame2.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1
