In [9]:
# 라이브러리 불러오기
import pandas as pd
import seaborn as sns

# titanic 데이터셋에서 5개 열을 선택하여 데이터프레임 만들기
titanic = sns.load_dataset('titanic')
df = titanic.loc[:, ['age', 'sex', 'class', 'fare', 'survived']]

# class 열을 기준으로 분할
group = df.groupby(['class', 'sex'], observed=True)

# 그룹 객체에 연산 메서드 적용
gdf = group.agg(['mean', 'std'], numeric_only=True)
gdf

Unnamed: 0_level_0,Unnamed: 1_level_0,age,age,fare,fare,survived,survived
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std
class,sex,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
First,female,34.611765,13.612052,106.125798,74.259988,0.968085,0.176716
First,male,41.281386,15.13957,67.226127,77.548021,0.368852,0.484484
Second,female,28.722973,12.872702,21.970121,10.891796,0.921053,0.271448
Second,male,30.740707,14.793894,19.741782,14.922235,0.157407,0.365882
Third,female,21.75,12.729964,16.11881,11.690314,0.5,0.501745
Third,male,26.507589,12.159514,12.661633,11.681696,0.135447,0.342694


In [10]:
# 인덱스 속성 확인하기
gdf.index

MultiIndex([( 'First', 'female'),
            ( 'First',   'male'),
            ('Second', 'female'),
            ('Second',   'male'),
            ( 'Third', 'female'),
            ( 'Third',   'male')],
           names=['class', 'sex'])

In [18]:
# 멀티 인덱스 만들기 - 리스트 배열 이용
arrays = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
multi_index_arrays = pd.MultiIndex.from_arrays(arrays, names=('letter', 'number'))
print(multi_index_arrays)

# 멀티 인덱스 만들기 - 튜플 배열 이용
tuples = [('a', 1), ('a', 2), ('b', 1), ('b', 2)]
multi_index_tuples = pd.MultiIndex.from_tuples(tuples, names=('letter', 'number'))
print(multi_index_tuples)

# 멀티 인덱스 만들기 - 교차 반복객체 이용
letters = ['a', 'b']
numbers = [1, 2]
multi_index_product = pd.MultiIndex.from_product([letters, numbers], names=('letters', 'numbers'))
print(multi_index_product)

# 멀티 인덱스 만들기 - 데이터프레임 이용
df = pd.DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], columns=['letter', 'number'])
multi_index_frame = pd.MultiIndex.from_frame(df)
print(multi_index_frame)

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['letter', 'number'])
MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['letter', 'number'])
MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['letters', 'numbers'])
MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['letter', 'number'])


In [29]:
# 멀티인덱스 특정 라벨 추출
index_int = multi_index_frame.get_level_values(0)
print(index_int)

# 멀티인덱스 특정 라벨 추출
index_label = multi_index_frame.get_level_values('letter')
print(index_label)

# 열 이름의 멀티인덱스 확인
gdf.columns.levels

# 라벨을 지정하여 열 인덱스 추출_0번째 인덱스
index_label_2 = gdf.columns.get_level_values(0)
print(gdf)
print(index_label_2)

# 라벨을 지정하여 열 인덱스 추출_1번째 인덱스
index_label_3 = gdf.columns.get_level_values(1)
print(index_label_3)

Index(['a', 'a', 'b', 'b'], dtype='object', name='letter')
Index(['a', 'a', 'b', 'b'], dtype='object', name='letter')
                     age                   fare             survived          
                    mean        std        mean        std      mean       std
class  sex                                                                    
First  female  34.611765  13.612052  106.125798  74.259988  0.968085  0.176716
       male    41.281386  15.139570   67.226127  77.548021  0.368852  0.484484
Second female  28.722973  12.872702   21.970121  10.891796  0.921053  0.271448
       male    30.740707  14.793894   19.741782  14.922235  0.157407  0.365882
Third  female  21.750000  12.729964   16.118810  11.690314  0.500000  0.501745
       male    26.507589  12.159514   12.661633  11.681696  0.135447  0.342694
Index(['age', 'age', 'fare', 'fare', 'survived', 'survived'], dtype='object')
Index(['mean', 'std', 'mean', 'std', 'mean', 'std'], dtype='object')


In [33]:
# age 변수의 데이터 선택
df_age = gdf['age']
print(df_age)

# age 변수의 평균값 데이터 선택
df_age_mean = gdf['age', 'mean']
print(df_age_mean)

# age 변수를 먼저 선택하고, 다시 평균값 데이터 선택
df_age_mean_2 = gdf['age']['mean']
print(df_age_mean_2)


                    mean        std
class  sex                         
First  female  34.611765  13.612052
       male    41.281386  15.139570
Second female  28.722973  12.872702
       male    30.740707  14.793894
Third  female  21.750000  12.729964
       male    26.507589  12.159514
class   sex   
First   female    34.611765
        male      41.281386
Second  female    28.722973
        male      30.740707
Third   female    21.750000
        male      26.507589
Name: (age, mean), dtype: float64
class   sex   
First   female    34.611765
        male      41.281386
Second  female    28.722973
        male      30.740707
Third   female    21.750000
        male      26.507589
Name: mean, dtype: float64


In [41]:
# class 값이 Fist인 행 선택
class_first = gdf.loc['First']
print(class_first)

# class 값이 First이고, sex 값이 female 행 선택
gdf.loc[('First', 'female')]

# class 값이 First이고, sex 값이 female인 행의 'age' 열 선택
gdf.loc[('First', 'female'), 'age']

# class 값이 First이고, sex 값이 female인 행의 'age' 열의 'mean' 선택
gdf.loc[('First', 'female'), ('age', 'mean')]

# First이고 Female인 행의 'age' 열의 'std'부터 'fare'열의 'mean'열까지 범위 선택
gdf.loc[('First', 'female'), ('age', 'std'):('fare', 'mean')]

              age                   fare             survived          
             mean        std        mean        std      mean       std
sex                                                                    
female  34.611765  13.612052  106.125798  74.259988  0.968085  0.176716
male    41.281386  15.139570   67.226127  77.548021  0.368852  0.484484


age   std      13.612052
fare  mean    106.125798
Name: (First, female), dtype: float64

In [42]:
# 특정 레벨에서 cross - section 이용 - sex 값이 male인 데이터 선택
sex_male = gdf.xs('male', level='sex') 
sex_male

Unnamed: 0_level_0,age,age,fare,fare,survived,survived
Unnamed: 0_level_1,mean,std,mean,std,mean,std
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
First,41.281386,15.13957,67.226127,77.548021,0.368852,0.484484
Second,30.740707,14.793894,19.741782,14.922235,0.157407,0.365882
Third,26.507589,12.159514,12.661633,11.681696,0.135447,0.342694


In [48]:
# 멀티인덱스 정렬_int
sort_asc = gdf.sort_index(level=0, ascending=False)
print(sort_asc)

# 멀티인덱스 정렬_sex순서만 고려
sort_dec = gdf.sort_index(level='sex', ascending=False)
print(sort_dec)

# 멀티인덱스 정렬_sex를 먼저 정렬한 뒤, class 오름차순 정렬
sort_2 = gdf.sort_index(level=['sex', 'class'], ascending=[False, True])
print(sort_2)

                     age                   fare             survived          
                    mean        std        mean        std      mean       std
class  sex                                                                    
Third  male    26.507589  12.159514   12.661633  11.681696  0.135447  0.342694
       female  21.750000  12.729964   16.118810  11.690314  0.500000  0.501745
Second male    30.740707  14.793894   19.741782  14.922235  0.157407  0.365882
       female  28.722973  12.872702   21.970121  10.891796  0.921053  0.271448
First  male    41.281386  15.139570   67.226127  77.548021  0.368852  0.484484
       female  34.611765  13.612052  106.125798  74.259988  0.968085  0.176716
                     age                   fare             survived          
                    mean        std        mean        std      mean       std
class  sex                                                                    
Third  male    26.507589  12.159514   12.661633  11.