In [1]:
import numpy as np
import pandas as pd


In [6]:
# (state, census year) keys, generated state-major so that each state's
# 2000 entry is immediately followed by its 2010 entry.
index = [(state, year)
         for state in ('California', 'New York', 'Texas')
         for year in (2000, 2010)]

# Population counts, in the same order as the keys above.
populations = [
    33871648, 37253956,  # California: 2000, 2010
    18976457, 19378102,  # New York:   2000, 2010
    20851820, 25145561,  # Texas:      2000, 2010
]


In [7]:
# Turn the list of (state, year) tuples into a two-level MultiIndex.
# NOTE(review): this rebinds `index` from the plain list to the MultiIndex,
# so cells executed after this one see the MultiIndex under that name.
index = pd.MultiIndex.from_tuples(index)
index

MultiIndex(levels=[['California', 'New York', 'Texas'], [2000, 2010]],
           codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

In [9]:
# Population Series labelled by `index`; the displayed result has an outer
# state level and an inner year level.
pop = pd.Series(populations, index=index)
pop


California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

In [19]:
# Partial selection on the inner level: keep every state (outer level) but
# only the rows whose year (inner level) is 2000.
pop.loc[:, 2000]

California    33871648
New York      18976457
Texas         20851820
dtype: int64

In [20]:
# Pivot the inner index level (year) into columns, giving a state x year table.
# NOTE(review): `pop_df` is reassigned to a different frame further down.
pop_df = pop.unstack()
pop_df

Unnamed: 0,2000,2010
California,33871648,37253956
New York,18976457,19378102
Texas,20851820,25145561


In [22]:
# stack() folds the year columns back into an inner index level; the displayed
# result has the same layout as `pop`.
pop_df.stack()

California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

In [23]:
# Two-column frame on the same (state, year) MultiIndex as `pop`:
# the existing totals plus a positional list of under-18 counts.
# NOTE(review): the under18 figures look like placeholders rather than census
# data (Texas 2010 works out to roughly 0.1% of the total) - verify the source.
pop_df = pop.to_frame('total').assign(
    under18=[9267895, 3894032, 8320404, 4830249, 823942, 23984],
)
pop_df

Unnamed: 0,Unnamed: 1,total,under18
California,2000,33871648,9267895
California,2010,37253956,3894032
New York,2000,18976457,8320404
New York,2010,19378102,4830249
Texas,2000,20851820,823942
Texas,2010,25145561,23984


In [27]:
# Display the total / under18 frame again (no new computation in this cell).
pop_df

Unnamed: 0,Unnamed: 1,total,under18
California,2000,33871648,9267895
California,2010,37253956,3894032
New York,2000,18976457,8320404
New York,2010,19378102,4830249
Texas,2000,20851820,823942
Texas,2010,25145561,23984


In [29]:
# Share of each (state, year) population that is under 18. Both operands carry
# the same MultiIndex, so the division is element-wise per (state, year);
# unstack() then lays the result out as a state x year table.
f_u18 = pop_df['under18'].div(pop_df['total'])
f_u18.unstack()

Unnamed: 0,2000,2010
California,0.273618,0.104527
New York,0.438459,0.249263
Texas,0.039514,0.000954


In [31]:
# Random demo frame whose two-level row index is built implicitly from two
# parallel lists (outer labels, inner labels).
# A dedicated, seeded RandomState makes the values identical on every run
# (Restart & Run All) without touching NumPy's global random state.
df = pd.DataFrame(np.random.RandomState(0).rand(4, 2),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=['data1', 'data2'])
df

Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.206458,0.571364
a,2,0.792587,0.666301
b,1,0.158488,0.734258
b,2,0.377267,0.11524


In [32]:
# Flatten a state -> {year: population} mapping into a dict keyed by
# (state, year) tuples; insertion order is California, Texas, New York.
data = {
    (state, year): count
    for state, by_year in {
        'California': {2000: 33871648, 2010: 37253956},
        'Texas': {2000: 20851820, 2010: 25145561},
        'New York': {2000: 18976457, 2010: 19378102},
    }.items()
    for year, count in by_year.items()
}
# A dict with tuple keys is turned into a MultiIndex-ed Series.
pd.Series(data)

California  2000    33871648
            2010    37253956
Texas       2000    20851820
            2010    25145561
New York    2000    18976457
            2010    19378102
dtype: int64

In [33]:
# Hierarchical indices on both rows and columns.
# MultiIndex.from_product is a convenient constructor here: it builds every
# combination (Cartesian product) of the label lists it is given.
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],
                                   names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],
                                     names=['subject', 'type'])

# Mock some data. A dedicated, seeded RandomState keeps the table (and any
# aggregate computed from it) identical on every run without touching
# NumPy's global random state.
data = np.round(np.random.RandomState(0).randn(4, 6), 1)
data[:, ::2] *= 10  # even positions are the 'HR' columns: widen their spread
data += 37          # shift every reading so it is centred on 37

# Create the DataFrame: rows are (year, visit), columns are (subject, type).
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,28.0,36.7,46.0,37.4,40.0,38.7
2013,2,49.0,36.3,34.0,38.1,50.0,37.1
2014,1,22.0,36.3,48.0,37.3,44.0,36.6
2014,2,35.0,37.6,35.0,36.4,49.0,36.9


In [40]:
# Display the population Series again for reference.
pop


California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

In [43]:
# Series on a product MultiIndex whose outer labels are listed as
# 'a', 'c', 'b', i.e. not in lexicographic order.
index = pd.MultiIndex.from_product([['a', 'c', 'b'], [1, 2]])
# Seeded RandomState so the six values repeat on every run and NumPy's global
# random state is left untouched.
data = pd.Series(np.random.RandomState(0).rand(6), index=index)
data

a  1    0.150915
   2    0.653355
c  1    0.734112
   2    0.515803
b  1    0.177978
   2    0.731650
dtype: float64

In [147]:
# 10 x 3 demo frame: rows are every (name, letter) combination, columns are
# three arbitrary value columns.
# Seeded RandomState so the displayed values repeat on every run and NumPy's
# global random state is left untouched.
data = np.random.RandomState(0).rand(10, 3)
columns = ['first_col', 'second_col', 'third_col']
index = pd.MultiIndex.from_product([['Sasha', 'Pavel'], list('abcde')])
df = pd.DataFrame(data,
                  index=index,
                  columns=columns)
df

Unnamed: 0,Unnamed: 1,first_col,second_col,third_col
Sasha,a,0.350702,0.365052,0.909595
Sasha,b,0.016435,0.156685,0.584669
Sasha,c,0.570024,0.430217,0.618538
Sasha,d,0.480816,0.763831,0.753817
Sasha,e,0.268796,0.342493,0.984354
Pavel,a,0.100328,0.763713,0.697789
Pavel,b,0.916018,0.962612,0.887321
Pavel,c,0.806017,0.817254,0.99393
Pavel,d,0.260811,0.248343,0.858714
Pavel,e,0.39914,0.4616,0.557353


In [156]:
# pd.IndexSlice allows slice syntax inside .loc when addressing a MultiIndex.
idx = pd.IndexSlice
# Rows: every outer label (':') with inner label 'a'; columns: all of them.
df.loc[idx[:, ['a']], :]


Unnamed: 0,Unnamed: 1,first_col,second_col,third_col
Sasha,a,0.350702,0.365052,0.909595
Pavel,a,0.100328,0.763713,0.697789


In [161]:
# reset_index() moves both index levels into ordinary columns (shown in the
# output as level_0 / level_1) and leaves a default integer index.
df_flat = df.reset_index()  # this is a very handy tool
df_flat

Unnamed: 0,level_0,level_1,first_col,second_col,third_col
0,Sasha,a,0.350702,0.365052,0.909595
1,Sasha,b,0.016435,0.156685,0.584669
2,Sasha,c,0.570024,0.430217,0.618538
3,Sasha,d,0.480816,0.763831,0.753817
4,Sasha,e,0.268796,0.342493,0.984354
5,Pavel,a,0.100328,0.763713,0.697789
6,Pavel,b,0.916018,0.962612,0.887321
7,Pavel,c,0.806017,0.817254,0.99393
8,Pavel,d,0.260811,0.248343,0.858714
9,Pavel,e,0.39914,0.4616,0.557353


In [176]:
# Rebuild the two-level row index from the flattened columns and name its
# levels in one chained expression instead of assigning to index.names.
df = (
    df_flat
    .set_index(['level_0', 'level_1'])
    .rename_axis(['Name', '№'])
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,first_col,second_col,third_col
Name,№,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Sasha,a,0.350702,0.365052,0.909595
Sasha,b,0.016435,0.156685,0.584669
Sasha,c,0.570024,0.430217,0.618538
Sasha,d,0.480816,0.763831,0.753817
Sasha,e,0.268796,0.342493,0.984354
Pavel,a,0.100328,0.763713,0.697789
Pavel,b,0.916018,0.962612,0.887321
Pavel,c,0.806017,0.817254,0.99393
Pavel,d,0.260811,0.248343,0.858714
Pavel,e,0.39914,0.4616,0.557353


In [184]:
# Display the (year, visit) x (subject, type) frame again before aggregating.
health_data



Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,28.0,36.7,46.0,37.4,40.0,38.7
2013,2,49.0,36.3,34.0,38.1,50.0,37.1
2014,1,22.0,36.3,48.0,37.3,44.0,36.6
2014,2,35.0,37.6,35.0,36.4,49.0,36.9


In [185]:
# Average the visits within each year.
# DataFrame.mean(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# groupby(level=...).mean() is the documented replacement and produces the
# same year-indexed frame with the (subject, type) columns unchanged.
data_mean = health_data.groupby(level='year').mean()
data_mean

subject,Bob,Bob,Guido,Guido,Sue,Sue
type,HR,Temp,HR,Temp,HR,Temp
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2013,38.5,36.5,40.0,37.75,45.0,37.9
2014,28.5,36.95,41.5,36.85,46.5,36.75


In [187]:
# Average across subjects for each measurement type.
# The mean(axis=1, level=...) form was removed in pandas 2.0. Grouping the
# transposed frame on the 'type' column level and transposing back gives the
# same table (rows: year, columns: HR / Temp) on old and new pandas alike.
data_mean.T.groupby(level='type').mean().T

type,HR,Temp
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2013,41.166667,37.383333
2014,38.833333,36.85
