In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

In [2]:
# Build a small demo frame: two string key columns (k1, k2) and two
# columns of standard-normal random numbers (dataset1, dataset2).
# Seed the legacy global RNG first so that a fresh "Restart & Run All"
# always produces the same values.
np.random.seed(0)
dframe = DataFrame({'k1': ['X', 'X', 'Y', 'Y', 'Z'],
                    'k2': ['alpha', 'beta', 'alpha', 'beta', 'alpha'],
                    'dataset1': np.random.randn(5),
                    'dataset2': np.random.randn(5)})
dframe

Unnamed: 0,dataset1,dataset2,k1,k2
0,1.009515,1.890564,X,alpha
1,-0.797155,0.386308,X,beta
2,-0.487252,0.077967,Y,alpha
3,-0.666881,-0.021769,Y,beta
4,0.226436,-0.191574,Z,alpha


In [3]:
# Group the dataset1 values, using the k1 column as the grouping key.
group1 = dframe.dataset1.groupby(by=dframe.k1)
group1

<pandas.core.groupby.SeriesGroupBy object at 0x0000028ED788FB38>

In [4]:
# Compute the mean of each group.
group1.aggregate('mean')

k1
X    0.106180
Y   -0.577066
Z    0.226436
Name: dataset1, dtype: float64

In [5]:
# The grouping keys can be changed: here two standalone arrays are used
# instead of columns of the frame.
month = np.array(['JAN', 'FEB', 'JAN', 'FEB', 'JAN'])
cities = np.array(['NY', 'LA', 'LA', 'NY', 'NY'])
# Group by both arrays (city first, then month) and average each group.
dframe['dataset1'].groupby(by=[cities, month]).mean()

LA  FEB   -0.797155
    JAN   -0.487252
NY  FEB   -0.666881
    JAN    0.617976
Name: dataset1, dtype: float64

In [6]:
# Show the full frame again for reference.
dframe

Unnamed: 0,dataset1,dataset2,k1,k2
0,1.009515,1.890564,X,alpha
1,-0.797155,0.386308,X,beta
2,-0.487252,0.077967,Y,alpha
3,-0.666881,-0.021769,Y,beta
4,0.226436,-0.191574,Z,alpha


In [7]:
# Group the whole frame by k1 and average the numeric columns.
# numeric_only=True leaves out the string column k2, which is not a
# grouping key here; on pandas >= 2.0 a plain mean() raises TypeError
# when it meets that column.
dframe.groupby('k1').mean(numeric_only=True)

Unnamed: 0_level_0,dataset1,dataset2
k1,Unnamed: 1_level_1,Unnamed: 2_level_1
X,0.10618,1.138436
Y,-0.577066,0.028099
Z,0.226436,-0.191574


In [8]:
# A list of column names is accepted as well: group by k1, then k2.
dframe.groupby(by=['k1', 'k2']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset1,dataset2
k1,k2,Unnamed: 2_level_1,Unnamed: 3_level_1
X,alpha,1.009515,1.890564
X,beta,-0.797155,0.386308
Y,alpha,-0.487252,0.077967
Y,beta,-0.666881,-0.021769
Z,alpha,0.226436,-0.191574


In [9]:
# The grouped result can be restricted to selected columns; the
# double brackets keep the selection as a DataFrame-style group object.
dataset2_group = dframe.groupby(by=['k1', 'k2'])[['dataset2']]
dataset2_group.aggregate('mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset2
k1,k2,Unnamed: 2_level_1
X,alpha,1.890564
X,beta,0.386308
Y,alpha,0.077967
Y,beta,-0.021769
Z,alpha,-0.191574


In [10]:
# Combining groupby with size() is handy: it counts the rows in each group.
dframe.groupby(by=['k1']).size()

k1
X    2
Y    2
Z    1
dtype: int64

In [11]:
# A groupby object can be iterated: each step yields the group key
# and the sub-frame holding that group's rows.
for name, group in dframe.groupby('k1'):
    # One print call with a newline separator emits the header, the
    # sub-frame and the trailing blank lines.
    print('This is the {} group'.format(name), group, '\n', sep='\n')

This is the X group
   dataset1  dataset2 k1     k2
0  1.009515  1.890564  X  alpha
1 -0.797155  0.386308  X   beta


This is the Y group
   dataset1  dataset2 k1     k2
2 -0.487252  0.077967  Y  alpha
3 -0.666881 -0.021769  Y   beta


This is the Z group
   dataset1  dataset2 k1     k2
4  0.226436 -0.191574  Z  alpha




In [12]:
# The same iteration works with several keys: the group key is then a
# tuple, unpacked here into k1 and k2.
for (k1, k2), group in dframe.groupby(['k1', 'k2']):
    # One print call with a newline separator emits the header, the
    # sub-frame and the trailing blank lines.
    print('Key1 = {} Key2 = {}'.format(k1, k2), group, '\n', sep='\n')

Key1 = X Key2 = alpha
   dataset1  dataset2 k1     k2
0  1.009515  1.890564  X  alpha


Key1 = X Key2 = beta
   dataset1  dataset2 k1    k2
1 -0.797155  0.386308  X  beta


Key1 = Y Key2 = alpha
   dataset1  dataset2 k1     k2
2 -0.487252  0.077967  Y  alpha


Key1 = Y Key2 = beta
   dataset1  dataset2 k1    k2
3 -0.666881 -0.021769  Y  beta


Key1 = Z Key2 = alpha
   dataset1  dataset2 k1     k2
4  0.226436 -0.191574  Z  alpha




In [13]:
# Keep the grouped object around, then pull out the rows of a single
# group by its key.
gr = dframe.groupby(by='k1')
gr.get_group('X')

Unnamed: 0,dataset1,dataset2,k1,k2
0,1.009515,1.890564,X,alpha
1,-0.797155,0.386308,X,beta


In [14]:
# The (key, sub-frame) pairs produced by iterating a groupby can be
# collected into a dictionary keyed by group name.
group_dict = {key: frame for key, frame in dframe.groupby('k1')}
group_dict['X']

Unnamed: 0,dataset1,dataset2,k1,k2
0,1.009515,1.890564,X,alpha
1,-0.797155,0.386308,X,beta


In [15]:
# Grouping can also be done along the column direction: here the
# columns are split by dtype into a dict mapping dtype -> sub-frame.
# groupby(..., axis=1) is deprecated since pandas 2.1, so each
# sub-frame is selected with a boolean column mask instead; this keeps
# the original column dtypes intact.
group_dict_axis1 = {
    dtype: dframe.loc[:, dframe.dtypes == dtype]
    for dtype in dframe.dtypes.unique()
}
group_dict_axis1

{dtype('float64'):    dataset1  dataset2
 0  1.009515  1.890564
 1 -0.797155  0.386308
 2 -0.487252  0.077967
 3 -0.666881 -0.021769
 4  0.226436 -0.191574, dtype('O'):   k1     k2
 0  X  alpha
 1  X   beta
 2  Y  alpha
 3  Y   beta
 4  Z  alpha}