In [18]:
import pandas as pd
import numpy as np

In [19]:
df = pd.DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': np.random.randn(5),
    'data2': np.random.randn(5)
})
df

Unnamed: 0,data1,data2,key1,key2
0,-1.548111,0.464531,a,one
1,-0.968466,-1.300033,a,two
2,1.105436,-0.687448,b,one
3,0.360346,0.432048,b,two
4,-0.352205,0.62296,a,one


In [20]:
grouped = df['data1'].groupby(df['key1'])
grouped

<pandas.core.groupby.SeriesGroupBy object at 0x7efe1db3bc18>

In [21]:
grouped.mean()

key1
a   -0.956261
b    0.732891
Name: data1, dtype: float64

In [22]:
means = df['data1'].groupby([df['key1'], df['key2']]).mean()
means

key1  key2
a     one    -0.950158
      two    -0.968466
b     one     1.105436
      two     0.360346
Name: data1, dtype: float64

In [23]:
means.unstack()

key2,one,two
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,-0.950158,-0.968466
b,1.105436,0.360346


In [24]:
states = np.array(['Ohio', 'California', 'California', 'Ohio', 'Ohio'])
years = np.array([2005, 2005, 2006, 2005, 2006])
df['data1'].groupby([states, years]).mean()

California  2005   -0.968466
            2006    1.105436
Ohio        2005   -0.593883
            2006   -0.352205
Name: data1, dtype: float64

In [25]:
df.groupby('key1').mean()

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,-0.956261,-0.070847
b,0.732891,-0.1277


In [26]:
df.groupby(['key1', 'key2']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,-0.950158,0.543745
a,two,-0.968466,-1.300033
b,one,1.105436,-0.687448
b,two,0.360346,0.432048


In [27]:
df.groupby(['key1', 'key2']).size()

key1  key2
a     one     2
      two     1
b     one     1
      two     1
dtype: int64

In [28]:
for name, group in df.groupby('key1'):
    print(name)
    print(group)

a
      data1     data2 key1 key2
0 -1.548111  0.464531    a  one
1 -0.968466 -1.300033    a  two
4 -0.352205  0.622960    a  one
b
      data1     data2 key1 key2
2  1.105436 -0.687448    b  one
3  0.360346  0.432048    b  two


In [29]:
for (k1, k2), group in df.groupby(['key1', 'key2']):
    print(k1, k2)
    print(group)

a one
      data1     data2 key1 key2
0 -1.548111  0.464531    a  one
4 -0.352205  0.622960    a  one
a two
      data1     data2 key1 key2
1 -0.968466 -1.300033    a  two
b one
      data1     data2 key1 key2
2  1.105436 -0.687448    b  one
b two
      data1     data2 key1 key2
3  0.360346  0.432048    b  two


In [30]:
pieces = dict(list(df.groupby('key1')))
pieces['b']

Unnamed: 0,data1,data2,key1,key2
2,1.105436,-0.687448,b,one
3,0.360346,0.432048,b,two


In [31]:
df.dtypes

data1    float64
data2    float64
key1      object
key2      object
dtype: object

In [32]:
grouped = df.groupby(df.dtypes, axis=1)
dict(list(grouped))

{dtype('float64'):       data1     data2
 0 -1.548111  0.464531
 1 -0.968466 -1.300033
 2  1.105436 -0.687448
 3  0.360346  0.432048
 4 -0.352205  0.622960, dtype('O'):   key1 key2
 0    a  one
 1    a  two
 2    b  one
 3    b  two
 4    a  one}

In [34]:
df.groupby(['key1', 'key2'])[['data2']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data2
key1,key2,Unnamed: 2_level_1
a,one,0.543745
a,two,-1.300033
b,one,-0.687448
b,two,0.432048


In [36]:
s_groupby = df.groupby(['key1', 'key2'])['data2']
s_groupby.mean()

key1  key2
a     one     0.543745
      two    -1.300033
b     one    -0.687448
      two     0.432048
Name: data2, dtype: float64

In [52]:
people = pd.DataFrame(np.random.randn(5, 5),
                     columns=['a', 'b', 'c', 'd', 'e'],
                     index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])
people.loc[2:3, ['b', 'c']] = np.nan
people

Unnamed: 0,a,b,c,d,e
Joe,0.28145,-0.170637,0.132814,0.166357,0.698844
Steve,-2.183259,-0.200328,1.529635,-0.734544,-0.026904
Wes,-1.806624,,,-1.843513,0.331911
Jim,-0.237198,0.373747,-0.485802,-0.012521,-0.650636
Travis,0.845003,0.062007,0.431975,-0.428876,-1.174931
