In [1]:
import pandas as pd
import numpy as np

# Fixed seed so that Restart Kernel -> Run All regenerates the same C and D
# values on every run; unseeded draws made every execution produce different
# numbers.  NOTE: outputs saved before this change came from an unseeded run
# and will not match until the notebook is re-executed.
SEED = 42
rng = np.random.default_rng(SEED)

# Toy frame for the group-by examples: two string key columns (A, B) to group
# on and two standard-normal value columns (C, D) to aggregate.
df = pd.DataFrame({
    "A": ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
    "B": ['one', 'two', 'one', 'three', 'three', 'two', 'one', 'three', 'two', 'two'],
    "C": rng.standard_normal(10),
    "D": rng.standard_normal(10),
})

df

Unnamed: 0,A,B,C,D
0,A,one,0.213017,0.700451
1,B,two,0.902395,-0.083694
2,A,one,1.288234,0.985284
3,B,three,0.285026,0.687456
4,A,three,0.908457,-0.905452
5,B,two,0.658849,0.7111
6,A,one,0.993444,0.067832
7,B,three,1.018129,0.407822
8,A,two,0.181335,-0.299199
9,B,two,-0.595484,1.065725


In [2]:
# Mean of the numeric columns (C, D) for each value of A.  Column B holds
# strings, so numeric_only=True is stated explicitly: older pandas dropped B
# with a FutureWarning, while pandas >= 2.0 raises TypeError instead.
df.groupby('A').mean(numeric_only=True)

  df.groupby('A').mean()


Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0.716898,0.109783
B,0.453783,0.557682


In [3]:
# Group on the (A, B) pair and average the remaining columns; the result is
# indexed by both keys.
df.groupby(by=['A', 'B']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
A,one,0.831565,0.584522
A,three,0.908457,-0.905452
A,two,0.181335,-0.299199
B,three,0.651578,0.547639
B,two,0.32192,0.564377


In [4]:
# A different aggregation per column: total of C and mean of D within each A
# group.  The aggregations are named by string rather than passed as
# np.sum / np.mean: pandas >= 2.1 deprecates NumPy callables here
# (FutureWarning), and the strings match the style of the named-aggregation
# cell in this notebook.
df.groupby('A').agg({'C': 'sum', 'D': 'mean'})

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
A,3.584488,0.109783
B,2.268915,0.557682


In [7]:
# Named aggregation: every keyword becomes an output column, described as
# (source column, aggregation).  The plain tuples are pandas' shorthand for
# pd.NamedAgg(column=..., aggfunc=...).
df.groupby(by='A').agg(
    max_C=("C", "max"),
    min_D=("D", "min"),
    mean_C=("C", "mean"),
)

Unnamed: 0_level_0,max_C,min_D,mean_C
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,1.288234,-0.905452,0.716898
B,1.018129,-0.083694,0.453783


In [13]:
# Keep only the rows whose B group contains fewer than 4 rows; groups that
# fail the predicate are dropped as a whole.
df.groupby(by='B').filter(lambda grp: len(grp) < 4)

Unnamed: 0,A,B,C,D
0,A,one,0.213017,0.700451
2,A,one,1.288234,0.985284
3,B,three,0.285026,0.687456
4,A,three,0.908457,-0.905452
6,A,one,0.993444,0.067832
7,B,three,1.018129,0.407822


In [15]:
# Show the full frame again so the group-wise results that follow can be
# checked against the individual rows.
df

Unnamed: 0,A,B,C,D
0,A,one,0.213017,0.700451
1,B,two,0.902395,-0.083694
2,A,one,1.288234,0.985284
3,B,three,0.285026,0.687456
4,A,three,0.908457,-0.905452
5,B,two,0.658849,0.7111
6,A,one,0.993444,0.067832
7,B,three,1.018129,0.407822
8,A,two,0.181335,-0.299199
9,B,two,-0.595484,1.065725


In [14]:
# For every row, the mean of D over that row's A group; the result keeps the
# original row index instead of collapsing to one row per group.
df.groupby(by='A')['D'].transform('mean')

0    0.109783
1    0.557682
2    0.109783
3    0.557682
4    0.109783
5    0.557682
6    0.109783
7    0.557682
8    0.109783
9    0.557682
Name: D, dtype: float64

In [16]:
def normalize(x):
    """Return ``x`` shifted by its mean and scaled by its standard deviation.

    ``x`` must support ``.mean()``, ``.std()`` and element-wise arithmetic.
    The scale is whatever ``x.std()`` returns; for a pandas Series that is
    the sample standard deviation (ddof=1) by default.
    """
    centered = x - x.mean()
    return centered / x.std()

# Z-score C separately within each A group.  group_keys=False is stated
# explicitly so the result stays on the original row index: the default for
# transform-like apply changed in pandas 2.0, where the group key would
# otherwise be prepended as an extra index level.
df.groupby('A', group_keys=False)['C'].apply(normalize)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use

	>>> .groupby(..., group_keys=True)
  df.groupby('A')['C'].apply(normalize)


0   -1.017825
1    0.689921
2    1.154085
3   -0.259531
4    0.386946
5    0.315371
6    0.558617
7    0.867908
8   -1.081823
9   -1.613669
Name: C, dtype: float64

In [18]:
# Show the full frame again so the per-group ranks computed next can be
# checked against the raw C values.
df

Unnamed: 0,A,B,C,D
0,A,one,0.213017,0.700451
1,B,two,0.902395,-0.083694
2,A,one,1.288234,0.985284
3,B,three,0.285026,0.687456
4,A,three,0.908457,-0.905452
5,B,two,0.658849,0.7111
6,A,one,0.993444,0.067832
7,B,three,1.018129,0.407822
8,A,two,0.181335,-0.299199
9,B,two,-0.595484,1.065725


In [17]:
# Rank every C value inside its own A group (1.0 is the smallest value of
# the group); the result keeps the original row index.
df.groupby(by='A')['C'].rank()

0    2.0
1    4.0
2    5.0
3    2.0
4    3.0
5    3.0
6    4.0
7    5.0
8    1.0
9    1.0
Name: C, dtype: float64

In [19]:
# Range (max - min) of each value column within every A group.  C and D are
# selected explicitly because the lambda cannot subtract the strings in
# column B: older pandas dropped B with a FutureWarning, pandas >= 2.0 raises
# TypeError instead.
df.groupby('A')[['C', 'D']].agg(lambda col: col.max() - col.min())

  df.groupby('A').agg(lambda x: x.max()-x.min())


Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
A,1.106899,1.890736
B,1.613614,1.149419


In [21]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns, computed separately for each A group.
df.groupby(by="A").describe()

Unnamed: 0_level_0,C,C,C,C,C,C,C,C,D,D,D,D,D,D,D,D
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
A,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
A,5.0,0.716898,0.495056,0.181335,0.213017,0.908457,0.993444,1.288234,5.0,0.109783,0.760776,-0.905452,-0.299199,0.067832,0.700451,0.985284
B,5.0,0.453783,0.650237,-0.595484,0.285026,0.658849,0.902395,1.018129,5.0,0.557682,0.427875,-0.083694,0.407822,0.687456,0.7111,1.065725
