In [3]:
import pandas as pd
import numpy as np

In [4]:
# Seeded generator so that Restart & Run All rebuilds exactly the same frame;
# the unseeded np.random.randn calls produced different C/D values on every run.
# Outputs stored from an earlier unseeded run will differ until the notebook
# is re-executed.
rng = np.random.default_rng(42)

# A and B are string key columns used for grouping; C and D each hold
# eight standard-normal samples.
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": rng.standard_normal(8),
        "D": rng.standard_normal(8),
    }
)

In [11]:
def get_letter_type(letter):
    """Classify a single character as ``'vowel'`` or ``'consonant'``.

    Parameters
    ----------
    letter : str
        The label to classify; the comparison is case-insensitive.

    Returns
    -------
    str
        ``'vowel'`` when ``letter`` is exactly one of a, e, i, o, u (in any
        case); ``'consonant'`` for every other string.
    """
    # Compare against a set of whole characters. A substring test such as
    # ``letter.lower() in 'aeiou'`` would also accept '' and runs like 'ei'.
    if letter.lower() in {'a', 'e', 'i', 'o', 'u'}:
        return 'vowel'
    return 'consonant'

In [13]:
def get_par_impar(num):
    """Return ``'par'`` when ``num`` is evenly divisible by 2, else ``'impar'``."""
    remainder = num % 2
    if remainder == 0:
        return 'par'
    return 'impar'

In [8]:
# .groups maps each distinct value of column "A" to the row labels in that group.
df.groupby("A").groups

{'bar': [1, 3, 5], 'foo': [0, 2, 4, 6, 7]}

In [12]:
# Transpose so the column labels become the index, then group those labels
# with get_letter_type (a callable key is applied to each index label).
df.T.groupby(get_letter_type).groups


{'consonant': ['B', 'C', 'D'], 'vowel': ['A']}

In [16]:
# Show the whole (8-row) frame for reference.
df

Unnamed: 0,A,B,C,D
0,foo,one,0.59445,0.152506
1,bar,one,-1.343827,-0.339227
2,foo,two,-0.252532,-0.248354
3,bar,three,-0.368568,1.803275
4,foo,two,-1.186751,-0.501409
5,bar,two,-0.412433,-2.27837
6,foo,one,2.270409,-0.946686
7,foo,three,0.491279,2.388466


In [17]:
# A callable key is applied to each index label, so the integer row labels
# are split into even ('par') and odd ('impar') groups.
df.groupby(get_par_impar).groups

{'impar': [1, 3, 5, 7], 'par': [0, 2, 4, 6]}

In [18]:
# Toy frame built row by row: key column "X" alternates between two labels,
# "Y" carries the values.
df3 = pd.DataFrame(
    [("A", 1), ("B", 4), ("A", 3), ("B", 2)],
    columns=["X", "Y"],
)

# get_group returns only the rows that belong to one key -- here the "A" rows.
df3.groupby("X").get_group("A")

Unnamed: 0,X,Y
0,A,1
2,A,3


In [19]:
# Fetch the rows of df3 whose "X" value is "B".
df3.groupby("X").get_group("B")

Unnamed: 0,X,Y
1,B,4
3,B,2


In [20]:
# Group the rows of df by the value in column "A".
grouped = df.groupby('A')

# Iterating over the groupby object yields (key, sub-frame) pairs:
# `name` is the group's "A" value and `group` holds the rows with that value.
for name, group in grouped:
    print(name)
    print(group)

bar
     A      B         C         D
1  bar    one -1.343827 -0.339227
3  bar  three -0.368568  1.803275
5  bar    two -0.412433 -2.278370
foo
     A      B         C         D
0  foo    one  0.594450  0.152506
2  foo    two -0.252532 -0.248354
4  foo    two -1.186751 -0.501409
6  foo    one  2.270409 -0.946686
7  foo  three  0.491279  2.388466


In [21]:
# Group on both key columns; as_index=False leaves A and B as ordinary
# columns in the result rather than moving them into the index.
# NOTE: this rebinds `grouped`, which an earlier cell set to df.groupby('A').
grouped = df.groupby(["A", "B"], as_index=False)
# A list of functions produces one result column per function, computed on
# column C: first max - min, then median - mean. The unnamed lambdas are
# labelled <lambda_0> and <lambda_1> in the output.
grouped["C"].agg([lambda x: x.max() - x.min(), lambda x: x.median() - x.mean()])

Unnamed: 0,A,B,<lambda_0>,<lambda_1>
0,bar,one,0.0,0.0
1,bar,three,0.0,0.0
2,bar,two,0.0,0.0
3,foo,one,1.675959,0.0
4,foo,three,0.0,0.0
5,foo,two,0.934219,0.0


In [22]:
# One record per animal: (kind, height, weight). Two cats and two dogs so that
# grouping by "kind" gives two groups of two rows each.
animals = pd.DataFrame(
    [
        ("cat", 9.1, 7.9),
        ("dog", 6.0, 7.5),
        ("cat", 9.5, 9.9),
        ("dog", 34.0, 198.0),
    ],
    columns=["kind", "height", "weight"],
)

In [23]:
# Named aggregation: each keyword becomes an output column, and pd.NamedAgg
# spells out which source column to aggregate and with which function.
animals.groupby("kind").agg(
    min_height=pd.NamedAgg(column="height", aggfunc="min"),
    max_height=pd.NamedAgg(column="height", aggfunc="max"),
    average_weight=pd.NamedAgg(column="weight", aggfunc="mean"),
)

Unnamed: 0_level_0,min_height,max_height,average_weight
kind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cat,9.1,9.5,8.9
dog,6.0,34.0,102.75


In [24]:
# Named aggregation written with plain (column, aggfunc) tuples: each keyword
# is the output column name, the tuple gives the source column and function.
animals.groupby("kind").agg(
    min_height=("height", "min"),
    max_height=("height", "max"),
    average_weight=("weight", "mean"),
)

Unnamed: 0_level_0,min_height,max_height,average_weight
kind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cat,9.1,9.5,8.9
dog,6.0,34.0,102.75


In [25]:
# Six integers that serve as both the values and the grouping key.
sf = pd.Series([1, 1, 2, 3, 3, 3])

# Grouping the Series by itself puts equal values together; filter keeps the
# elements of every group whose sum exceeds 2 (here only the three 3s: the two
# 1s and the single 2 each sum to exactly 2).
sf.groupby(sf).filter(lambda x: x.sum() > 2)

3    3
4    3
5    3
dtype: int64

In [26]:
# Eight rows: "A" counts 0..7 and "B" assigns them to groups of
# size 2 ("a"), 4 ("b") and 2 ("c").
dff = pd.DataFrame(
    {
        "A": np.arange(8),
        "B": ["a", "a", "b", "b", "b", "b", "c", "c"],
    }
)

# Keep only the rows that belong to groups with more than two members.
dff.groupby("B").filter(lambda grp: grp.shape[0] > 2)

Unnamed: 0,A,B
2,2,b
3,3,b
4,4,b
5,5,b


In [27]:
# Per-group standard deviation; numeric_only=True restricts the result to the
# numeric columns (C and D), leaving out the string column B.
df.groupby("A").std(numeric_only=True)

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,0.55084,2.041667
foo,1.273456,1.303331
