## Group by: split-apply-combine

> Reference: pandas user guide, "Group by: split-apply-combine" — https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html

In [5]:
import pandas as pd
import numpy as np

In [6]:
# Small example frame: one row per animal (the index label), with its
# "class", "order" and "max_speed". The speed for "monkey" is missing (NaN).
animal_names = ["falcon", "parrot", "lion", "monkey", "leopard"]
animal_records = [
    ("bird", "Falconiformes", 389.0),
    ("bird", "Psittaciformes", 24.0),
    ("mammal", "Carnivora", 80.2),
    ("mammal", "Primates", np.nan),
    ("mammal", "Carnivora", 58),
]
speeds = pd.DataFrame(
    data=animal_records,
    index=animal_names,
    columns=("class", "order", "max_speed"),
)

In [7]:
# Show the whole frame: it has only five rows, including the NaN speed for "monkey".
speeds

Unnamed: 0,class,order,max_speed
falcon,bird,Falconiformes,389.0
parrot,bird,Psittaciformes,24.0
lion,mammal,Carnivora,80.2
monkey,mammal,Primates,
leopard,mammal,Carnivora,58.0


In [12]:
# Iterating a GroupBy yields (key, sub-frame) pairs, here one pair per
# distinct value of the "class" column.
for group_name, group in speeds.groupby("class"):
    # Emit the key, a marker, the sub-frame and a rule, each on its own line.
    print(group_name, "::::", group, "-----------------", sep="\n")

bird
::::
       class           order  max_speed
falcon  bird   Falconiformes      389.0
parrot  bird  Psittaciformes       24.0
-----------------
mammal
::::
          class      order  max_speed
lion     mammal  Carnivora       80.2
monkey   mammal   Primates        NaN
leopard  mammal  Carnivora       58.0
-----------------


In [13]:
# Grouping by a list of columns makes each key a tuple, here (class, order),
# with one sub-frame per distinct combination present in the data.
for group_name, group in speeds.groupby(["class", "order"]):
    # Emit the key tuple, a marker, the sub-frame and a rule, each on its own line.
    print(group_name, "::::", group, "-----------------", sep="\n")

('bird', 'Falconiformes')
::::
       class          order  max_speed
falcon  bird  Falconiformes      389.0
-----------------
('bird', 'Psittaciformes')
::::
       class           order  max_speed
parrot  bird  Psittaciformes       24.0
-----------------
('mammal', 'Carnivora')
::::
          class      order  max_speed
lion     mammal  Carnivora       80.2
leopard  mammal  Carnivora       58.0
-----------------
('mammal', 'Primates')
::::
         class     order  max_speed
monkey  mammal  Primates        NaN
-----------------


In [15]:
# Example frame: two string key columns ("A", "B") and two columns of
# standard-normal noise ("C", "D").
# The noise comes from a locally seeded generator so that Restart & Run All
# reproduces the same values every time; re-run this cell to refresh any
# previously saved output. Change the seed for a different, still repeatable,
# sample.
rng = np.random.default_rng(42)
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        # "C" is drawn before "D" (dict entries are evaluated in order).
        "C": rng.standard_normal(8),
        "D": rng.standard_normal(8),
    }
)
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.391973,0.217129
1,bar,one,0.127013,0.908317
2,foo,two,-0.747417,0.758781
3,bar,three,2.17345,-1.291968
4,foo,two,-2.146463,1.222474
5,bar,two,0.550133,-0.325676
6,foo,one,0.856529,0.021973
7,foo,three,0.901743,0.796604


In [17]:
def test_func(x):
    print (x)
# Use a callable as the grouping key: the printed output shows it being
# invoked with each row label (0-7) in turn.
df.groupby(by=test_func)

0
1
2
3
4
5
6
7


<pandas.core.groupby.generic.DataFrameGroupBy object at 0x117fa72e0>

In [19]:
def get_letter_type(letter):
    """Classify a single-letter label as ``'vowel'`` or ``'consonant'``.

    Parameters
    ----------
    letter : str
        Label to classify; compared case-insensitively.

    Returns
    -------
    str
        ``'vowel'`` if ``letter`` is exactly one of a, e, i, o, u (any case),
        otherwise ``'consonant'``. Anything that is not a single vowel,
        including the empty string and multi-character strings such as
        ``"ae"``, is therefore reported as ``'consonant'``.
    """
    # Membership in a set of characters, not a substring test: with
    # ``in 'aeiou'`` the inputs "" and "ae" would wrongly count as vowels.
    if letter.lower() in {"a", "e", "i", "o", "u"}:
        return "vowel"
    return "consonant"


# Transposing turns the column labels ("A".."D") into the index, so the key
# function is applied to each column name rather than to the row labels.
grouped = df.transpose().groupby(by=get_letter_type)

In [21]:
# Displaying the GroupBy object shows only its repr; iterate over it to see
# the individual groups.
grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x117e3c520>