[Reference](https://levelup.gitconnected.com/working-with-groupby-in-pandas-7e7823414537)

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Six-row demo frame: two label columns to group on (key1, key2) and two
# columns of standard-normal draws.
# NOTE(review): no random seed is set, so data1/data2 differ on every fresh
# kernel run and will not match previously saved outputs.
df = pd.DataFrame(
    dict(
        key1=list("aabbab"),
        key2=2 * ["one", "two", "three"],
        data1=np.random.randn(6),
        data2=np.random.randn(6),
    )
)
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,-0.733562,0.160089
1,a,two,-0.530035,-0.523706
2,b,three,-1.12277,1.281235
3,b,one,0.620954,-1.906671
4,a,two,0.82937,0.365393
5,b,three,-0.243797,0.055353


In [3]:
# Group the data1 values by the labels in key1. The key is passed as a Series
# (not a column name); both Series come from df, so they line up row by row.
group = df["data1"].groupby(by=df["key1"])

In [4]:
# Echo the groupby object itself: its repr is only a SeriesGroupBy handle;
# no per-group values appear until an aggregation such as .mean() is applied.
group

<pandas.core.groupby.generic.SeriesGroupBy object at 0x7ff24f6f7ad0>

In [5]:
# Aggregate: mean of data1 within each key1 label, returned as a Series
# indexed by the key values.
group.mean()

key1
a   -0.144742
b   -0.248538
Name: data1, dtype: float64

In [6]:
# Mean of data1 for every (key1, key2) combination that occurs in df.
# Passing a list of key Series produces a result with a two-level index.
ave = (
    df["data1"]
    .groupby(by=[df["key1"], df["key2"]])
    .mean()
)
ave

key1  key2 
a     one     -0.733562
      two      0.149668
b     one      0.620954
      three   -0.683283
Name: data1, dtype: float64

In [7]:
# Pivot the inner index level (key2) into columns; (key1, key2) pairs that
# never occur in the data show up as NaN.
ave.unstack(level="key2")

key2,one,three,two
key1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,-0.733562,,0.149668
b,0.620954,-0.683283,


In [8]:
# Per-key1 mean of the numeric columns (data1, data2).
# key2 holds strings, so it must be excluded explicitly: since pandas 2.0 the
# groupby reductions no longer drop non-numeric columns silently and raise
# TypeError when asked to average them. numeric_only=True restores the
# intended result and is also accepted by older pandas versions.
df.groupby("key1").mean(numeric_only=True)

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,-0.144742,0.000592
b,-0.248538,-0.190028


In [9]:
# Same aggregation keyed on both label columns. With key1 and key2 both used
# as group keys, only the numeric data columns remain to be averaged.
df.groupby(by=["key1", "key2"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,-0.733562,0.160089
a,two,0.149668,-0.079157
b,one,0.620954,-1.906671
b,three,-0.683283,0.668294


# Iterating over Groups


In [10]:
# Iterating over a groupby yields (key value, sub-frame) pairs.
# The loop variables are deliberately not named `group`: at notebook top level
# they stay bound after the loop, and reusing that name would overwrite the
# SeriesGroupBy object assigned to `group` earlier, changing what the earlier
# cells show when re-run.
for key1_value, key1_rows in df.groupby("key1"):
    print(key1_value)
    print(key1_rows)

a
  key1 key2     data1     data2
0    a  one -0.733562  0.160089
1    a  two -0.530035 -0.523706
4    a  two  0.829370  0.365393
b
  key1   key2     data1     data2
2    b  three -1.122770  1.281235
3    b    one  0.620954 -1.906671
5    b  three -0.243797  0.055353


In [11]:
# With several keys, each iteration yields a tuple of key values plus the rows
# matching that combination.
# The sub-frame variable is not named `group` so that the loop does not
# overwrite the groupby object bound to `group` earlier in the notebook
# (top-level loop variables persist after the loop ends).
for (key1_value, key2_value), pair_rows in df.groupby(["key1", "key2"]):
    print(key1_value, key2_value)
    print(pair_rows)

a one
  key1 key2     data1     data2
0    a  one -0.733562  0.160089
a two
  key1 key2     data1     data2
1    a  two -0.530035 -0.523706
4    a  two  0.829370  0.365393
b one
  key1 key2     data1     data2
3    b  one  0.620954 -1.906671
b three
  key1   key2     data1     data2
2    b  three -1.122770  1.281235
5    b  three -0.243797  0.055353


In [12]:
# Materialise the groups as a plain dict mapping each key1 value to its
# sub-frame, then look up the rows belonging to key "a".
piece = {key: rows for key, rows in df.groupby("key1")}
piece["a"]

Unnamed: 0,key1,key2,data1,data2
0,a,one,-0.733562,0.160089
1,a,two,-0.530035,-0.523706
4,a,two,0.82937,0.365393


# Selecting a Column or Subset of Columns

In [13]:
# Aggregate only data1 for each (key1, key2) pair. The double brackets select
# a one-column DataFrame, so the result is displayed as a DataFrame as well.
(
    df
    .groupby(by=["key1", "key2"])[["data1"]]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,data1
key1,key2,Unnamed: 2_level_1
a,one,-0.733562
a,two,0.149668
b,one,0.620954
b,three,-0.683283


# Grouping with Dicts and Series

In [14]:
# 4x4 frame of standard-normal draws (unseeded, so values vary between runs)
# with fruit names as the row index and single-letter column labels a-d.
fruit = pd.DataFrame(
    data=np.random.randn(4, 4),
    index=["apple", "cherry", "banana", "kiwi"],
    columns=list("abcd"),
)
fruit

Unnamed: 0,a,b,c,d
apple,0.289173,-1.981669,1.176892,-0.421748
cherry,-0.419894,-1.060042,-0.404343,-1.433458
banana,-0.25748,0.260566,0.631103,1.198952
kiwi,-1.269702,0.131651,0.619057,-0.367785


In [15]:
# Mapping from fruit's column letters to a colour bucket.
# "e" has no matching column in fruit (columns are a-d), so the "purple"
# bucket never appears in the grouped results.
label = dict(a="green", b="yellow", c="green", d="yellow", e="purple")

In [16]:
group=fruit.groupby(label,axis=1)

In [17]:
group.sum()

Unnamed: 0,green,yellow
apple,1.466065,-2.403417
cherry,-0.824237,-2.4935
banana,0.373624,1.459519
kiwi,-0.650645,-0.236134


In [18]:
# The same column-to-colour mapping as a Series: the dict keys become the
# index (column letters) and the dict values become the data (colours).
s = pd.Series(data=label)
s

a     green
b    yellow
c     green
d    yellow
e    purple
dtype: object

In [19]:
# Count the columns that fall into each colour bucket, per fruit, using the
# Series `s` as the column-to-colour mapping.
# groupby(..., axis=1) is deprecated since pandas 2.1; grouping the rows of
# the transposed frame and transposing the result back yields the same table
# on every pandas version. The "e" entry in `s` matches no column of fruit
# and therefore contributes no group.
fruit.T.groupby(s).count().T

Unnamed: 0,green,yellow
apple,2,2
cherry,2,2
banana,2,2
kiwi,2,2


# Grouping with Functions

In [20]:
# A function used as the key is called on each index label; here rows are
# bucketed by the length of the fruit name ("cherry" and "banana" both have
# six letters and are therefore summed together).
fruit.groupby(by=len).sum()

Unnamed: 0,a,b,c,d
4,-1.269702,0.131651,0.619057,-0.367785
5,0.289173,-1.981669,1.176892,-0.421748
6,-0.677374,-0.799476,0.226761,-0.234506


# Grouping by Index Levels

In [21]:
# 4x5 frame of standard-normal draws (unseeded) whose columns form a two-level
# index: an outer letter (A, A, A, B, B) and an inner number (1, 2, 3, 1, 2).
data = pd.DataFrame(
    data=np.random.randn(4, 5),
    columns=[3 * ["A"] + 2 * ["B"], [1, 2, 3, 1, 2]],
)

In [22]:
# Name the two column levels so they can be referred to by name
# (e.g. level="letter") rather than by position.
data.columns = data.columns.set_names(["letter", "number"])
data

letter,A,A,A,B,B
number,1,2,3,1,2
0,1.579174,-2.026467,0.250509,-0.155879,-0.457966
1,0.694029,0.091966,-0.184268,0.154181,-2.06221
2,0.744386,1.705838,-0.489713,-0.008884,1.349077
3,0.249495,0.272742,0.703468,-0.216501,-0.548471


In [23]:
# Sum the columns that share the same outer "letter" level, row by row.
# groupby(..., axis=1) is deprecated since pandas 2.1; transposing, grouping
# on the (now row) index level and transposing back gives the same table and
# works on every pandas version.
data.T.groupby(level="letter").sum().T

letter,A,B
0,-0.196784,-0.613844
1,0.601727,-1.908029
2,1.960511,1.340192
3,1.225705,-0.764972
