# Using apply() and transform() with groupby()

- transform() returns a Series that has the same length as the input
- apply() can work with multiple Series (columns) at a time, whereas transform() works with only a single Series at a time.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy frame: keys 'a', 'b', 'c' repeated four times, a running counter in A,
# and the pattern 1, 2, 3 repeated in B.
df = pd.DataFrame(
    dict(
        key=list('abc') * 4,
        A=np.arange(12),
        B=[1, 2, 3] * 4,
    )
)

In [3]:
df

Unnamed: 0,key,A,B
0,a,0,1
1,b,1,2
2,c,2,3
3,a,3,1
4,b,4,2
5,c,5,3
6,a,6,1
7,b,7,2
8,c,8,3
9,a,9,1


In [4]:
def group_sum(x):
    """Aggregate the given Series by returning the result of its ``sum()``."""
    total = x.sum()
    return total

In [5]:
# Per-key totals of column A: apply() collapses each group to a single value.
gr_data_ap = (
    df.groupby('key')['A']
      .apply(group_sum)
)
gr_data_ap

key
a    18
b    22
c    26
Name: A, dtype: int64

In [6]:
# Same reduction through transform(): the result keeps one entry per input row,
# each holding the total of that row's group.
gr_data_tr = (
    df.groupby('key')['A']
      .transform(group_sum)
)
gr_data_tr

0     18
1     22
2     26
3     18
4     22
5     26
6     18
7     22
8     26
9     18
10    22
11    26
Name: A, dtype: int64

In [7]:
def subtract_two(x):
    """Return ``x['B'] - x['A']``.

    ``x`` must support lookup by the keys ``'B'`` and ``'A'`` (for example a
    DataFrame holding both columns); ``'B'`` is looked up first.
    """
    minuend = x['B']
    subtrahend = x['A']
    return minuend - subtrahend

In [8]:
# apply() hands each group to the function as a whole DataFrame, so both 'A'
# and 'B' are available inside subtract_two. Selecting the two value columns
# explicitly keeps the grouping column out of the frames passed to the function
# (pandas >= 2.2 emits a DeprecationWarning when it is included).
df.groupby('key')[['A', 'B']].apply(subtract_two)

key    
a    0     1
     3    -2
     6    -5
     9    -8
b    1     1
     4    -2
     7    -5
     10   -8
c    2     1
     5    -2
     8    -5
     11   -8
dtype: int64

In [10]:
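# transform() works with a single Series (one column) at a time, so inside
# subtract_two the lookup x['B'] fails. Uncommenting the line below raises an error:
#df.groupby('key').transform(subtract_two)

# Error >>> KeyError: 'B'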