In [1]:
import pandas as pd
import numpy as np

### Applymap : applique une fonction élément par élément
Applymap permet d'appliquer une fonction à chaque cellule de ma dataframe

In [2]:
df = pd.DataFrame(np.random.randint(10, size=(6, 4)), columns=list('ABCD'))

df

Unnamed: 0,A,B,C,D
0,1,0,0,4
1,5,2,3,7
2,3,6,6,8
3,8,9,4,9
4,4,2,5,0
5,7,5,5,5


In [3]:
df.applymap(lambda x: str(x) * x)

Unnamed: 0,A,B,C,D
0,1,,,4444.0
1,55555,22.0,333.0,7777777.0
2,333,666666.0,666666.0,88888888.0
3,88888888,999999999.0,4444.0,999999999.0
4,4444,22.0,55555.0,
5,7777777,55555.0,55555.0,55555.0


### Apply applique une fonction le long d'un axe

In [4]:
df.B.apply(lambda x: str(x) * x)

0             
1           22
2       666666
3    999999999
4           22
5        55555
Name: B, dtype: object

In [5]:
df.B.apply(np.sum)

0    0
1    2
2    6
3    9
4    2
5    5
Name: B, dtype: int64

In [6]:
df.apply(np.sum, axis=0)

A    28
B    24
C    23
D    33
dtype: int64

In [7]:
df.apply(lambda x: (x - x.mean()) / x.std())

Unnamed: 0,A,B,C,D
0,-1.420094,-1.217161,-1.793812,-0.458563
1,0.129099,-0.608581,-0.389959,0.458563
2,-0.645497,0.608581,1.013894,0.764272
3,1.290994,1.521452,0.077992,1.069981
4,-0.258199,-0.608581,0.545943,-1.681399
5,0.903696,0.30429,0.545943,-0.152854


In [8]:
def normalize_data(s):
    return s - s.mean() /s.std()

In [9]:
df.apply(normalize_data)

Unnamed: 0,A,B,C,D
0,-0.807392,-1.217161,-1.793812,2.318601
1,3.192608,0.782839,1.206188,5.318601
2,1.192608,4.782839,4.206188,6.318601
3,6.192608,7.782839,2.206188,7.318601
4,2.192608,0.782839,3.206188,-1.681399
5,5.192608,3.782839,3.206188,3.318601


⚠️ Transform fait la même chose que apply quand ce n'est pas appliqué à un groupe. 

In [10]:
df.transform(normalize_data)

Unnamed: 0,A,B,C,D
0,-0.807392,-1.217161,-1.793812,2.318601
1,3.192608,0.782839,1.206188,5.318601
2,1.192608,4.782839,4.206188,6.318601
3,6.192608,7.782839,2.206188,7.318601
4,2.192608,0.782839,3.206188,-1.681399
5,5.192608,3.782839,3.206188,3.318601


Cependant elles ne se comportent pas parreil quand on les utilise sur des groupes (avec un groupby):
- Apply  : envoie les groupes en DataFrame à la fonction appliquée
- Transform : envoie les groupes en série à la fonction appliquée
    
[Plus d'information ici](https://stackoverflow.com/questions/27517425/apply-vs-transform-on-a-group-object)

En règle générale, si on travaille sur un groupby, c'est plutôt `transform` qu'il faut utiliser.

In [11]:
import seaborn as sns
df = sns.load_dataset('penguins')

In [12]:
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


In [13]:
df.groupby("species")[["bill_depth_mm","flipper_length_mm","body_mass_g"]
                     ].apply(lambda x:  x.sum())

Unnamed: 0_level_0,bill_depth_mm,flipper_length_mm,body_mass_g
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Adelie,2770.3,28683.0,558800.0
Chinstrap,1252.6,13316.0,253850.0
Gentoo,1842.8,26714.0,624350.0


In [14]:
df.groupby("species")[["bill_depth_mm","flipper_length_mm","body_mass_g"]
                     ].apply(lambda x:  x- x.sum())

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g
0,-2751.6,-28502.0,-555050.0
1,-2752.9,-28497.0,-555000.0
2,-2752.3,-28488.0,-555550.0
3,,,
4,-2751.0,-28490.0,-555350.0
...,...,...,...
339,,,
340,-1828.5,-26499.0,-619500.0
341,-1827.1,-26492.0,-618600.0
342,-1828.0,-26502.0,-619150.0


In [15]:
df.groupby("species")[["bill_depth_mm","flipper_length_mm","body_mass_g"]
                     ].transform(lambda x: x.sum())

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g
0,2770.3,28683.0,558800.0
1,2770.3,28683.0,558800.0
2,2770.3,28683.0,558800.0
3,2770.3,28683.0,558800.0
4,2770.3,28683.0,558800.0
...,...,...,...
339,1842.8,26714.0,624350.0
340,1842.8,26714.0,624350.0
341,1842.8,26714.0,624350.0
342,1842.8,26714.0,624350.0


In [16]:
df.groupby("species")[["bill_depth_mm","flipper_length_mm","body_mass_g"]
                     ].transform(lambda x: x-x.sum())

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g
0,-2751.6,-28502.0,-555050.0
1,-2752.9,-28497.0,-555000.0
2,-2752.3,-28488.0,-555550.0
3,,,
4,-2751.0,-28490.0,-555350.0
...,...,...,...
339,,,
340,-1828.5,-26499.0,-619500.0
341,-1827.1,-26492.0,-618600.0
342,-1828.0,-26502.0,-619150.0


### Agg

In [18]:
df = pd.DataFrame(np.random.randint(10, size=(6, 4)), columns=list('ABCD'))
df.agg(['sum', 'min', 'std'])

Unnamed: 0,A,B,C,D
sum,25.0,29.0,39.0,27.0
min,0.0,1.0,1.0,0.0
std,3.488075,3.250641,3.885872,3.209361
