In [3]:
import pandas as pd
import numpy as np

### Applymap : applique une fonction élément par élément
Applymap permet d'appliquer une fonction à chaque cellule de ma dataframe

In [4]:
df = pd.DataFrame(np.random.randint(10, size=(6, 4)), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,8,6,2,9
1,9,5,2,0
2,0,6,2,4
3,5,9,5,6
4,1,1,9,7
5,0,3,5,8


In [5]:
df.applymap(lambda x: str(x) * x)

Unnamed: 0,A,B,C,D
0,88888888.0,666666,22,999999999.0
1,999999999.0,55555,22,
2,,666666,22,4444.0
3,55555.0,999999999,55555,666666.0
4,1.0,1,999999999,7777777.0
5,,333,55555,88888888.0


### Apply applique une fonction le long d'un axe

In [8]:
df.B.apply(lambda x: str(x) * x)

0       666666
1        55555
2       666666
3    999999999
4            1
5          333
Name: B, dtype: object

In [9]:
df.B.apply(np.sum)

0    6
1    5
2    6
3    9
4    1
5    3
Name: B, dtype: int64

In [10]:
df.apply(np.sum, axis=0)

A    23
B    30
C    25
D    34
dtype: int64

In [11]:
df.apply(lambda x: (x - x.mean()) / x.std())

Unnamed: 0,A,B,C,D
0,1.023696,0.362738,-0.777454,1.020621
1,1.269384,0.0,-0.777454,-1.735055
2,-0.941801,0.362738,-0.777454,-0.51031
3,0.286635,1.450953,0.299021,0.102062
4,-0.696114,-1.450953,1.734321,0.408248
5,-0.941801,-0.725476,0.299021,0.714435


In [12]:
def normalize_data(s):
    return s - s.mean() /s.std()

In [13]:
df.apply(normalize_data)

Unnamed: 0,A,B,C,D
0,7.058199,4.186309,0.504896,7.264945
1,8.058199,3.186309,0.504896,-1.735055
2,-0.941801,4.186309,0.504896,2.264945
3,4.058199,7.186309,3.504896,4.264945
4,0.058199,-0.813691,7.504896,5.264945
5,-0.941801,1.186309,3.504896,6.264945


⚠️ Transform fait la même chose que apply quand ce n'est pas appliqué à un groupe. 

In [14]:
df.transform(normalize_data)

Unnamed: 0,A,B,C,D
0,7.058199,4.186309,0.504896,7.264945
1,8.058199,3.186309,0.504896,-1.735055
2,-0.941801,4.186309,0.504896,2.264945
3,4.058199,7.186309,3.504896,4.264945
4,0.058199,-0.813691,7.504896,5.264945
5,-0.941801,1.186309,3.504896,6.264945


Cependant elles ne se comportent pas parreil quand on les utilise sur des groupes (avec un groupby):
- Apply  : envoie les groupes en DataFrame à la fonction appliquée
- Transform : envoie les groupes en série à la fonction appliquée
    
[Plus d'information ici](https://stackoverflow.com/questions/27517425/apply-vs-transform-on-a-group-object)

En règle générale, si on travaille sur un groupby, c'est plutôt `transform` qu'il faut utiliser.

In [15]:
import seaborn as sns
df = sns.load_dataset('penguins')

In [16]:
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


In [17]:
df.groupby("species")[["bill_depth_mm","flipper_length_mm","body_mass_g"]
                     ].apply(lambda x:  x.sum())

Unnamed: 0_level_0,bill_depth_mm,flipper_length_mm,body_mass_g
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Adelie,2770.3,28683.0,558800.0
Chinstrap,1252.6,13316.0,253850.0
Gentoo,1842.8,26714.0,624350.0


In [14]:
df.groupby("species")[["bill_depth_mm","flipper_length_mm","body_mass_g"]
                     ].apply(lambda x:  x- x.sum())

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g
0,-2751.6,-28502.0,-555050.0
1,-2752.9,-28497.0,-555000.0
2,-2752.3,-28488.0,-555550.0
3,,,
4,-2751.0,-28490.0,-555350.0
...,...,...,...
339,,,
340,-1828.5,-26499.0,-619500.0
341,-1827.1,-26492.0,-618600.0
342,-1828.0,-26502.0,-619150.0


In [18]:
df.groupby("species")[["bill_depth_mm","flipper_length_mm","body_mass_g"]
                     ].transform(lambda x: x.sum())

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g
0,2770.3,28683.0,558800.0
1,2770.3,28683.0,558800.0
2,2770.3,28683.0,558800.0
3,2770.3,28683.0,558800.0
4,2770.3,28683.0,558800.0
...,...,...,...
339,1842.8,26714.0,624350.0
340,1842.8,26714.0,624350.0
341,1842.8,26714.0,624350.0
342,1842.8,26714.0,624350.0


In [16]:
df.groupby("species")[["bill_depth_mm","flipper_length_mm","body_mass_g"]
                     ].transform(lambda x: x-x.sum())

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g
0,-2751.6,-28502.0,-555050.0
1,-2752.9,-28497.0,-555000.0
2,-2752.3,-28488.0,-555550.0
3,,,
4,-2751.0,-28490.0,-555350.0
...,...,...,...
339,,,
340,-1828.5,-26499.0,-619500.0
341,-1827.1,-26492.0,-618600.0
342,-1828.0,-26502.0,-619150.0


### Agg

In [18]:
df = pd.DataFrame(np.random.randint(10, size=(6, 4)), columns=list('ABCD'))
df.agg(['sum', 'min', 'std'])

Unnamed: 0,A,B,C,D
sum,25.0,29.0,39.0,27.0
min,0.0,1.0,1.0,0.0
std,3.488075,3.250641,3.885872,3.209361
