In [1]:
import pandas as pd
from utilities.reader_tools import GenericCSVReader

# Load the players CSV into a DataFrame through the project's reader helper.
# NOTE(review): setIndexName="short_name" presumably makes that column the index
# (the preview below is indexed by short_name) — confirm against reader_tools.
df_players = GenericCSVReader.DataframeFromCSV("players_20.csv", setIndexName="short_name")
# Preview the first rows of the frame
df_players.head()

Unnamed: 0_level_0,sofifa_id,player_url,long_name,age,dob,height_cm,weight_kg,nationality,club,overall,...,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
L. Messi,158023,https://sofifa.com/player/158023/lionel-messi/...,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona,94,...,68+2,66+2,66+2,66+2,68+2,63+2,52+2,52+2,52+2,63+2
Cristiano Ronaldo,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus,93,...,65+3,61+3,61+3,61+3,65+3,61+3,53+3,53+3,53+3,61+3
Neymar Jr,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain,92,...,66+3,61+3,61+3,61+3,66+3,61+3,46+3,46+3,46+3,61+3
J. Oblak,200389,https://sofifa.com/player/200389/jan-oblak/20/...,Jan Oblak,26,1993-01-07,188,87,Slovenia,Atlético Madrid,91,...,,,,,,,,,,
E. Hazard,183277,https://sofifa.com/player/183277/eden-hazard/2...,Eden Hazard,28,1991-01-07,175,74,Belgium,Real Madrid,91,...,66+3,63+3,63+3,63+3,66+3,61+3,49+3,49+3,49+3,61+3


##### APPLY

Create a function and apply it to columns/Series

In [3]:
import numpy as np

# Pass an existing function (np.sqrt) to Series.apply: the result is a new Series
# holding the square root of each age, with the same index.
df_players["age"].apply(np.sqrt)

short_name
L. Messi             5.656854
Cristiano Ronaldo    5.830952
Neymar Jr            5.196152
J. Oblak             5.099020
E. Hazard            5.291503
                       ...   
Shao Shuai           4.690416
Xiao Mingjie         4.690416
Zhang Wei            4.358899
Wang Haijian         4.242641
Pan Ximing           5.099020
Name: age, Length: 18278, dtype: float64

In [4]:
def calculate_bmi(row):
    """Return the Body Mass Index for a single player row.

    BMI formula: weight in kg divided by the square of the height in metres.
    ``row`` must support lookup by the keys "weight_kg" and "height_cm";
    the height is converted from centimetres to metres before squaring.
    """
    height_m = row["height_cm"] / 100
    return row["weight_kg"] / height_m ** 2

In [5]:
# axis=1 applies calculate_bmi to each row, giving one BMI value per player
df_players.apply(calculate_bmi, axis=1)

short_name
L. Messi             24.913495
Cristiano Ronaldo    23.735308
Neymar Jr            22.204082
J. Oblak             24.615211
E. Hazard            24.163265
                       ...    
Shao Shuai           22.835010
Xiao Mingjie         21.066743
Zhang Wei            21.678807
Wang Haijian         21.621622
Pan Ximing           23.547881
Length: 18278, dtype: float64

##### LAMBDA FUNCTION

Useful when we need a short, disposable function

In [7]:
# Basic function

def sum_values(a, b):
    """Return ``a + b`` for any two operands that support the ``+`` operator."""
    total = a + b
    return total

In [8]:
# Call the regular (def-defined) function
sum_values(2,3)

5

In [9]:
# Lambda definition (a one-liner): the ":" separates the inputs from the returned expression
sum_values_lambda = lambda a, b: a + b

In [10]:
# Evaluating the bare name shows the function object itself; the lambda is not called here
sum_values_lambda

<function __main__.<lambda>(a, b)>

In [11]:
# The lambda is called exactly like the def-defined function
sum_values_lambda(2,3)

5

##### APPLY + LAMBDA FUNCTION

In [None]:
# Disposable converter: centimetres -> metres
lambda_heightConverter = lambda x: x / 100
# df_players["height_cm"].apply(lambda_heightConverter)

# apply + lambda is unnecessary in this case: plain vectorized division is enough
df_players["height_cm"] / 100

short_name
L. Messi             1.70
Cristiano Ronaldo    1.87
Neymar Jr            1.75
J. Oblak             1.88
E. Hazard            1.75
                     ... 
Shao Shuai           1.86
Xiao Mingjie         1.77
Zhang Wei            1.86
Wang Haijian         1.85
Pan Ximing           1.82
Name: height_cm, Length: 18278, dtype: float64

In [17]:
# Convert the long_name Series to upper case with apply + lambda
df_players["long_name"].apply(lambda name: name.upper())

# The lambda receives each string item, so str.upper() can be called on it directly.
# A Series has no .upper attribute of its own, so the instruction below won't work:
# df_players["long_name"].upper
# (without a lambda, the Series-level equivalent goes through the string accessor:
#  df_players["long_name"].str.upper())

short_name
L. Messi                  LIONEL ANDRÉS MESSI CUCCITTINI
Cristiano Ronaldo    CRISTIANO RONALDO DOS SANTOS AVEIRO
Neymar Jr                  NEYMAR DA SILVA SANTOS JUNIOR
J. Oblak                                       JAN OBLAK
E. Hazard                                    EDEN HAZARD
                                    ...                 
Shao Shuai                                            邵帅
Xiao Mingjie                                MINGJIE XIAO
Zhang Wei                                             张威
Wang Haijian                                         汪海健
Pan Ximing                                           潘喜明
Name: long_name, Length: 18278, dtype: object

In [18]:
# Goal: get the year of the dob Series. First inspect the column dtypes:
# in the output below dob is listed as object, not as a datetime type.
df_players.dtypes

sofifa_id      int64
player_url    object
long_name     object
age            int64
dob           object
               ...  
lb            object
lcb           object
cb            object
rcb           object
rb            object
Length: 103, dtype: object

In [None]:
# Parse dob into datetimes so that each item exposes a .year attribute.
# The column is overwritten on df_players; re-running the cell is harmless because
# pd.to_datetime leaves already-parsed datetimes unchanged.
df_players["dob"] = pd.to_datetime(df_players["dob"])

# Use a lambda to get the year: apply passes each individual item to the function
extract_year_function = lambda x: x.year
df_players["dob"].apply(extract_year_function)

# Alternative without a lambda: the dt accessor works on the whole Series at once,
# whereas the lambda above receives one item at a time.
# df_players["dob"].dt.year

short_name
L. Messi             1987
Cristiano Ronaldo    1985
Neymar Jr            1992
J. Oblak             1993
E. Hazard            1991
                     ... 
Shao Shuai           1997
Xiao Mingjie         1997
Zhang Wei            2000
Wang Haijian         2000
Pan Ximing           1993
Name: dob, Length: 18278, dtype: int32

In [26]:
# Row-wise BMI as a lambda: weight (kg) divided by the squared height (cm converted to m)
bmi_calculus_lambda = lambda x: x["weight_kg"] / (x["height_cm"] / 100) ** 2

# axis=1 hands each row to the lambda, producing one value per player
df_players.apply(bmi_calculus_lambda, axis=1)

short_name
L. Messi             24.913495
Cristiano Ronaldo    23.735308
Neymar Jr            22.204082
J. Oblak             24.615211
E. Hazard            24.163265
                       ...    
Shao Shuai           22.835010
Xiao Mingjie         21.066743
Zhang Wei            21.678807
Wang Haijian         21.621622
Pan Ximing           23.547881
Length: 18278, dtype: float64