In [1]:
import pandas as pd

In [2]:
# Load the player table from players_20.csv, index it by 'short_name' and keep
# only the columns used in the rest of the notebook. The steps are chained so
# that no frame is mutated in place (no inplace=True).
selected_columns = ['long_name', 'age', 'dob', 'height_cm', 'weight_kg', 'nationality', 'club']
df = pd.read_csv('players_20.csv').set_index('short_name')[selected_columns]
df

Unnamed: 0_level_0,long_name,age,dob,height_cm,weight_kg,nationality,club
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona
Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus
Neymar Jr,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain
J. Oblak,Jan Oblak,26,1993-01-07,188,87,Slovenia,Atlético Madrid
E. Hazard,Eden Hazard,28,1991-01-07,175,74,Belgium,Real Madrid
...,...,...,...,...,...,...,...
Shao Shuai,邵帅,22,1997-03-10,186,79,China PR,Beijing Renhe FC
Xiao Mingjie,Mingjie Xiao,22,1997-01-01,177,66,China PR,Shanghai SIPG FC
Zhang Wei,张威,19,2000-05-16,186,75,China PR,Hebei China Fortune FC
Wang Haijian,汪海健,18,2000-08-02,185,74,China PR,Shanghai Greenland Shenhua FC


# Lambda Function

In [3]:
#basic function
def sum_values(a, b):
    """Return the result of ``a + b``."""
    return a + b

In [4]:
# Call the regular (def-based) function with two integers.
sum_values(2, 3)

5

In [5]:
# Lambda function: the same addition written as a one-line anonymous function.
# Syntax -> lambda <inputs>: <output expression>
sum_values_lambda = lambda a, b: a + b

In [6]:
# The lambda is called exactly like a regular function.
sum_values_lambda(2, 3)

5

In [7]:
#A lambda function is useful when we need a temporary function, i.e. a function that we are only going to use once.

# Apply + Lambda Function

In [8]:
# Convert the 'height_cm' Series from centimetres to metres by applying a
# lambda to every element.
df['height_cm'].apply(lambda height_cm: height_cm / 100)

short_name
L. Messi             1.70
Cristiano Ronaldo    1.87
Neymar Jr            1.75
J. Oblak             1.88
E. Hazard            1.75
                     ... 
Shao Shuai           1.86
Xiao Mingjie         1.77
Zhang Wei            1.86
Wang Haijian         1.85
Pan Ximing           1.82
Name: height_cm, Length: 18278, dtype: float64

In [9]:
# Alternative without lambda/apply: arithmetic on a Series is applied to
# every element, so dividing the whole column gives the same result.
df['height_cm'] / 100

short_name
L. Messi             1.70
Cristiano Ronaldo    1.87
Neymar Jr            1.75
J. Oblak             1.88
E. Hazard            1.75
                     ... 
Shao Shuai           1.86
Xiao Mingjie         1.77
Zhang Wei            1.86
Wang Haijian         1.85
Pan Ximing           1.82
Name: height_cm, Length: 18278, dtype: float64

In [10]:
# Convert every value of the 'long_name' Series to upper case by applying a
# lambda that calls the string's own .upper() method.
df['long_name'].apply(lambda name: name.upper())

short_name
L. Messi                  LIONEL ANDRÉS MESSI CUCCITTINI
Cristiano Ronaldo    CRISTIANO RONALDO DOS SANTOS AVEIRO
Neymar Jr                  NEYMAR DA SILVA SANTOS JUNIOR
J. Oblak                                       JAN OBLAK
E. Hazard                                    EDEN HAZARD
                                    ...                 
Shao Shuai                                            邵帅
Xiao Mingjie                                MINGJIE XIAO
Zhang Wei                                             张威
Wang Haijian                                         汪海健
Pan Ximing                                           潘喜明
Name: long_name, Length: 18278, dtype: object

In [11]:
# Alternative without lambda/apply: the .str accessor exposes string methods
# on a Series and applies them to each element.
df['long_name'].str.upper()

short_name
L. Messi                  LIONEL ANDRÉS MESSI CUCCITTINI
Cristiano Ronaldo    CRISTIANO RONALDO DOS SANTOS AVEIRO
Neymar Jr                  NEYMAR DA SILVA SANTOS JUNIOR
J. Oblak                                       JAN OBLAK
E. Hazard                                    EDEN HAZARD
                                    ...                 
Shao Shuai                                            邵帅
Xiao Mingjie                                MINGJIE XIAO
Zhang Wei                                             张威
Wang Haijian                                         汪海健
Pan Ximing                                           潘喜明
Name: long_name, Length: 18278, dtype: object

# Why doesn't df['long_name'].upper() work?

df['long_name'].apply(lambda x: x.upper())   ✅

df['long_name'].str.upper()                 ✅

df['long_name'].upper()                     ❌  ← Why does this fail?


Because df['long_name'] is a pandas Series, not a string.

.upper() is a method for strings, not for Series.

If you try df['long_name'].upper(), Python raises an AttributeError:

"'Series' object has no attribute 'upper'"

.str is a special accessor in pandas that lets you apply string methods to each element of a Series.

In [16]:
# Convert the 'dob' column to datetime64[ns] so that its values expose date
# attributes such as .year and the Series supports the .dt accessor, both of
# which are used in the following cells.
df['dob'] = df['dob'].astype('datetime64[ns]')

In [17]:
# Extract the birth year from every 'dob' value with a lambda.
df['dob'].apply(lambda birth_date: birth_date.year)

short_name
L. Messi             1987
Cristiano Ronaldo    1985
Neymar Jr            1992
J. Oblak             1993
E. Hazard            1991
                     ... 
Shao Shuai           1997
Xiao Mingjie         1997
Zhang Wei            2000
Wang Haijian         2000
Pan Ximing           1993
Name: dob, Length: 18278, dtype: int64

In [18]:
# Alternative without lambda/apply: the .dt accessor exposes datetime
# properties on a Series. The years are the same as with apply, but the
# output below shows dtype int32 here versus int64 for the apply version.
df['dob'].dt.year

short_name
L. Messi             1987
Cristiano Ronaldo    1985
Neymar Jr            1992
J. Oblak             1993
E. Hazard            1991
                     ... 
Shao Shuai           1997
Xiao Mingjie         1997
Zhang Wei            2000
Wang Haijian         2000
Pan Ximing           1993
Name: dob, Length: 18278, dtype: int32

In [20]:
# Apply a lambda to the whole DataFrame to calculate each player's BMI:
# weight in kg divided by the square of the height in metres.
# axis=1 passes one row at a time to the lambda, so columns are read by name.
df.apply(
    lambda row: row['weight_kg'] / ((row['height_cm'] / 100) ** 2),
    axis=1,
)

short_name
L. Messi             24.913495
Cristiano Ronaldo    23.735308
Neymar Jr            22.204082
J. Oblak             24.615211
E. Hazard            24.163265
                       ...    
Shao Shuai           22.835010
Xiao Mingjie         21.066743
Zhang Wei            21.678807
Wang Haijian         21.621622
Pan Ximing           23.547881
Length: 18278, dtype: float64