# Apply, Map and Applymap

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# If we want to create a new column and set value on rows of other column 
# we use .apply()
def ranking_cat(mov):
    if mov < 4:
        return "Flop"
    elif mov < 4.5:
        return "Hit"
    elif mov >= 4.5:
        return "Super-Hit"

In [6]:
bs = pd.read_csv("bestsellers.csv")
bs.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [9]:
bs["Score_Cat"]=bs["User Rating"].apply(ranking_cat)
bs["Score_Cat"].value_counts()

Super-Hit    452
Hit           91
Flop           7
Name: Score_Cat, dtype: int64

## .apply() with 'lambda' and 'arguments'

## .apply()

In [11]:
bs["Price"]

0       8
1      22
2      15
3       6
4      12
       ..
545     8
546     8
547     8
548     8
549     8
Name: Price, Length: 550, dtype: int64

In [12]:
# To convert price from $ to "₹" i.e 1$ --> 85₹
# We can use lambda or apply()
# syntax: .apply(lambda x: f"{x*85}₹")

In [14]:
bs["Price"].apply(lambda x:f"{x*85} ₹")

0       680 ₹
1      1870 ₹
2      1275 ₹
3       510 ₹
4      1020 ₹
        ...  
545     680 ₹
546     680 ₹
547     680 ₹
548     680 ₹
549     680 ₹
Name: Price, Length: 550, dtype: object

## args = () / argument

In [15]:
# We apply a function, then we use .apply() with argument
def to_rupee(nums,multiplier):
    return f"{nums*multiplier} ₹"
# Now we will apply this function with an argument i.e args

In [16]:
bs["Price"].apply(to_rupee,args=(82,))
# Here we don't need the argument, as function will automatically take 
# number from every row as 2nd argument.

0       656 ₹
1      1804 ₹
2      1230 ₹
3       492 ₹
4       984 ₹
        ...  
545     656 ₹
546     656 ₹
547     656 ₹
548     656 ₹
549     656 ₹
Name: Price, Length: 550, dtype: object

# .apply() with Dataframes

In [18]:
# Till now we used .apply() with series only.
# Now we'll use .apply() on Dataframe
df = bs[["User Rating", "Reviews", "Price","Year"]]
df

Unnamed: 0,User Rating,Reviews,Price,Year
0,4.7,17350,8,2016
1,4.6,2052,22,2011
2,4.7,18979,15,2018
3,4.7,21424,6,2017
4,4.8,7665,12,2019
...,...,...,...,...
545,4.9,9413,8,2019
546,4.7,14331,8,2016
547,4.7,14331,8,2017
548,4.7,14331,8,2018


In [19]:
def get_range(s):
    return s.max() - s.min()

In [20]:
df.apply(get_range)

User Rating        1.6
Reviews        87804.0
Price            105.0
Year              10.0
dtype: float64

In [None]:
# By default, .apply() only applies on columns i.e axis = 0
# .apply() on axis=1 or on 'rows' is not common

In [23]:
titanic = pd.read_csv("titanic.csv")
titanic.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2,?,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11,?,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,?,?,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,?,135,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,?,?,"Montreal, PQ / Chesterville, ON"


In [24]:
# We'll add sibsb i.e siblings and parch i.e parent,children columns
# and get the total family members and categorize them
# At first, we'll define a function, then .apply()
def family_cat(s):
    fam_size = s.sibsp + s.parch
    if fam_size == 0:
        return "Solo"
    elif fam_size < 5:
        return "Average"
    elif fam_size > 5:
        return "Large"

In [25]:
titanic.apply(family_cat,axis=1)

0          Solo
1       Average
2       Average
3       Average
4       Average
         ...   
1304    Average
1305    Average
1306       Solo
1307       Solo
1308       Solo
Length: 1309, dtype: object

In [26]:
titanic["family_size"] = titanic.apply(family_cat,axis=1)

In [27]:
titanic["family_size"].value_counts()

Solo       790
Average    459
Large       35
Name: family_size, dtype: int64

In [28]:
# We can analyze further by grouping
titanic.groupby(["sex","family_size"])["survived"].mean()

sex     family_size
female  Average        0.771429
        Large          0.176471
        Solo           0.731959
male    Average        0.294393
        Large          0.055556
        Solo           0.162752
Name: survived, dtype: float64

## The series .map() method

In [29]:
titanic['pclass']

0       1
1       1
2       1
3       1
4       1
       ..
1304    3
1305    3
1306    3
1307    3
1308    3
Name: pclass, Length: 1309, dtype: int64

In [33]:
titanic['pclass'].map({1:"1st",2:"2nd",3:"3rd"})

0       1st
1       1st
2       1st
3       1st
4       1st
       ... 
1304    3rd
1305    3rd
1306    3rd
1307    3rd
1308    3rd
Name: pclass, Length: 1309, dtype: object

## apply lambda with map()

In [38]:
titanic["age"].map(lambda a: a < 18)
# It should print a series with boolean of True of False but showing error for some reason

TypeError: '<' not supported between instances of 'str' and 'int'

## .applymap()

In [42]:
# This is a Dataframe only method.
titanic[["name","sex"]].applymap(str.upper)

Unnamed: 0,name,sex
0,"ALLEN, MISS. ELISABETH WALTON",FEMALE
1,"ALLISON, MASTER. HUDSON TREVOR",MALE
2,"ALLISON, MISS. HELEN LORAINE",FEMALE
3,"ALLISON, MR. HUDSON JOSHUA CREIGHTON",MALE
4,"ALLISON, MRS. HUDSON J C (BESSIE WALDO DANIELS)",FEMALE
...,...,...
1304,"ZABOUR, MISS. HILENI",FEMALE
1305,"ZABOUR, MISS. THAMINE",FEMALE
1306,"ZAKARIAN, MR. MAPRIEDEDER",MALE
1307,"ZAKARIAN, MR. ORTIN",MALE


In [43]:
df = titanic[["pclass","survived","age","fare"]]

In [44]:
df.applymap(lambda el: el*7)

Unnamed: 0,pclass,survived,age,fare
0,7,7,29292929292929,211.3375211.3375211.3375211.3375211.3375211.33...
1,7,7,0.91670.91670.91670.91670.91670.91670.9167,151.55151.55151.55151.55151.55151.55151.55
2,7,0,2222222,151.55151.55151.55151.55151.55151.55151.55
3,7,0,30303030303030,151.55151.55151.55151.55151.55151.55151.55
4,7,0,25252525252525,151.55151.55151.55151.55151.55151.55151.55
...,...,...,...,...
1304,21,0,14.514.514.514.514.514.514.5,14.454214.454214.454214.454214.454214.454214.4542
1305,21,0,???????,14.454214.454214.454214.454214.454214.454214.4542
1306,21,0,26.526.526.526.526.526.526.5,7.2257.2257.2257.2257.2257.2257.225
1307,21,0,27272727272727,7.2257.2257.2257.2257.2257.2257.225


In [46]:
titanic[["name","sex"]].applymap(len)

Unnamed: 0,name,sex
0,29,6
1,30,4
2,28,6
3,36,4
4,47,6
...,...,...
1304,20,6
1305,21,6
1306,25,4
1307,19,4
