# Apply, applymap & map
Referencia: https://towardsdatascience.com/introduction-to-pandas-apply-applymap-and-map-5d3e044e93ff

In [1]:
import pandas as pd
import numpy as np

## Series methods

### Map

Map values of Series according to input correspondence.

Used for substituting each value in a Series with another value, that may be derived from a function, a dict or a Series.

**When arg is a dictionary, values in Series that are not in the dictionary (as keys) are converted to NaN.**

In [2]:
s = pd.Series(["cat", "dog",np.nan, "rabbit"])
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [3]:
dictionary = {"cat":"kitten", "dog":"puppy"}
s.map(dictionary)

0    kitten
1     puppy
2       NaN
3       NaN
dtype: object

In [4]:
s.map("My fav animal is {}".format)

0       My fav animal is cat
1       My fav animal is dog
2       My fav animal is nan
3    My fav animal is rabbit
dtype: object

### Format

In [5]:
a = "Dani"
b = "Madrid"
c = 15

In [6]:
print("{0} vive en {1}. \nTHe Bridge está en el numero {2}.".format(a,b,c))

Dani vive en Madrid. 
THe Bridge está en el numero 15.


In [7]:
#dataframe example

In [8]:
data = pd.DataFrame({'food': ['bacon', 'pulled pork', 'bacon',
                              'Pastrami', 'corned beef', 'Bacon',
                              'pastrami', 'honey ham', 'nova lox'],
                     'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 6]})

In [9]:
meat_to_animal = {
  'bacon': 'pig',
  'pulled pork': 'pig',
  'pastrami': 'cow',
  'corned beef': 'cow',
  'honey ham': 'pig',
  'nova lox': 'salmon'
}

In [10]:
data

Unnamed: 0,food,ounces
0,bacon,4.0
1,pulled pork,3.0
2,bacon,12.0
3,Pastrami,6.0
4,corned beef,7.5
5,Bacon,8.0
6,pastrami,3.0
7,honey ham,5.0
8,nova lox,6.0


In [11]:
loweredcased = data["food"].str.lower()
loweredcased

0          bacon
1    pulled pork
2          bacon
3       pastrami
4    corned beef
5          bacon
6       pastrami
7      honey ham
8       nova lox
Name: food, dtype: object

In [12]:
data["animal"] = loweredcased.map(meat_to_animal)

In [13]:
data["animal2"] = data["food"].map(lambda x: meat_to_animal[x.lower()])

In [14]:
data

Unnamed: 0,food,ounces,animal,animal2
0,bacon,4.0,pig,pig
1,pulled pork,3.0,pig,pig
2,bacon,12.0,pig,pig
3,Pastrami,6.0,cow,cow
4,corned beef,7.5,cow,cow
5,Bacon,8.0,pig,pig
6,pastrami,3.0,cow,cow
7,honey ham,5.0,pig,pig
8,nova lox,6.0,salmon,salmon


### Str
Vectorized string functions for Series and Index.

https://towardsdatascience.com/mastering-string-methods-in-pandas-8d3cd00b720d

- [`str.contains`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.contains.html)
- [`str.startswith`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.startswith.html#pandas.Series.str.startswith)

In [15]:
print([i for i in dir(pd.Series.str) if not i.startswith("_")])

['capitalize', 'casefold', 'cat', 'center', 'contains', 'count', 'decode', 'encode', 'endswith', 'extract', 'extractall', 'find', 'findall', 'get', 'get_dummies', 'index', 'isalnum', 'isalpha', 'isdecimal', 'isdigit', 'islower', 'isnumeric', 'isspace', 'istitle', 'isupper', 'join', 'len', 'ljust', 'lower', 'lstrip', 'match', 'normalize', 'pad', 'partition', 'repeat', 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit', 'rstrip', 'slice', 'slice_replace', 'split', 'startswith', 'strip', 'swapcase', 'title', 'translate', 'upper', 'wrap', 'zfill']


### Apply

Invoke function on values of Series.

Can be ufunc (a NumPy function that applies to the entire Series) or a Python function that only works on single values.

In [16]:
chipo = pd.read_csv("https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv", sep='\t')

In [17]:
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

In [18]:
chipo["item_price"].str[1:].astype(float)

0        2.39
1        3.39
2        3.39
3        2.39
4       16.98
        ...  
4617    11.75
4618    11.75
4619    11.25
4620     8.75
4621     8.75
Name: item_price, Length: 4622, dtype: float64

In [19]:
chipo.item_price.apply(lambda x: float(x[1:]))

0        2.39
1        3.39
2        3.39
3        2.39
4       16.98
        ...  
4617    11.75
4618    11.75
4619    11.25
4620     8.75
4621     8.75
Name: item_price, Length: 4622, dtype: float64

## Dataframe methods

### Apply

Apply a function along an axis of the DataFrame.

Objects passed to the function are Series objects whose index is either the DataFrame’s index (axis=0) or the DataFrame’s columns (axis=1). By default (result_type=None), the final return type is inferred from the return type of the applied function. Otherwise, it depends on the result_type argument.

`**axis{0 or ‘index’, 1 or ‘columns’}, default 0**`

The second parameter axis is to specify which axis the function is applied to. 0 for applying the function to each column and 1 for applying the function to each row.

In [25]:
#la operación se aplica a toda la row


In [20]:
df = pd.DataFrame({"A": [1,2,3,4],
                  "B": [10,20,30,40],
                  "C": [20,40,60,80]},
                  index = ["Row 1", "Row 2", "Row 3", "Row 4"])

In [21]:
df

Unnamed: 0,A,B,C
Row 1,1,10,20
Row 2,2,20,40
Row 3,3,30,60
Row 4,4,40,80


In [22]:
def suma_serie(row):
    return row.sum()

In [23]:
df["D"] = df.apply(suma_serie,0)
df

Unnamed: 0,A,B,C,D
Row 1,1,10,20,
Row 2,2,20,40,
Row 3,3,30,60,
Row 4,4,40,80,


In [24]:
df["E"] =df.apply(lambda x: x.sum(),1)
df

Unnamed: 0,A,B,C,D,E
Row 1,1,10,20,,31.0
Row 2,2,20,40,,62.0
Row 3,3,30,60,,93.0
Row 4,4,40,80,,124.0


In [30]:
df["F"] = df.apply(lambda x: x.mean(),axis =1)
df

Unnamed: 0,A,B,C,D,E,F
Row 1,1,10,20,,31.0,15.5
Row 2,2,20,40,,62.0,31.0
Row 3,3,30,60,,93.0,46.5
Row 4,4,40,80,,124.0,62.0


In [35]:
df.loc["Row Rafa"] = df.apply(suma_serie)
df

Unnamed: 0,A,B,C,D,E,F
Row 1,1.0,10.0,20.0,,31.0,15.5
Row 2,2.0,20.0,40.0,,62.0,31.0
Row 3,3.0,30.0,60.0,,93.0,46.5
Row 4,4.0,40.0,80.0,,124.0,62.0
Row Rafa,40.0,400.0,800.0,0.0,1240.0,620.0


In [34]:
# por Series
df.loc["Row Rafa"] = df.apply(suma_serie,axis = 0)
df

Unnamed: 0,A,B,C,D,E,F
Row 1,1.0,10.0,20.0,,31.0,15.5
Row 2,2.0,20.0,40.0,,62.0,31.0
Row 3,3.0,30.0,60.0,,93.0,46.5
Row 4,4.0,40.0,80.0,,124.0,62.0
Row Rafa,30.0,300.0,600.0,0.0,930.0,465.0


In [36]:
df.applymap(np.square)

Unnamed: 0,A,B,C,D,E,F
Row 1,1.0,100.0,400.0,,961.0,240.25
Row 2,4.0,400.0,1600.0,,3844.0,961.0
Row 3,9.0,900.0,3600.0,,8649.0,2162.25
Row 4,16.0,1600.0,6400.0,,15376.0,3844.0
Row Rafa,1600.0,160000.0,640000.0,0.0,1537600.0,384400.0


### Applymap
Apply a function to a Dataframe elementwise.

This method applies a function that accepts and returns a scalar to every element of a DataFrame.

In [40]:
data

Unnamed: 0,food,ounces,animal,animal2
0,bacon,4.0,pig,pig
1,pulled pork,3.0,pig,pig
2,bacon,12.0,pig,pig
3,Pastrami,6.0,cow,cow
4,corned beef,7.5,cow,cow
5,Bacon,8.0,pig,pig
6,pastrami,3.0,cow,cow
7,honey ham,5.0,pig,pig
8,nova lox,6.0,salmon,salmon


In [37]:
data.dtypes

food        object
ounces     float64
animal      object
animal2     object
dtype: object

In [39]:
data.drop("ounces", axis=1).applymap(lambda x: len(x))

Unnamed: 0,food,animal,animal2
0,5,3,3
1,11,3,3
2,5,3,3
3,8,3,3
4,11,3,3
5,5,3,3
6,8,3,3
7,9,3,3
8,8,6,6


In [41]:
data.applymap(lambda x: len(str(x)))

Unnamed: 0,food,ounces,animal,animal2
0,5,3,3,3
1,11,3,3,3
2,5,4,3,3
3,8,3,3,3
4,11,3,3,3
5,5,3,3,3
6,8,3,3,3
7,9,3,3,3
8,8,3,6,6
