In [1]:
import pandas as pd
import numpy as np

# Arithmetic and Data Alignment

## Union

Adding two DataFrame objects aligns them on both row and column labels. The result's index and columns are the union of the operands' labels, and every position whose label is missing from either operand is NaN:

In [6]:
# Seed NumPy's global RNG so the random frames in this notebook are reproducible.
np.random.seed(0)
# 3x3 frame of uniform [0, 1) samples with columns a, b, c and the default row labels.
df1 = pd.DataFrame(
    np.random.rand(3, 3),
    columns=list("abc"),
)
df1

Unnamed: 0,a,b,c
0,0.548814,0.715189,0.602763
1,0.544883,0.423655,0.645894
2,0.437587,0.891773,0.963663


In [7]:
# Second 3x3 frame of uniform samples; its columns are b, c, d.
df2 = pd.DataFrame(np.random.rand(3, 3), columns=list("bcd"))
df2

Unnamed: 0,b,c,d
0,0.383442,0.791725,0.528895
1,0.568045,0.925597,0.071036
2,0.087129,0.020218,0.83262


In [8]:
# Columns b and c exist in both frames and are summed; a (only in df1) and
# d (only in df2) appear in the result as all-NaN columns.
df1 + df2

Unnamed: 0,a,b,c,d
0,,1.098631,1.394488,
1,,0.991699,1.571491,
2,,0.978902,0.983881,


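Use the `add` method with `fill_value` to substitute a value for the operand that is missing a label, so that labels present in only one DataFrame do not produce NaN: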

In [30]:
# Same element-wise sum as `df1 + df2`, but a label missing from one operand
# is treated as 0, so columns a and d keep the values of the frame that has them.
df3 = df1.add(df2, fill_value=0)
df3

Unnamed: 0,a,b,c,d
0,0.548814,1.098631,1.394488,0.528895
1,0.544883,0.991699,1.571491,0.071036
2,0.437587,0.978902,0.983881,0.83262


## Series on the DataFrame's columns

Like NumPy, pandas supports arithmetic operations between DataFrames and Series.

Match the index of the Series on the DataFrame's columns, broadcasting down the rows:

In [34]:
# Select the row at position 2 of df3 as a Series indexed by df3's column labels.
ser1 = df3.iloc[2]
ser1

a    0.437587
b    0.978902
c    0.983881
d    0.832620
Name: 2, dtype: float64

In [35]:
# ser1's index (a-d) is aligned with df3's columns and subtracted from every
# row, so the row ser1 was taken from becomes all zeros.
df4 = df3 - ser1
df4

Unnamed: 0,a,b,c,d
0,0.111226,0.119729,0.410607,-0.303725
1,0.107296,0.012797,0.58761,-0.761584
2,0.0,0.0,0.0,0.0


Match the index of the Series on the DataFrame's columns, broadcasting down the rows. Labels that appear in only one of the two objects are kept in the result (the union of the labels) and filled with NaN:

In [36]:
# Integer Series labelled a, d, e; only a and d also occur among df4's columns.
ser2 = pd.Series(data=range(3), index=list("ade"))
ser2

a    0
d    1
e    2
dtype: int64

In [37]:
# Only labels a and d exist in both df4's columns and ser2's index; the result
# carries the union of the labels (a-e) and is NaN in columns b, c and e.
df4 - ser2

Unnamed: 0,a,b,c,d,e
0,0.111226,,,-1.303725,
1,0.107296,,,-1.761584,
2,0.0,,,-1.0,


Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:

In [38]:
# Display df3 again as the starting point for the row-wise subtraction.
df3

Unnamed: 0,a,b,c,d
0,0.548814,1.098631,1.394488,0.528895
1,0.544883,0.991699,1.571491,0.071036
2,0.437587,0.978902,0.983881,0.83262


In [39]:
# Series with the default integer index 0, 1, 2, which matches df3's row labels.
ser3 = pd.Series(data=[100, 200, 300])
ser3

0    100
1    200
2    300
dtype: int64

In [40]:
# axis=0 aligns ser3's index (0, 1, 2) with df3's row labels, so each row has
# its own value (100, 200, 300) subtracted from every column.
df3.sub(ser3, axis=0)

Unnamed: 0,a,b,c,d
0,-99.451186,-98.901369,-98.605512,-99.471105
1,-199.455117,-199.008301,-198.428509,-199.928964
2,-299.562413,-299.021098,-299.016119,-299.16738


## Function Application and Mapping

NumPy ufuncs (element-wise array functions) also operate on pandas objects:

In [41]:
# Display df4 again; column d holds the negative entries that np.abs flips next.
df4

Unnamed: 0,a,b,c,d
0,0.111226,0.119729,0.410607,-0.303725
1,0.107296,0.012797,0.58761,-0.761584
2,0.0,0.0,0.0,0.0


In [44]:
# The NumPy ufunc is applied element-wise; the result is a DataFrame that
# carries df4's row and column labels.
df5 = np.abs(df4)
df5

Unnamed: 0,a,b,c,d
0,0.111226,0.119729,0.410607,0.303725
1,0.107296,0.012797,0.58761,0.761584
2,0.0,0.0,0.0,0.0


Apply a function that takes a 1D array (a Series) to each column:

In [45]:
def func1(x):
    """Return the range (maximum minus minimum) of a 1D pandas object.

    Parameters
    ----------
    x : pandas.Series
        One column or one row, as handed over by ``DataFrame.apply``.

    Returns
    -------
    scalar
        ``x.max() - x.min()``.
    """
    return x.max() - x.min()
# With the default axis=0, func1 receives each column as a Series, giving one
# value per column label.
df4.apply(func1)

a    0.111226
b    0.119729
c    0.587610
d    0.761584
dtype: float64

Apply a function that takes a 1D array (a Series) to each row:

In [46]:
# axis=1 hands each row to func1, giving one value per row label.
# NOTE(review): the saved output (0.299381 for row 0) equals max - min of the
# absolute values, not of df4 as displayed earlier (0.410607 - (-0.303725));
# the cell was presumably run against different kernel state - re-run to confirm.
df4.apply(func1, axis=1)

0    0.299381
1    0.748787
2    0.000000
dtype: float64

Apply a function that returns a Series to get a DataFrame back:

In [50]:
def func2(x):
    """Summarise a 1D pandas object as its minimum, maximum and their difference.

    Parameters
    ----------
    x : pandas.Series
        One column or one row, as handed over by ``DataFrame.apply``.

    Returns
    -------
    pandas.Series
        Three values labelled ``'min'``, ``'max'`` and ``'difference'``.
    """
    # Compute each extreme once and reuse it for the difference.
    lowest, highest = x.min(), x.max()
    return pd.Series([lowest, highest, highest - lowest],
                     index=['min', 'max', 'difference'])
# func2 returns a Series, so apply combines the per-column results into a
# DataFrame whose rows are min / max / difference.
# NOTE(review): the saved output shows min 0.0 and max 0.761584 for column d,
# which fits np.abs(df4) rather than the df4 displayed earlier - re-run to confirm.
df4.apply(func2)

Unnamed: 0,a,b,c,d
min,0.0,0.0,0.0,0.0
max,0.111226,0.119729,0.58761,0.761584
difference,0.111226,0.119729,0.58761,0.761584


In [51]:
# Row-wise variant: each row label gets its own min / max / difference columns.
df4.apply(func2, axis=1)

Unnamed: 0,min,max,difference
0,0.111226,0.410607,0.299381
1,0.012797,0.761584,0.748787
2,0.0,0.0,0.0


Apply an element-wise Python function to a DataFrame:

In [52]:
def func3(x):
    """Format a number as a string with two decimal places (printf-style).

    Parameters
    ----------
    x : int or float
        The value to format.

    Returns
    -------
    str
        ``x`` rendered with exactly two digits after the decimal point.
    """
    # Wrap x in a 1-tuple so the % operator always treats it as a single value.
    return '%.2f' % (x,)
# Call func3 on every individual element of df4, producing a frame of strings.
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favour of
# DataFrame.map - confirm the pandas version this notebook targets.
df4.applymap(func3)

Unnamed: 0,a,b,c,d
0,0.11,0.12,0.41,0.3
1,0.11,0.01,0.59,0.76
2,0.0,0.0,0.0,0.0


In [61]:
def func3(x):
    """Format a number as a string with two decimal places (f-string variant).

    Parameters
    ----------
    x : int or float
        The value to format.

    Returns
    -------
    str
        ``x`` rendered with exactly two digits after the decimal point.
    """
    return f'{x:.2f}'
# Same element-wise formatting of df4, now using the f-string version of func3.
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favour of
# DataFrame.map - confirm the pandas version this notebook targets.
df4.applymap(func3)

Unnamed: 0,a,b,c,d
0,0.11,0.12,0.41,0.3
1,0.11,0.01,0.59,0.76
2,0.0,0.0,0.0,0.0


In [62]:
def func3(x):
    """Format a number as a string with two decimal places (str.format variant).

    Parameters
    ----------
    x : int or float
        The value to format.

    Returns
    -------
    str
        ``x`` rendered with exactly two digits after the decimal point.
    """
    return '{:.2f}'.format(x)
# Same element-wise formatting of df4, now using the str.format version of func3.
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favour of
# DataFrame.map - confirm the pandas version this notebook targets.
df4.applymap(func3)

Unnamed: 0,a,b,c,d
0,0.11,0.12,0.41,0.3
1,0.11,0.01,0.59,0.76
2,0.0,0.0,0.0,0.0


Apply an element-wise Python function to a Series:

In [66]:
# Series.map applies func3 to each element of column 'a'.  The result is
# attached with assign(), which returns a new frame, so df4 itself is not
# modified: earlier cells such as np.abs(df4), df4.apply(func1) and
# df4.applymap(func3) need df4 to stay purely numeric and would fail on
# re-run once a string column is attached to it.
df4_formatted = df4.assign(extra=df4['a'].map(func3))
df4_formatted

Unnamed: 0,a,b,c,d,e,extra
0,0.111226,0.119729,0.410607,0.11,0.11,0.11
1,0.107296,0.012797,0.58761,0.11,0.11,0.11
2,0.0,0.0,0.0,0.0,0.0,0.0
