In [2]:
import numpy as np
import pandas as pd

### 算术运算和数据对齐

#### Series

In [3]:
# Two integer Series whose indexes overlap only on the labels 'a' and 'c'.
s1 = pd.Series(np.arange(4), index=['a', 'b', 'c', 'd'])
s2 = pd.Series(np.arange(5), index=['a', 'c', 'e', 'f', 'g'])
# sep="\n" puts each Series on its own line. With the default sep=" " the
# first row would be prefixed by a space and misaligned with the rows below.
print("<s1>", s1, sep="\n")
print("<s2>", s2, sep="\n")

<s1>
 a    0
b    1
c    2
d    3
dtype: int64
<s2>
 a    0
c    1
e    2
f    3
g    4
dtype: int64


In [4]:
# Addition aligns on index labels: labels present in both Series are summed,
# while labels found in only one of them produce NaN.
# sep="\n" keeps the first row of the result aligned with the rows below it
# (the default sep=" " would insert a leading space before that row).
print("<s1 + s2>", s1 + s2, sep="\n")

<s1 + s2>
 a    0.0
b    NaN
c    3.0
d    NaN
e    NaN
f    NaN
g    NaN
dtype: float64


#### DataFrame

In [5]:
# Two integer frames that overlap only partially:
# shared rows are 'a' and 'd', shared columns are 'A' and 'B'.
d1 = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=list("abcd"),
    columns=["A", "B", "C"],
)
d2 = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("adf"),
    columns=["A", "B", "D"],
)

In [6]:
# Display d1 (rows a-d, columns A-C).
d1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [7]:
# Display d2 (rows a, d, f; columns A, B, D).
d2

Unnamed: 0,A,B,D
a,0,1,2
d,3,4,5
f,6,7,8


In [9]:
# Cells whose row label and column label exist in both frames are added;
# every other cell in the union of the labels is filled with NaN.
d1 + d2

Unnamed: 0,A,B,C,D
a,0.0,2.0,,
b,,,,
c,,,,
d,12.0,14.0,,
f,,,,


### 使用填充值的算术方法

`+`  （加法）    - add/radd

`-`  （减法）    - sub/rsub

`/`  （除法）    - div/rdiv

`//` （整除）    - floordiv/rfloordiv

`*`  （乘法）    - mul/rmul

`**` （幂次方）  - pow/rpow

#### Series

In [12]:
# Re-create the two partially overlapping Series (shared labels: 'a' and 'c').
s1 = pd.Series(np.arange(4), index=['a', 'b', 'c', 'd'])
s2 = pd.Series(np.arange(5), index=['a', 'c', 'e', 'f', 'g'])
# sep="\n" puts each Series on its own line. With the default sep=" " the
# first row would be prefixed by a space and misaligned with the rows below.
print("<s1>", s1, sep="\n")
print("<s2>", s2, sep="\n")
# fill_value supplies the value to use for a label that is missing from one
# of the Series, so a label present on only one side no longer yields NaN.
print("<s1 + s2>", s1.add(s2, fill_value=0), sep="\n")

<s1>
 a    0
b    1
c    2
d    3
dtype: int64
<s2>
 a    0
c    1
e    2
f    3
g    4
dtype: int64
<s1 + s2>
 a    0.0
b    1.0
c    3.0
d    3.0
e    2.0
f    3.0
g    4.0
dtype: float64


#### DataFrame

In [14]:
# Re-create the two partially overlapping frames for the fill_value examples:
# shared rows are 'a' and 'd', shared columns are 'A' and 'B'.
d1 = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=list("abcd"),
    columns=["A", "B", "C"],
)
d2 = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("adf"),
    columns=["A", "B", "D"],
)

In [15]:
# Display d1 (rows a-d, columns A-C).
d1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [16]:
# Display d2 (rows a, d, f; columns A, B, D).
d2

Unnamed: 0,A,B,D
a,0,1,2
d,3,4,5
f,6,7,8


In [18]:
# ⚠️ fill_value only substitutes for a cell that is missing from ONE of the frames.
#    Cells absent from both d1 and d2 -- (b, D), (c, D) and (f, C) -- have
#    nothing to add and therefore remain NaN in the result.
d1.add(d2, fill_value = 0)

Unnamed: 0,A,B,C,D
a,0.0,2.0,2.0,2.0
b,3.0,4.0,5.0,
c,6.0,7.0,8.0,
d,12.0,14.0,11.0,5.0
f,6.0,7.0,,8.0


In [20]:
# Element-wise reciprocal of d1. d1 holds 0 at (a, A), so that cell comes
# out as inf rather than raising an error.
1 / d1

Unnamed: 0,A,B,C
a,inf,1.0,0.5
b,0.333333,0.25,0.2
c,0.166667,0.142857,0.125
d,0.111111,0.1,0.090909


In [21]:
# Equivalent to 1 / d1: rdiv is the reflected form of div, so the frame is
# the divisor and the argument is the dividend.
d1.rdiv(1)

Unnamed: 0,A,B,C
a,inf,1.0,0.5
b,0.333333,0.25,0.2
c,0.166667,0.142857,0.125
d,0.111111,0.1,0.090909


### 混合运算

In [31]:
# 4x3 integer frame (rows a-d, columns A-C) used by the DataFrame/Series
# examples in this section.
d3 = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=list("abcd"),
    columns=["A", "B", "C"],
)
d3

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [32]:
# Take the first row of d3 by position; the result is a Series indexed by
# d3's column labels (A, B, C) and named after the row label 'a'.
s3 = d3.iloc[0]
s3

A    0
B    1
C    2
Name: a, dtype: int64

In [33]:
# The Series index (A, B, C) is matched against d3's columns, and the
# subtraction is then repeated for every row of d3.
d3 - s3

Unnamed: 0,A,B,C
a,0,0,0
b,3,3,3
c,6,6,6
d,9,9,9


In [36]:
# Column 'A' of d3 as a Series indexed by the row labels (a-d).
# Note: despite the d-prefix used for the frames above, d4 is a Series.
d4 = d3['A']
d4

a    0
b    3
c    6
d    9
Name: A, dtype: int64

In [40]:
# Match d4 against d3's row labels (axis="index") and subtract it from
# every column of d3.
d3.sub(d4, axis = "index")
# Equivalent to: d3.sub(d4, axis = 0)

Unnamed: 0,A,B,C
a,0,1,2
b,0,1,2
c,0,1,2
d,0,1,2
