# Operating on Data in Pandas

In [1]:
import numpy as np
import pandas as pd

## Ufuncs: Index Preservation

In [6]:
# Seed the generator so the example values are repeatable from a fresh kernel.
rng = np.random.default_rng(seed=42)
# Four integers drawn from [0, 10); the Series gets the default 0..3 index.
ser = pd.Series(rng.integers(low=0, high=10, size=4))
ser

0    0
1    7
2    6
3    4
dtype: int64

In [11]:
# A 3x4 frame of integers in [0, 10) with columns labelled A-D.
# NOTE(review): the values come from the shared, stateful `rng`, so they depend
# on how many draws were made before this cell ran; re-running the cell without
# re-seeding produces different numbers.
df = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 4)),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
0,5,1,8,4
1,5,3,1,9
2,7,6,4,8


In [12]:
# Apply a NumPy ufunc directly to the Series: the result is again a Series
# carrying the same index labels (0-3), now with float64 values.
np.exp(ser)

0       1.000000
1    1096.633158
2     403.428793
3      54.598150
dtype: float64

In [13]:
# Scale every element by pi/4, then take the sine elementwise; the row index
# and the column labels A-D carry through to the result.
# Entries on the order of 1e-16 are floating-point round-off at multiples of
# pi, where the exact sine is 0.
np.sin(df * np.pi/4)

Unnamed: 0,A,B,C,D
0,-0.707107,0.707107,-2.449294e-16,1.224647e-16
1,-0.707107,0.707107,0.7071068,0.7071068
2,-0.707107,-1.0,1.224647e-16,-2.449294e-16


## Ufuncs: Index Alignment

### Index Alignment in Series

In [17]:
# Two integer Series whose labels only partly overlap: 'b' appears only in
# series1 and 'g' only in series2.
series1 = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 4})
series2 = pd.Series({'c': 4, 'd': 5, 'g': 0, 'a': 8})

# Addition pairs values by label, not by position; labels present in only one
# operand come out as NaN.
series1 + series2

a    9.0
b    NaN
c    7.0
d    9.0
g    NaN
dtype: float64

In [21]:
# The labels of `series1 + series2` are exactly the union of both indexes,
# which is what this call computes explicitly.
series1.index.union(series2.index)

Index(['a', 'b', 'c', 'd', 'g'], dtype='object')

In [22]:
# Method form of `series1 + series2`. With fill_value=0, a label missing from
# one operand is treated as 0 there, so 'b' and 'g' get numbers instead of NaN.
series1.add(series2, fill_value=0)

a    9.0
b    2.0
c    7.0
d    9.0
g    0.0
dtype: float64

### Index Alignment in DataFrames

In [27]:
# A 3x2 frame of integers in [0, 10) with columns 'a' and 'b'.
# NOTE(review): values depend on the current state of the shared `rng`.
df1 = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 2)),
    columns=list('ab'),
)
df1

Unnamed: 0,a,b
0,4,8
1,6,7
2,7,1


In [34]:
# A 3x4 frame of integers in [0, 10). The columns are deliberately listed in a
# different order ('b', 'c', 'a', 'd') than in df1.
# NOTE(review): values depend on the current state of the shared `rng`.
df2 = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 4)),
    columns=list('bcad'),
)
df2

Unnamed: 0,b,c,a,d
0,3,2,5,6
1,9,4,1,8
2,6,7,0,3


In [35]:
# Columns are matched by label regardless of their order in either frame.
# The result has the columns a-d; 'c' and 'd' exist only in df2, so they have
# no partner value and come out as missing (NaN).
df2 + df1

Unnamed: 0,a,b,c,d
0,9,11,,
1,7,16,,
2,7,7,,


## Ufuncs: Operations Between DataFrames and Series

In [37]:
# Plain NumPy array of shape (3, 4) holding integers in [0, 10).
# NOTE(review): values depend on the current state of the shared `rng`.
A = rng.integers(low=0, high=10, size=(3, 4))
A

array([[8, 1, 8, 0],
       [7, 7, 7, 6],
       [4, 7, 2, 7]])

In [38]:
# NumPy broadcasting: the first row A[0] is subtracted from every row of A,
# so the first row of the result is all zeros.
A - A[0]

array([[ 0,  0,  0,  0],
       [-1,  6, -1,  6],
       [-4,  6, -6,  7]])

In [43]:
# Wrap the array A in a DataFrame with column labels Q, R, S, T.
# NOTE(review): this rebinds `df`, replacing the A-D frame built in the
# "Index Preservation" section; cells that use `df` see whichever ran last.
df = pd.DataFrame(A, columns=list('QRST'))
df

Unnamed: 0,Q,R,S,T
0,8,1,8,0
1,7,7,7,6
2,4,7,2,7


In [46]:
# df.iloc[0] is a Series labelled Q, R, S, T. Its labels are matched to the
# DataFrame's columns, so row 0 is subtracted from every row -- the same
# numbers as the NumPy expression `A - A[0]`.
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-1,6,-1,6
2,-4,6,-6,7


In [50]:
# To operate column-wise instead, use the method form with axis=0: the Series
# df['Q'] is matched against the row index, so column Q is subtracted from
# every column (Q itself becomes all zeros).
df.sub(df['Q'], axis=0)

Unnamed: 0,Q,R,S,T
0,0,-7,0,-8
1,0,0,0,-1
2,0,3,-2,3


In [49]:
# Same column-wise subtraction spelled differently: `subtract` with the column
# selected via .loc yields the same result as `df.sub(df['Q'], axis=0)`.
df.subtract(df.loc[:, 'Q'], axis=0)

Unnamed: 0,Q,R,S,T
0,0,-7,0,-8
1,0,0,0,-1
2,0,3,-2,3


In [51]:
# Take the first row but only every second column (step 2): a Series with the
# labels Q and S, named after the row label 0.
halfrow = df.iloc[0, ::2]
halfrow

Q    8
S    8
Name: 0, dtype: int64

In [52]:
# halfrow only carries the labels Q and S. After label alignment, columns R
# and T have nothing to subtract and come out as NaN, and the result is
# floating point.
df - halfrow

Unnamed: 0,Q,R,S,T
0,0.0,,0.0,
1,-1.0,,-1.0,
2,-4.0,,-6.0,
