In [None]:
## Operating on Data in Pandas

In [2]:
import pandas as pd
import numpy as np
## UFuncs

In [4]:
# Seeded generator so that a fresh top-to-bottom run draws the same numbers every time.
rng = np.random.default_rng(seed=42)
# Four integers drawn from [0, 10), wrapped in a Series with the default 0..3 index.
ser = pd.Series(rng.integers(low=0, high=10, size=4))

In [5]:
# A bare name on the last line of a cell displays the object.
ser

0    0
1    7
2    6
3    4
dtype: int64

In [6]:
# 3x4 table of integers drawn from [0, 10); the columns are labelled A through D.
df = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 4)),
    columns=list("ABCD"),
)

In [7]:
# Display the frame: three rows, columns A-D.
df

Unnamed: 0,A,B,C,D
0,4,8,0,6
1,2,0,5,9
2,7,7,7,7


In [None]:
## Filling and Not-a-Number values
#  Any index label that only one of the two operands has is marked with NaN ("Not a Number") in the result;
#  this is how Pandas marks missing data.

# The operation below has no values for index 0 or 3, because index alignment is maintained through the addition
# of Series A and B: label 0 exists only in A and label 3 exists only in B.


In [9]:
# Two Series whose integer labels overlap only at 1 and 2 (written as {label: value}).
A = pd.Series({0: 2, 1: 4, 2: 6})
B = pd.Series({1: 1, 2: 3, 3: 5})


In [10]:
# Element-wise sum aligned on index labels (method form of `+`, no fill value).
seriesAB = A.add(B)

In [11]:
# Display the sum: labels present in only one operand come out as NaN.
seriesAB

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [12]:
## If NaN for unmatched labels is not wanted, use the method form of the operator:
# fill_value=0 stands in for whichever of A or B lacks a given label before the addition.
seriesFilledAB = A.add(other=B, fill_value=0)
seriesFilledAB

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [16]:
# Pandas DataFrames align in the same way, on row labels and on column labels.
A = pd.DataFrame(rng.integers(low=0, high=20, size=(2, 2)), columns=list("ab"))


In [14]:
# NOTE(review): the execution counts (this cell In [14], the cell defining A In [16]) suggest A was
# regenerated after this display, so the saved output may not match the A used below — re-run top to bottom.
A

Unnamed: 0,a,b
0,10,2
1,16,9


In [15]:
# A larger 3x3 frame: it has a column 'c' and a row 2 that A does not have.
B = pd.DataFrame(rng.integers(low=0, high=10, size=(3, 3)), columns=list("abc"))
B

Unnamed: 0,a,b,c
0,5,3,1
1,9,7,6
2,4,8,5


In [17]:
# The same alignment rule applies here: every cell that A lacks (row 2, column 'c') comes out as NaN.
A.add(B)

Unnamed: 0,a,b,c
0,13.0,12.0,
1,13.0,8.0,
2,,,


**This can be avoided by passing `fill_value` to `A.add(B, ...)`; below, the mean of all values in `A` is used as the fill.**


In [19]:

# Cells that only one frame has are filled with the mean of every value in A before adding.
A.add(B, fill_value=A.to_numpy().mean())

Unnamed: 0,a,b,c
0,13.0,12.0,6.5
1,13.0,8.0,11.5
2,9.5,13.5,10.5


In [28]:
## Operations between DataFrames and Series
# Start from a plain NumPy array: 3 rows x 4 columns of integers drawn from [0, 10).
A = rng.integers(low=0, high=10, size=(3, 4))
A

array([[1, 7, 6, 9],
       [7, 3, 9, 4],
       [3, 9, 3, 0]])

In [29]:
# NumPy broadcasting: the first row is subtracted from every row of the 2-D array.
np.subtract(A, A[0])

array([[ 0,  0,  0,  0],
       [ 6, -4,  3, -5],
       [ 2,  2, -3, -9]])

In [31]:
# Wrap the array in a DataFrame so that its columns carry the labels Q, R, S, T.
df = pd.DataFrame(data=A, columns=list("QRST"))
df

Unnamed: 0,Q,R,S,T
0,1,7,6,9
1,7,3,9,4
2,3,9,3,0


In [32]:
# Pandas follows the same row-wise convention by default: the first row (a Series indexed by
# column label) is matched against the columns and subtracted from every row.
df.sub(df.iloc[0])

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,6,-4,3,-5
2,2,2,-3,-9


In [33]:
# To operate column-wise instead, use the method form and match the Series on the row index (axis=0).
df.sub(df["S"], axis="index")

Unnamed: 0,Q,R,S,T
0,-5,1,0,3
1,-2,-6,0,-5
2,0,6,0,-3


In [34]:
# Because Pandas preserves and aligns index and column labels in these operations, the data keeps its context,
# which prevents common errors caused by misaligned indexes.