Operating on Data in Pandas

In [2]:
"""                    Ufuncs: Index Preservation                     """

'                    Ufuncs: Index Preservation                     '

In [3]:
import numpy as np
import pandas as pd

In [4]:
rng = np.random.RandomState(42)
ser = pd.Series(rng.randint(0, 10, 4))
ser

0    6
1    3
2    7
3    4
dtype: int32

In [5]:
df = pd.DataFrame(rng.randint(0, 10, (3, 4)), 
                  columns= ["A", "B", "C", "D"])
df

Unnamed: 0,A,B,C,D
0,6,9,2,6
1,7,4,3,7
2,7,2,5,4


In [6]:
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [7]:
np.sin(df * np.pi / 4)

Unnamed: 0,A,B,C,D
0,-1.0,0.7071068,1.0,-1.0
1,-0.707107,1.224647e-16,0.707107,-0.7071068
2,-0.707107,1.0,-0.707107,1.224647e-16


In [8]:
"""                         UFuncs: Index Alignment                   """

'                         UFuncs: Index Alignment                   '

In [9]:
# Index alignment in Series

In [10]:
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
                  'California': 423967}, name='area')

population = pd.Series({'California': 38332521, 'Texas': 26448193,
                         'New York': 19651127}, name='population')


In [11]:
population / area



Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [12]:
A = pd.Series([2, 4, 6], index=[0, 1, 2])
B = pd.Series([1, 3, 5], index=[1, 2, 3])

A + B


0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [13]:
A.add(B, fill_value= 0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [14]:
# Index alignment in DataFrame

In [15]:
A = pd.DataFrame(rng.randint(0, 20, (2, 2)), columns= list("AB"))
A

Unnamed: 0,A,B
0,1,11
1,5,1


In [16]:
B = pd.DataFrame(rng.randint(0, 10, (3, 3)), 
                 columns= list("BAC"))
B

Unnamed: 0,B,A,C
0,4,0,9
1,5,8,0
2,9,2,6


In [17]:
A + B

Unnamed: 0,A,B,C
0,1.0,15.0,
1,13.0,6.0,
2,,,


In [18]:
fill = A.stack().mean()
A.add(B, fill_value= fill)

Unnamed: 0,A,B,C
0,1.0,15.0,13.5
1,13.0,6.0,4.5
2,6.5,13.5,10.5


In [19]:
"""          Ufuncs: Operations Between DataFrame and Series          """

'          Ufuncs: Operations Between DataFrame and Series          '

In [20]:
A = rng.randint(10, size= (3, 4))
A

array([[3, 8, 2, 4],
       [2, 6, 4, 8],
       [6, 1, 3, 8]], dtype=int32)

In [21]:
A - A[0]

array([[ 0,  0,  0,  0],
       [-1, -2,  2,  4],
       [ 3, -7,  1,  4]], dtype=int32)

In [22]:
df = pd.DataFrame(A, columns= list("QRST"))
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-1,-2,2,4
2,3,-7,1,4


In [23]:
df.subtract(df["R"], axis = 0)

Unnamed: 0,Q,R,S,T
0,-5,0,-6,-4
1,-4,0,-2,2
2,5,0,2,7


In [24]:
halfrow = df.iloc[0, ::2]
halfrow

Q    3
S    2
Name: 0, dtype: int32

In [25]:
df - halfrow

Unnamed: 0,Q,R,S,T
0,0.0,,0.0,
1,-1.0,,2.0,
2,3.0,,1.0,


In [26]:
"""                         Examples                                  """

'                         Examples                                  '

In [27]:
y = pd.Series([1, 2, 3, 4])
np.exp(y)

0     2.718282
1     7.389056
2    20.085537
3    54.598150
dtype: float64

In [46]:
x = pd.Series([10, 20, 30], index= ["a","b", "c"])
y = pd.Series([10, 20, 30], index= ["b", "c", "d"])
x + y

a     NaN
b    30.0
c    50.0
d     NaN
dtype: float64

In [48]:
result_fill = x.add(y, fill_value= 0)
result_fill

a    10.0
b    30.0
c    50.0
d    30.0
dtype: float64

In [64]:
A = np.random.randint(1, 10, (3, 3))
df = pd.DataFrame(A, columns= list("ABC"))
np.sin(df)

Unnamed: 0,A,B,C
0,0.909297,0.656987,0.412118
1,-0.279415,-0.756802,0.989358
2,-0.958924,0.909297,0.989358


In [68]:
A = np.random.randint(1, 10, (3, 2))
df1_A = pd.DataFrame(A, columns= list("AB"))

B = np.random.randint(1, 10, (3, 2))
df2_B = pd.DataFrame(B, columns= list("BC"))

np.add(df1_A, df2_B)

Unnamed: 0,A,B,C
0,,7,
1,,11,
2,,6,


In [71]:
A = np.random.randint(1, 10, (3, 2))
df1_A = pd.DataFrame(A, columns= list("AB"))

B = np.random.randint(1, 10, (3, 2))
df2_B = pd.DataFrame(B, columns= list("BC"))

result_fill_A_B = df1_A.add(df2_B, fill_value= 1)
result_fill_A_B

Unnamed: 0,A,B,C
0,7.0,12,3.0
1,9.0,10,10.0
2,8.0,12,4.0


In [74]:
df_matrix = pd.DataFrame(np.random.randint(1, 10, (4, 4)), 
                         columns= list("ABCD"))
df_matrix - df.iloc[0]

Unnamed: 0,A,B,C,D
0,5.0,-2.0,-5.0,
1,0.0,-6.0,-2.0,
2,3.0,-6.0,-7.0,
3,5.0,-3.0,-2.0,


In [75]:
df_matrix.subtract(df_matrix["A"], axis = 0)

Unnamed: 0,A,B,C,D
0,0,-2,-3,2
1,0,-1,5,-1
2,0,-4,-3,-4
3,0,-3,0,-4


In [83]:
df_matrix1 = pd.DataFrame(np.random.randint(1, 10, (8, 16)), 
                         columns= list("ABCDEFGHIJKLMPQR"))
df_matrix1.iloc[:2, ::2]

Unnamed: 0,A,C,E,G,I,K,M,Q
0,9,6,9,5,2,1,1,6
1,8,8,8,7,7,9,5,8


In [86]:
df_matrix2 = pd.DataFrame(np.random.randint(1, 10, (8, 16)), 
                         columns= list("ABCDEFGHIJKLMPQR"))

df_matrix3 = pd.DataFrame(np.random.randint(1, 10, (8, 16)), 
                         columns= list("BCEFGIJLMPQRSTVW"))

np.add(df_matrix2, df_matrix3)

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,P,Q,R,S,T,V,W
0,,12,3,,18,8,4,,12,10,,13,4,7,14,11,,,,
1,,10,14,,13,9,6,,13,15,,8,5,6,17,11,,,,
2,,17,11,,12,14,16,,7,11,,7,13,11,11,14,,,,
3,,10,5,,8,8,9,,9,9,,10,9,8,4,6,,,,
4,,6,6,,8,11,7,,5,15,,14,7,16,16,10,,,,
5,,14,11,,8,11,11,,8,13,,6,7,11,15,13,,,,
6,,11,13,,9,16,8,,9,12,,15,12,11,12,8,,,,
7,,4,17,,14,8,9,,7,4,,9,8,8,10,12,,,,


In [87]:
df_matrix2.add(df_matrix3, fill_value= 0)

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,P,Q,R,S,T,V,W
0,8.0,12,3,9.0,18,8,4,2.0,12,10,7.0,13,4,7,14,11,9.0,7.0,7.0,5.0
1,4.0,10,14,8.0,13,9,6,8.0,13,15,1.0,8,5,6,17,11,5.0,8.0,9.0,7.0
2,4.0,17,11,2.0,12,14,16,6.0,7,11,1.0,7,13,11,11,14,5.0,8.0,5.0,3.0
3,9.0,10,5,1.0,8,8,9,6.0,9,9,7.0,10,9,8,4,6,5.0,5.0,2.0,2.0
4,6.0,6,6,5.0,8,11,7,6.0,5,15,5.0,14,7,16,16,10,2.0,3.0,7.0,6.0
5,1.0,14,11,1.0,8,11,11,8.0,8,13,5.0,6,7,11,15,13,8.0,8.0,2.0,9.0
6,5.0,11,13,9.0,9,16,8,9.0,9,12,6.0,15,12,11,12,8,7.0,8.0,9.0,5.0
7,8.0,4,17,9.0,14,8,9,2.0,7,4,9.0,9,8,8,10,12,2.0,8.0,2.0,5.0
