# Essential Functionalities in Pandas

## Author: Sheikh Irfan Ullah Khan

### Contact Me: shirfan.math@gmail.com

#### 1. How to `Reindex` Pandas Objects

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Integer Series whose labels are deliberately not in alphabetical order.
obj = pd.Series(data=[4, 7, -5, 3], index=list('dbac'))
obj

d    4
b    7
a   -5
c    3
dtype: int64

In [3]:
# Rearrange the existing values so the labels come out in alphabetical order.
obj1 = obj.reindex(list('abcd'))
obj1

a   -5
b    7
c    3
d    4
dtype: int64

In [4]:
# 'e' is not a label of obj, so the reindexed Series gets NaN there
# (and the values are shown as floats to accommodate it).
obj2 = obj.reindex(list('abcde'))
obj2

a   -5.0
b    7.0
c    3.0
d    4.0
e    NaN
dtype: float64

In [5]:
# Three integers labelled 0, 1, 2 (an explicit list of labels, not a RangeIndex).
obj3 = pd.Series(data=[1, 2, 3], index=list(range(3)))
obj3

0    1
1    2
2    3
dtype: int64

In [6]:
# Labels 3 and 4 are new, so they are filled with NaN.
obj3.reindex(np.arange(5))

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
dtype: float64

In [7]:
# Forward-fill: the new labels 3 and 4 take the last available value.
obj3.reindex(
    np.arange(5),
    method='ffill',
)

0    1
1    2
2    3
3    3
4    3
dtype: int64

In [8]:
# 3x3 data frame holding 1..9 row by row; the row labels are intentionally unsorted.
obj4 = pd.DataFrame(
    data=np.arange(1, 10).reshape(3, 3),
    index=['c', 'a', 'b'],
    columns=['Abbottabd', 'Mansehra', 'Haripur'],
)
obj4

Unnamed: 0,Abbottabd,Mansehra,Haripur
c,1,2,3
a,4,5,6
b,7,8,9


In [9]:
# Alternative construction: generate 0..8 with arange and fold it into a 3x3 grid.
obj5 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=list('abc'),
    columns=['Abbottabd', 'Mansehra', 'Haripur'],
)
obj5

Unnamed: 0,Abbottabd,Mansehra,Haripur
a,0,1,2
b,3,4,5
c,6,7,8


In [10]:
# Reindex the rows: existing labels are reordered, the unknown label 'd' becomes a NaN row.
obj6 = obj4.reindex(index=list('abcd'))
obj6

Unnamed: 0,Abbottabd,Mansehra,Haripur
a,4.0,5.0,6.0
b,7.0,8.0,9.0
c,1.0,2.0,3.0
d,,,


In [11]:
# 'Dera' is not a column of obj4, so reindexing the columns adds it filled with NaN.
capitals = ['Dera', 'Abbottabd', 'Mansehra', 'Haripur']
print(obj4)
obj4.reindex(capitals, axis='columns')

   Abbottabd  Mansehra  Haripur
c          1         2        3
a          4         5        6
b          7         8        9


Unnamed: 0,Dera,Abbottabd,Mansehra,Haripur
c,,1,2,3
a,,4,5,6
b,,7,8,9


---
#### 2. How to `Drop` Entries from an Axis

In [12]:
# Series of the integers 0..4 labelled 'a'..'e'
s = pd.Series(data=np.arange(5), index=list('abcde'))
s

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [13]:
# drop returns a new Series without the given label; s itself is left untouched.
s.drop(labels='a')

b    1
c    2
d    3
e    4
dtype: int32

In [14]:
# Keep the result of the drop under its own name.
ns = s.drop(index='a')
ns

b    1
c    2
d    3
e    4
dtype: int32

In [15]:
# Several labels can be dropped at once by passing a list.
s.drop(labels=['a', 'e'])

b    1
c    2
d    3
dtype: int32

In [16]:
# 4x4 data frame holding 0..15 row by row
df = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=list('abde'),
    columns=['Abbottabd', 'Mansehra', 'Haripur', 'Dera'],
)
df

Unnamed: 0,Abbottabd,Mansehra,Haripur,Dera
a,0,1,2,3
b,4,5,6,7
d,8,9,10,11
e,12,13,14,15


In [17]:
# Remove rows by label (equivalent to the default axis=0 form).
df.drop(index=['a', 'e'])

Unnamed: 0,Abbottabd,Mansehra,Haripur,Dera
b,4,5,6,7
d,8,9,10,11


In [18]:
# To remove a column, target the column axis (same as passing axis=1).
df.drop(columns='Dera')

Unnamed: 0,Abbottabd,Mansehra,Haripur
a,0,1,2
b,4,5,6
d,8,9,10
e,12,13,14


In [19]:
# Drop two columns in one call; the result is a new frame.
df.drop(columns=['Haripur', 'Dera'])

Unnamed: 0,Abbottabd,Mansehra
a,0,1
b,4,5
d,8,9
e,12,13


In [20]:
# The drop calls above returned new objects, so df itself is unchanged
# and still has all four columns.
df

Unnamed: 0,Abbottabd,Mansehra,Haripur,Dera
a,0,1,2,3
b,4,5,6,7
d,8,9,10,11
e,12,13,14,15


In [21]:
# inplace=True modifies df itself and returns None, so this cell displays nothing.
# NOTE: the cell is not re-runnable as-is: a second run raises KeyError because
# the two columns have already been removed from df.
df.drop(['Haripur', 'Dera'], axis = 1, inplace = True)

In [22]:
# Because the previous drop used inplace=True, the original data frame itself
# has changed: 'Haripur' and 'Dera' are gone from df.
df

Unnamed: 0,Abbottabd,Mansehra
a,0,1
b,4,5
d,8,9
e,12,13


In [23]:
# Or create a data frame of 4 rows and 4 columns filled with random integers.
# A seeded Generator is used so that every run (including Restart & Run All)
# produces the same values and the displayed table is reproducible.
rng = np.random.default_rng(seed=42)
df1 = pd.DataFrame(rng.integers(0, 100, size=(4, 4)),  # integers in [0, 100)
                   index=['a', 'b', 'd', 'e'],
                   columns=['Abbottabd', 'Mansehra', 'Haripur', 'Dera'])
df1

Unnamed: 0,Abbottabd,Mansehra,Haripur,Dera
a,17,29,59,41
b,35,34,80,41
d,52,17,73,34
e,48,16,41,75


---
#### 3. Arithmetic and Data Alignment

In [24]:
# First operand for the alignment demo: labels a, b, d, e.
s1 = pd.Series({'a': 1, 'b': 2, 'd': 3, 'e': 4})
s1

a    1
b    2
d    3
e    4
dtype: int64

In [25]:
# Second operand: shares only the labels 'a' and 'e' with s1.
s2 = pd.Series(data=[1, 2, 3, 4, 5], index=list('acefg'))
s2

a    1
c    2
e    3
f    4
g    5
dtype: int64

In [26]:
# Addition aligns on labels: only 'a' and 'e' exist in both Series, so every
# other label in the result is NaN and the result is float.
s1 + s2

a    2.0
b    NaN
c    NaN
d    NaN
e    7.0
f    NaN
g    NaN
dtype: float64

In [27]:
# 3x3 frame (values 0..8) with columns a, c, d
df2 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['Abbottabd', 'Mansehra', 'Haripur'],
    columns=list('acd'),
)
df2

Unnamed: 0,a,c,d
Abbottabd,0,1,2
Mansehra,3,4,5
Haripur,6,7,8


In [28]:
# 4x4 frame (values 0..15) with columns a, b, d, e and one extra row, 'Dera'
df3 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Abbottabd', 'Mansehra', 'Haripur', 'Dera'],
    columns=list('abde'),
)
df3

Unnamed: 0,a,b,d,e
Abbottabd,0,1,2,3
Mansehra,4,5,6,7
Haripur,8,9,10,11
Dera,12,13,14,15


In [29]:
# Show both operands, then add them: the result covers the union of row and
# column labels, with NaN wherever a label is missing from either frame.
print('df2:', df2, sep='\n')
print()
print('df3:', df3, sep='\n')
df2 + df3

df2:
           a  c  d
Abbottabd  0  1  2
Mansehra   3  4  5
Haripur    6  7  8

df3:
            a   b   d   e
Abbottabd   0   1   2   3
Mansehra    4   5   6   7
Haripur     8   9  10  11
Dera       12  13  14  15


Unnamed: 0,a,b,c,d,e
Abbottabd,0.0,,,4.0,
Dera,,,,,
Haripur,14.0,,,18.0,
Mansehra,7.0,,,11.0,


---
#### 4. How to `Fill` Missing Values

In [30]:
# 3x4 frame of 0..11 with the default integer row labels
df4 = pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    columns=['a', 'b', 'c', 'd'],
)
df4

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [31]:
# 4x5 frame of 0..19: one more row and one more column ('e') than df4
df5 = pd.DataFrame(
    data=np.arange(20).reshape(4, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
)
df5

Unnamed: 0,a,b,c,d,e
0,0,1,2,3,4
1,5,6,7,8,9
2,10,11,12,13,14
3,15,16,17,18,19


In [32]:
# Introduce a missing value at row 1, column 'd'.
# NaN cannot be stored in an integer column, so 'd' is cast to float explicitly
# first: relying on the assignment to silently upcast the column is deprecated
# in recent pandas releases (FutureWarning) and is slated to become an error.
df5 = df5.astype({'d': 'float64'})
df5.loc[1, 'd'] = np.nan
df5

Unnamed: 0,a,b,c,d,e
0,0,1,2,3.0,4
1,5,6,7,,9
2,10,11,12,13.0,14
3,15,16,17,18.0,19


In [33]:
# Plain `+` aligns both axes; a position missing from either frame (or already
# NaN, like df5's row 1 / column 'd') comes out as NaN.
print('df4:', df4, sep='\n')
print()
print('df5:', df5, sep='\n')
df4 + df5

df4:
   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11

df5:
    a   b   c     d   e
0   0   1   2   3.0   4
1   5   6   7   NaN   9
2  10  11  12  13.0  14
3  15  16  17  18.0  19


Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,11.0,13.0,,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [34]:
# add() with fill_value=0: a value missing from one frame is treated as 0
# before the addition, so the other frame's value is kept instead of NaN.
print('df4:', df4, sep='\n')
print()
print('df5:', df5, sep='\n')
df4.add(df5, fill_value=0)

df4:
   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11

df5:
    a   b   c     d   e
0   0   1   2   3.0   4
1   5   6   7   NaN   9
2  10  11  12  13.0  14
3  15  16  17  18.0  19


Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,11.0,13.0,7.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [35]:
# Compare the scalar type of one cell (row 1, column 'c') in df4 and df5
# with the type of the same cell after add(..., fill_value=0).
print('Before addition:')

print(type(df4.loc[1, 'c']), type(df5.loc[1, 'c']), sep='\n')
print()

print('After addition:')
print(type(df4.add(df5, fill_value=0).loc[1, 'c']))

Before addition:
<class 'numpy.int32'>
<class 'numpy.int32'>

After addition:
<class 'numpy.float64'>


In [36]:
# The fill value does not have to be 0: here every missing operand is replaced by 5.
print('df4:', df4, sep='\n')
print()
print('df5:', df5, sep='\n')
df4.add(df5, fill_value=5)

df4:
   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11

df5:
    a   b   c     d   e
0   0   1   2   3.0   4
1   5   6   7   NaN   9
2  10  11  12  13.0  14
3  15  16  17  18.0  19


Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,9.0
1,9.0,11.0,13.0,12.0,14.0
2,18.0,20.0,22.0,24.0,19.0
3,20.0,21.0,22.0,23.0,24.0


#### 5. Scalar Operations

In [37]:
print(df4)
# Scalar division: the scalar is divided by every element.
# df4 holds a 0 at row 0 / column 'a', so that position becomes inf instead of raising.
1/df4

   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11


Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [38]:
# Scalar multiplication: every element of df4 is multiplied by 4
df4 * 4

Unnamed: 0,a,b,c,d
0,0,4,8,12
1,16,20,24,28
2,32,36,40,44


In [39]:
# Scalar subtraction: 2 is subtracted from every element of df4
df4 - 2

Unnamed: 0,a,b,c,d
0,-2,-1,0,1
1,2,3,4,5
2,6,7,8,9


In [40]:
print(df4)
# Reflected division: rdiv(1) computes 1 / df4 (same result as the operator form).
df4.rdiv(1)

   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11


Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [41]:
print(df4)
# Reflected multiplication: rmul(4) computes 4 * df4.
df4.rmul(4)

   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11


Unnamed: 0,a,b,c,d
0,0,4,8,12
1,16,20,24,28
2,32,36,40,44


In [42]:
print(df4)
# Reflected subtraction: rsub(2) computes 2 - df4 (note the sign flip versus df4 - 2).
df4.rsub(2)

   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11


Unnamed: 0,a,b,c,d
0,2,1,0,-1
1,-2,-3,-4,-5
2,-6,-7,-8,-9


In [43]:
print(df4)
# Reflected power: rpow(2) computes 2 ** df4, i.e. 2 raised to each element.
df4.rpow(2)

   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11


Unnamed: 0,a,b,c,d
0,1,2,4,8
1,16,32,64,128
2,256,512,1024,2048


In [44]:
print(df4)
# Reflected addition: radd(5) computes 5 + df4.
df4.radd(5)

   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11


Unnamed: 0,a,b,c,d
0,5,6,7,8
1,9,10,11,12
2,13,14,15,16
