As data comes in many shapes and forms, pandas aims to be flexible with regard to handling missing data. While NaN is the default missing value marker for reasons of computational speed and convenience, we need to be able to easily detect this value with data of different types: floating point, integer, boolean, and general object.

# np.nan is used to represent missing data in pandas.
Reindexing allows you to change, add, or delete the index on a specified axis. It returns a copy of the data.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# A small Series whose third entry is missing (NaN).
s = pd.Series([1, 4, np.nan, 6, 8])

In [3]:
# Eight consecutive daily timestamps beginning on 2023-08-22.
dates = pd.date_range(start='20230822', periods=8)

In [4]:
# 8x4 frame of standard-normal samples, indexed by `dates`, columns P..S.
# The values are random, so they differ on every run.
df = pd.DataFrame(
    np.random.randn(8, 4),
    index=dates,
    columns=list('PQRS'),
)

In [5]:
# Keep only the first four dates and add a new column 'E'; 'E' does not
# exist in `df`, so reindexing fills it with missing values.
df1 = df.reindex(
    index=dates[:4],
    columns=[*df.columns, 'E'],
)

In [6]:
# Label-based slices include both endpoints, so this sets 'E' to 1 on the
# first two dates and leaves the remaining rows missing.
df1.loc[dates[0]:dates[1], 'E'] = 1

In [7]:
# Display the reindexed frame; 'E' is still missing in the last two rows.
df1

Unnamed: 0,P,Q,R,S,E
2023-08-22,-1.500691,0.933382,0.86168,0.393773,1.0
2023-08-23,-1.567193,-0.16376,-1.210168,-1.415711,1.0
2023-08-24,-1.326429,-0.779503,-0.954912,-0.208629,
2023-08-25,1.744253,-0.089047,0.833903,-0.075474,


In [9]:
# Drop every row that contains at least one missing value.
# The result is a new frame; df1 itself is left unchanged.
df1.dropna(axis=0, how='any')

Unnamed: 0,P,Q,R,S,E
2023-08-22,-1.500691,0.933382,0.86168,0.393773,1.0
2023-08-23,-1.567193,-0.16376,-1.210168,-1.415711,1.0


In [11]:
# Replace every missing value with 5 (returns a new frame).
df1.fillna(5)

Unnamed: 0,P,Q,R,S,E
2023-08-22,-1.500691,0.933382,0.86168,0.393773,1.0
2023-08-23,-1.567193,-0.16376,-1.210168,-1.415711,1.0
2023-08-24,-1.326429,-0.779503,-0.954912,-0.208629,5.0
2023-08-25,1.744253,-0.089047,0.833903,-0.075474,5.0


In [12]:
# Boolean mask with the same shape as df1: True where a value is missing.
df1.isna()

Unnamed: 0,P,Q,R,S,E
2023-08-22,False,False,False,False,False
2023-08-23,False,False,False,False,False
2023-08-24,False,False,False,False,True
2023-08-25,False,False,False,False,True
