In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy frame used by every example below: five rows, four columns, with NaNs
# scattered through each column. Each inner list is one row of the table.
df = pd.DataFrame(
    data=[
        [1.0, 1.0, 1.0, 1.0],
        [np.nan, 2.0, np.nan, 0.0],
        [3.0, 4.0, np.nan, 1.0],
        [np.nan, np.nan, 1.0, np.nan],
        [np.nan, 3.0, np.nan, 1.0],
    ],
    columns=['A', 'B', 'C', 'D'],
)

In [3]:
# Show the raw frame so the NaN positions are visible before any filling.
df

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,,2.0,,0.0
2,3.0,4.0,,1.0
3,,,1.0,
4,,3.0,,1.0


In [4]:
# Boolean mask with the same shape as df: True wherever a value is missing.
df.isna()

Unnamed: 0,A,B,C,D
0,False,False,False,False
1,True,False,True,False
2,False,False,True,False
3,True,True,False,True
4,True,False,True,False


In [5]:
# Replace every NaN with the same constant; returns a new frame, df itself
# is left unchanged.
df.fillna(value=0)

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,0.0,2.0,0.0,0.0
2,3.0,4.0,0.0,1.0
3,0.0,0.0,1.0,0.0
4,0.0,3.0,0.0,1.0


In [6]:
# Forward fill: carry the last observed value in each column down into the
# NaNs that follow it. DataFrame.ffill() is the supported spelling; the
# `method` argument of fillna() is deprecated since pandas 2.1.
df.ffill()

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,1.0,2.0,1.0,0.0
2,3.0,4.0,1.0,1.0
3,3.0,4.0,1.0,1.0
4,3.0,3.0,1.0,1.0


In [7]:
# Per-column replacements: the keys are column labels, so each column gets
# its own constant wherever it has a NaN.
values = dict(A=0, B=1, C=2, D=3)
df.fillna(values)

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,0.0,2.0,2.0,0.0
2,3.0,4.0,2.0,1.0
3,0.0,1.0,1.0,3.0
4,0.0,3.0,2.0,1.0


In [8]:
# With limit=1 at most one NaN per column is replaced (the first one found
# going down the rows); any further NaNs in that column stay missing.
df.fillna(values, limit=1)

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,0.0,2.0,2.0,0.0
2,3.0,4.0,,1.0
3,,1.0,1.0,3.0
4,,3.0,,1.0


In [9]:
# Impute each column with its own mean. The means are taken over the
# observed values only, since mean() skips NaNs by default.
column_means = df.mean(axis=0)
df.fillna(value=column_means)


Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,2.0,2.0,1.0,0.0
2,3.0,4.0,1.0,1.0
3,2.0,2.5,1.0,0.75
4,2.0,3.0,1.0,1.0


# Fill missing with max

In [33]:
# Persist the unfilled frame to the working directory (to_csv also writes
# the row index by default), then display it again as the starting point
# for the max-based fills.
df.to_csv('missing.csv')
df

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,,2.0,,0.0
2,3.0,4.0,,1.0
3,,,1.0,
4,,3.0,,1.0


## Fill missing values with the max of each column

In [34]:
# df.max() yields one maximum per column (NaNs ignored); fillna lines that
# Series up with the columns by label and fills each column with its max.
df.fillna(value=df.max(axis=0))

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,3.0,2.0,1.0,0.0
2,3.0,4.0,1.0,1.0
3,3.0,4.0,1.0,1.0
4,3.0,3.0,1.0,1.0


## Fill missing values with the max of each row

In [41]:
# Transposed view: the original rows 0-4 become columns and the original
# columns A-D become the index.
df.transpose()

Unnamed: 0,0,1,2,3,4
A,1.0,,3.0,,
B,1.0,2.0,4.0,,3.0
C,1.0,,,1.0,
D,1.0,0.0,1.0,,1.0


In [40]:
# Intentional non-working attempt: df.max() is the per-column max of the
# ORIGINAL frame, so its labels are A-D, while df.T has columns 0-4.
# fillna matches a Series to columns by label; nothing matches here, so
# every NaN is left in place (compare the output with df.T).
df.T.fillna(df.max())

Unnamed: 0,0,1,2,3,4
A,1.0,,3.0,,
B,1.0,2.0,4.0,,3.0
C,1.0,,,1.0,
D,1.0,0.0,1.0,,1.0


In [42]:
# df.max(axis=1) is the per-row max of the original frame, labelled 0-4.
# After transposing, those labels are the column names, so fillna can align
# on them and each original row is filled with its own maximum.
df.T.fillna(value=df.max(axis='columns'))

Unnamed: 0,0,1,2,3,4
A,1.0,2.0,3.0,1.0,3.0
B,1.0,2.0,4.0,1.0,3.0
C,1.0,2.0,4.0,1.0,3.0
D,1.0,0.0,1.0,1.0,1.0


In [45]:
# Same result computed from the transposed frame: df.T.max() takes the max
# down each column of df.T, i.e. along each row of df.
df.T.fillna(value=df.T.max(axis=0))

Unnamed: 0,0,1,2,3,4
A,1.0,2.0,3.0,1.0,3.0
B,1.0,2.0,4.0,1.0,3.0
C,1.0,2.0,4.0,1.0,3.0
D,1.0,0.0,1.0,1.0,1.0


In [46]:
# Row-wise max imputation in the original orientation: transpose, fill each
# (former) row with its max, then transpose back.
(
    df.T
    .fillna(value=df.max(axis='columns'))
    .T
)

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,2.0,2.0,2.0,0.0
2,3.0,4.0,4.0,1.0
3,1.0,1.0,1.0,1.0
4,3.0,3.0,3.0,1.0


# Fill A with its max, B with its min, C with its mean, D with its median

In [56]:
# One statistic per column: A -> max, B -> min, C -> mean, D -> median.
# Each reduction skips NaNs by default, so it reflects observed values only.
values = {
    'A': df['A'].max(),
    'B': df['B'].min(),
    'C': df['C'].mean(),
    'D': df['D'].median(),
}
df.fillna(values)

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,3.0,2.0,1.0,0.0
2,3.0,4.0,1.0,1.0
3,3.0,1.0,1.0,1.0
4,3.0,3.0,1.0,1.0


In [57]:
# Same per-column fill (A -> max, B -> min, C -> mean, D -> median), with
# the median of D computed by NumPy's NaN-aware np.nanmedian.
values = dict(
    A=df['A'].max(),
    B=df['B'].min(),
    C=df['C'].mean(),
    D=np.nanmedian(df['D']),
)
df.fillna(value=values)

Unnamed: 0,A,B,C,D
0,1.0,1.0,1.0,1.0
1,3.0,2.0,1.0,0.0
2,3.0,4.0,1.0,1.0
3,3.0,1.0,1.0,1.0
4,3.0,3.0,1.0,1.0
