## 결측값 확인 (isna, isnull, notna, notnull)

In [16]:
import pandas as pd
import numpy as np

# Sample frame that mixes three missing-value markers: pd.NA, np.nan and None.
row = [f'row{i}' for i in range(1, 4)]
col = [f'col{i}' for i in range(1, 5)]
data = [
    [1, 2, pd.NA, 4],
    [np.nan, 6, 7, 8],
    [9, 10, 11, None],
]
df = pd.DataFrame(data=data, index=row, columns=col)
print(df)


      col1  col2  col3  col4
row1   1.0     2  <NA>   4.0
row2   NaN     6     7   8.0
row3   9.0    10    11   NaN


In [17]:
# isna/isnull flag missing cells with True; notna/notnull are their inverses.
# The four masks are printed in that order, separated by one blank line.
print(df.isna(), df.isnull(), df.notna(), df.notnull(), sep='\n\n')



       col1   col2   col3   col4
row1  False  False   True  False
row2   True  False  False  False
row3  False  False  False   True

       col1   col2   col3   col4
row1  False  False   True  False
row2   True  False  False  False
row3  False  False  False   True

       col1  col2   col3   col4
row1   True  True  False   True
row2  False  True   True   True
row3   True  True   True  False

       col1  col2   col3   col4
row1   True  True  False   True
row2  False  True   True   True
row3   True  True   True  False


## 결측값 제거 (dropna)

In [18]:
# Sample frame for dropna: row3 is complete, row4 is entirely missing.
row = [f'row{i}' for i in range(1, 5)]
col = [f'col{i}' for i in range(1, 6)]
data = [
    [1, 2, 3, pd.NA, 5],
    [6, pd.NA, 8, pd.NA, 10],
    [11, 12, 13, 14, 15],
    [pd.NA] * 5,
]
df = pd.DataFrame(data=data, index=row, columns=col)
print(df)

      col1  col2  col3  col4  col5
row1     1     2     3  <NA>     5
row2     6  <NA>     8  <NA>    10
row3    11    12    13    14    15
row4  <NA>  <NA>  <NA>  <NA>  <NA>


In [19]:
# Row-wise drop: discard every row that holds at least one missing value.
print(df.dropna(axis='index'))

     col1 col2 col3 col4 col5
row3   11   12   13   14   15


In [20]:
# Column-wise drop: discard every column that holds at least one missing value.
print(df.dropna(axis='columns'))

Empty DataFrame
Columns: []
Index: [row1, row2, row3, row4]


In [21]:
# how='any': a single missing value is enough to drop the row.
print(df.dropna(axis=0, how='any'))

     col1 col2 col3 col4 col5
row3   11   12   13   14   15


In [22]:
# how='all': drop a row only when every value in it is missing.
print(df.dropna(axis=0, how='all'))

     col1  col2 col3  col4 col5
row1    1     2    3  <NA>    5
row2    6  <NA>    8  <NA>   10
row3   11    12   13    14   15


In [23]:
# thresh=3: keep only rows that have at least three non-missing values.
print(df.dropna(axis=0, thresh=3))

     col1  col2 col3  col4 col5
row1    1     2    3  <NA>    5
row2    6  <NA>    8  <NA>   10
row3   11    12   13    14   15


In [24]:
# subset: only col1 and col2 are inspected when deciding which rows to drop.
print(df.dropna(axis=0, subset=['col1', 'col2']))


     col1 col2 col3  col4 col5
row1    1    2    3  <NA>    5
row3   11   12   13    14   15


In [26]:
# inplace=True modifies df itself instead of returning a new frame,
# so df is printed afterwards to show the rows that remain.
df.dropna(axis=0, how='any', inplace=True)
print(df)

     col1 col2 col3 col4 col5
row3   11   12   13   14   15


## 결측값이 아닌 값이 있는 첫/마지막 인덱스 확인 (first_valid_index / last_valid_index)

In [27]:
# Sample frame whose first and last rows contain only missing markers
# (np.nan, pd.NA, pd.NaT), used to locate the first/last valid index.
row = [f'row{i}' for i in range(1, 6)]
col = ['col1', 'col2']
data = [
    [np.nan, np.nan],
    [pd.NA, 4],
    [pd.NA, pd.NaT],
    [5, 6],
    [np.nan, pd.NA],
]
df = pd.DataFrame(data=data, index=row, columns=col)
print(df)

      col1  col2
row1   NaN   NaN
row2  <NA>     4
row3  <NA>   NaT
row4     5     6
row5   NaN  <NA>


In [29]:
# Labels of the first and last rows holding at least one non-missing value,
# printed with a blank line between them.
print(df.first_valid_index(), df.last_valid_index(), sep='\n\n')

row2

row4


## 결측값 변경 (fillna / backfill / bfill / pad / ffill)

In [34]:
# Sample frame for the fill examples; `na` is a short alias for np.nan so the
# grid below stays readable. col3 is entirely missing.
na = np.nan
row = [f'row{i}' for i in range(1, 6)]
col = [f'col{i}' for i in range(1, 6)]
data = [
    [na, 2, na, 4, na],
    [6, 7, na, 9, na],
    [11, na, na, 14, 15],
    [na, 17, na, na, 20],
    [na, 22, na, na, 25],
]
df = pd.DataFrame(data=data, index=row, columns=col)
print(df)

      col1  col2  col3  col4  col5
row1   NaN   2.0   NaN   4.0   NaN
row2   6.0   7.0   NaN   9.0   NaN
row3  11.0   NaN   NaN  14.0  15.0
row4   NaN  17.0   NaN   NaN  20.0
row5   NaN  22.0   NaN   NaN  25.0


In [32]:
# A scalar fill value replaces every missing cell in the frame.
print(df.fillna(value='A'))

      col1  col2 col3  col4  col5
row1     A   2.0    A   4.0     A
row2   6.0   7.0    A   9.0     A
row3  11.0     A    A  14.0  15.0
row4     A  17.0    A     A  20.0
row5     A  22.0    A     A  25.0


In [35]:
# Per-column fill values: each key is a column label and its value replaces
# the missing cells of that column only.
# Named `fill_values` rather than `dict` so the builtin `dict` type is not
# shadowed for the rest of the session.
fill_values = {'col1': 'A', 'col2': 'B', 'col3': 'C', 'col4': 'D', 'col5': 'E'}
print(df.fillna(value=fill_values))

      col1  col2 col3  col4  col5
row1     A   2.0    C   4.0     E
row2   6.0   7.0    C   9.0     E
row3  11.0     B    C  14.0  15.0
row4     A  17.0    C     D  20.0
row5     A  22.0    C     D  25.0


In [None]:
# Backward fill: each missing value takes the next valid value below it in
# the same column; gaps with no valid value beneath them stay NaN.
# DataFrame.bfill() is used because fillna(method='bfill') is deprecated
# (pandas >= 2.1 emits a FutureWarning for the `method` argument).
print(df.bfill())

      col1  col2  col3  col4  col5
row1   6.0   2.0   NaN   4.0  15.0
row2   6.0   7.0   NaN   9.0  15.0
row3  11.0  17.0   NaN  14.0  15.0
row4   NaN  17.0   NaN   NaN  20.0
row5   NaN  22.0   NaN   NaN  25.0


  print(df.fillna(method='bfill'))


: 