In [4]:
# create a series of integers contaning missing values:
import numpy as np
import pandas as pd
pd.Series([1,2,np.nan,4], dtype = pd.Int64Dtype())

0       1
1       2
2    <NA>
3       4
dtype: Int64

In [6]:
# check which element is missing
s = pd.Series([5,6, np.nan])
s.isna()

0    False
1    False
2     True
dtype: bool

In [7]:
s.notnull()

0     True
1     True
2    False
dtype: bool

In [8]:
# delete missing data
s.dropna()

0    5.0
1    6.0
dtype: float64

In [9]:
df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'],
                   "toy": [np.nan, 'Batmobile', 'Bullwhip'],
                   "born": [pd.NaT, pd.Timestamp("1940-04-25"),
                            pd.NaT]})
df

Unnamed: 0,name,toy,born
0,Alfred,,NaT
1,Batman,Batmobile,1940-04-25
2,Catwoman,Bullwhip,NaT


In [10]:
# xoas theo dong
df.dropna()

Unnamed: 0,name,toy,born
1,Batman,Batmobile,1940-04-25


In [11]:
# xoas theo cot
df.dropna(axis = 1)

Unnamed: 0,name
0,Alfred
1,Batman
2,Catwoman


In [12]:
# xoa tat ca cac gia tri khuyet thieu trong df
df.dropna(how='all')

Unnamed: 0,name,toy,born
0,Alfred,,NaT
1,Batman,Batmobile,1940-04-25
2,Catwoman,Bullwhip,NaT


In [13]:
# keep rows with at least 2 non-missing values:
df.dropna(thresh = 2)

Unnamed: 0,name,toy,born
1,Batman,Batmobile,1940-04-25
2,Catwoman,Bullwhip,NaT


In [14]:
# chon cot can loai bo gia tri bij khuyet
df.dropna(subset = ['name', 'toy'])

Unnamed: 0,name,toy,born
1,Batman,Batmobile,1940-04-25
2,Catwoman,Bullwhip,NaT


In [15]:
# replace missing data
df = pd.DataFrame([[np.nan, 2, np.nan, 0],
                   [3, 4, np.nan, 1],
                   [np.nan, np.nan, np.nan, 5],
                   [np.nan, 3, np.nan, 4]],
                  columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
0,,2.0,,0
1,3.0,4.0,,1
2,,,,5
3,,3.0,,4


In [17]:
# dien tat ca gia tri bi thieu = phan tu nam trong ()
df.fillna(5)

Unnamed: 0,A,B,C,D
0,5.0,2.0,5.0,0
1,3.0,4.0,5.0,1
2,5.0,5.0,5.0,5
3,5.0,3.0,5.0,4


In [18]:
# dien bang gia tri ow truoc(tren)
df.fillna(method = 'ffill')

Unnamed: 0,A,B,C,D
0,,2.0,,0
1,3.0,4.0,,1
2,3.0,4.0,,5
3,3.0,3.0,,4


In [20]:
values = {"A": 0, "B": 1, "C": 2, "D": 3} 
df.fillna(value = values )

Unnamed: 0,A,B,C,D
0,0.0,2.0,2.0,0
1,3.0,4.0,2.0,1
2,0.0,1.0,2.0,5
3,0.0,3.0,2.0,4


In [22]:
# noi suuy 
display(df)
df.interpolate()

Unnamed: 0,A,B,C,D
0,,2.0,,0
1,3.0,4.0,,1
2,,,,5
3,,3.0,,4


Unnamed: 0,A,B,C,D
0,,2.0,,0
1,3.0,4.0,,1
2,3.0,3.5,,5
3,3.0,3.0,,4


In [26]:
import numpy as np
from scipy.interpolate import interp1d

# Dữ liệu gốc
x = [1, 2, 3, 4, 6, 7, 9]
y = [3, np.nan, 5, 6, 8, np.nan, 10]

# Tạo hàm nội suy spline
f = interp1d(x, y, kind='cubic')

# Điền giá trị khuyết thiếu
x_interp = np.arange(1, 9)
y_interp = f(x_interp)

# In kết quả
y_interp

array([nan, nan, nan, nan, nan, nan, nan, nan])