# Handling Missing Values

* Real-life data comes with missing values
* Do not ignore them
* Proper ways to handle missing values

In [1]:
import numpy as np
import pandas as pd

# Seed NumPy's global generator once so that "Restart Kernel -> Run All"
# reproduces the same random integers in this cell and in every later cell
# that calls np.random.randint.
np.random.seed(42)

# 8 rows x 6 columns of random integers in [0, 10), labelled A..F.
df = pd.DataFrame(np.random.randint(10, size=(8,6)), columns=list("ABCDEF"))

# Inject missing values: rows 1 and 4 of columns A and D, then rows 3 and 7 of
# columns B, C and E. Column F is deliberately left complete.
df.iloc[[1,4],[0,3]] = np.nan
df.iloc[[3,7],[1,2,4]] = None

df

Unnamed: 0,A,B,C,D,E,F
0,7.0,5.0,9.0,8.0,2.0,2
1,,2.0,7.0,,1.0,9
2,4.0,4.0,7.0,2.0,1.0,0
3,6.0,,,0.0,,8
4,,8.0,9.0,,9.0,7
5,0.0,1.0,9.0,0.0,8.0,4
6,7.0,5.0,4.0,7.0,3.0,3
7,3.0,,,9.0,,7


In [2]:
# Columns A-E received NaN/None and are now float64; F was left untouched and is still int64.
df.dtypes

A    float64
B    float64
C    float64
D    float64
E    float64
F      int64
dtype: object

* Pandas 1.0 introduced a new missing-value marker, `pd.NA` (displayed as `<NA>`). Together with the nullable `Int64` dtype, it lets integer columns hold missing values without being converted to float.

In [3]:
# Same 8x6 random frame as before, but stored with pandas' nullable integer dtype.
random_digits = np.random.randint(10, size=(8, 6))
df = pd.DataFrame(random_digits, columns=list("ABCDEF"), dtype=pd.Int64Dtype())

# Same missing-value pattern: rows 1 and 4 of columns A and D,
# then rows 3 and 7 of columns B, C and E.
df.iloc[[1, 4], [0, 3]] = np.nan
df.iloc[[3, 7], [1, 2, 4]] = None

df

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [4]:
# All six columns keep the nullable Int64 dtype even though A-E now contain missing values.
df.dtypes

A    Int64
B    Int64
C    Int64
D    Int64
E    Int64
F    Int64
dtype: object

## 1. Finding missing values - isna

In [5]:
df

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [6]:
# Boolean mask with the same shape as df: True where a value is missing.
df.isna()

Unnamed: 0,A,B,C,D,E,F
0,False,False,False,False,False,False
1,True,False,False,True,False,False
2,False,False,False,False,False,False
3,False,True,True,False,True,False
4,True,False,False,True,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,True,True,False,True,False


## 2. Finding missing values - isnull

In [7]:
# isnull() is an alias of isna(); it returns the same boolean mask.
df.isnull()

Unnamed: 0,A,B,C,D,E,F
0,False,False,False,False,False,False
1,True,False,False,True,False,False
2,False,False,False,False,False,False
3,False,True,True,False,True,False
4,True,False,False,True,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,True,True,False,True,False


## 3. Finding missing values - notna

In [8]:
# Inverse of isna(): True where a value is present, False where it is missing.
df.notna()

Unnamed: 0,A,B,C,D,E,F
0,True,True,True,True,True,True
1,False,True,True,False,True,True
2,True,True,True,True,True,True
3,True,False,False,True,False,True
4,False,True,True,False,True,True
5,True,True,True,True,True,True
6,True,True,True,True,True,True
7,True,False,False,True,False,True


## 4. Number of missing values - column

In [9]:
df

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [10]:
# Summing the boolean mask down each column counts the missing values per column.
df.isna().sum()

A    2
B    2
C    2
D    2
E    2
F    0
dtype: int64

In [11]:
# A second sum() adds the per-column counts into one total for the whole frame.
df.isna().sum().sum()

10

## 5. Number of missing values - row

In [12]:
df

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [13]:
# axis=1 sums across the columns, giving the number of missing values in each row.
df.isna().sum(axis=1)

0    0
1    2
2    0
3    3
4    2
5    0
6    0
7    3
dtype: int64

## 6. Dropping missing values

In [14]:
df

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [15]:
# With default arguments, every row containing at least one missing value is dropped
# (rows 1, 3, 4 and 7 here). A new frame is returned; df is not modified.
df.dropna()

Unnamed: 0,A,B,C,D,E,F
0,9,2,0,7,0,9
2,4,5,3,9,4,7
5,5,9,6,5,0,4
6,9,3,7,7,7,1


## 7. Dropping missing values - axis parameter

In [16]:
# axis=1 drops columns instead of rows: only F, the one column without missing values, survives.
df.dropna(axis=1)

Unnamed: 0,F
0,9
1,1
2,7
3,2
4,2
5,4
6,1
7,9


## 8. Dropping missing values - how parameter

In [17]:
df

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [18]:
# how="any" drops a row if any of its values is missing - the same result as a bare dropna().
df.dropna(how="any")

Unnamed: 0,A,B,C,D,E,F
0,9,2,0,7,0,9
2,4,5,3,9,4,7
5,5,9,6,5,0,4
6,9,3,7,7,7,1


In [19]:
# how="all" drops a row only if every value in it is missing; no such row exists here,
# so all eight rows are kept.
df.dropna(how="all")

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [20]:
# Column-wise version of how="any": columns A-E each contain a missing value and are dropped.
df.dropna(how="any", axis=1)

Unnamed: 0,F
0,9
1,1
2,7
3,2
4,2
5,4
6,1
7,9


In [21]:
# Column-wise version of how="all": no column is entirely missing, so all six are kept.
df.dropna(how="all", axis=1)

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


## 9. Dropping missing values - thresh parameter

* We can set a threshold on the number of non-missing values a row/column must contain; rows/columns that fall below the threshold are dropped.

* The thresh parameter requires a row or column to have at least the specified number of non-missing values not to be dropped.

* Cannot be combined with the how parameter

In [22]:
df

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [23]:
# Keep rows with at least 4 non-missing values. Rows 3 and 7 have only 3, so they are dropped.
df.dropna(thresh=4)

Unnamed: 0,A,B,C,D,E,F
0,9.0,2,0,7.0,0,9
1,,1,4,,6,1
2,4.0,5,3,9.0,4,7
4,,7,0,,0,2
5,5.0,9,6,5.0,0,4
6,9.0,3,7,7.0,7,1


In [24]:
# Every row has at least 3 non-missing values, so a threshold of 2 drops nothing.
df.dropna(thresh=2)

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


## 10. Dropping missing values - thresh parameter - columns

In [25]:
# Column-wise threshold: each column has at least 6 non-missing values out of 8, so all are kept.
df.dropna(thresh=6, axis=1)

Unnamed: 0,A,B,C,D,E,F
0,9.0,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [26]:
# Add a third missing value to column A so that it has only 5 non-missing values.
# Note: this mutates df in place, so every later cell sees the modified frame.
df.iloc[0, 0] = np.nan

df

Unnamed: 0,A,B,C,D,E,F
0,,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [27]:
# Column A now has only 5 non-missing values, so thresh=6 drops it; B-F are kept.
df.dropna(thresh=6, axis=1)

Unnamed: 0,B,C,D,E,F
0,2.0,0.0,7.0,0.0,9
1,1.0,4.0,,6.0,1
2,5.0,3.0,9.0,4.0,7
3,,,9.0,,2
4,7.0,0.0,,0.0,2
5,9.0,6.0,5.0,0.0,4
6,3.0,7.0,7.0,7.0,1
7,,,8.0,,9


## 11. Dropping missing values - inplace

In [28]:
# dropna returns a new DataFrame without rows 3 and 7; df itself is left unchanged.
df.dropna(thresh=4)

Unnamed: 0,A,B,C,D,E,F
0,,2,0,7.0,0,9
1,,1,4,,6,1
2,4.0,5,3,9.0,4,7
4,,7,0,,0,2
5,5.0,9,6,5.0,0,4
6,9.0,3,7,7.0,7,1


In [29]:
df

Unnamed: 0,A,B,C,D,E,F
0,,2.0,0.0,7.0,0.0,9
1,,1.0,4.0,,6.0,1
2,4.0,5.0,3.0,9.0,4.0,7
3,9.0,,,9.0,,2
4,,7.0,0.0,,0.0,2
5,5.0,9.0,6.0,5.0,0.0,4
6,9.0,3.0,7.0,7.0,7.0,1
7,3.0,,,8.0,,9


In [30]:
# inplace=True modifies df itself and returns None, which is why this cell shows no output.
df.dropna(thresh=4, inplace=True)

In [31]:
df

Unnamed: 0,A,B,C,D,E,F
0,,2,0,7.0,0,9
1,,1,4,,6,1
2,4.0,5,3,9.0,4,7
4,,7,0,,0,2
5,5.0,9,6,5.0,0,4
6,9.0,3,7,7.0,7,1


## 12. Dropping missing values - assign to the same variable

In [32]:
# Rebuild the demo frame from plain NumPy integers (default dtype, not nullable Int64).
raw_values = np.random.randint(10, size=(8, 6))
df = pd.DataFrame(raw_values, columns=list("ABCDEF"))

# Missing values go into rows 1 and 4 of columns A and D,
# and into rows 3 and 7 of columns B, C and E.
df.iloc[[1, 4], [0, 3]] = np.nan
df.iloc[[3, 7], [1, 2, 4]] = None

df

Unnamed: 0,A,B,C,D,E,F
0,6.0,2.0,6.0,3.0,3.0,2
1,,2.0,4.0,,1.0,1
2,3.0,0.0,3.0,6.0,1.0,8
3,1.0,,,4.0,,4
4,,0.0,1.0,,4.0,8
5,1.0,8.0,4.0,7.0,1.0,8
6,5.0,8.0,6.0,7.0,0.0,3
7,4.0,,,1.0,,4


In [33]:
# Alternative to inplace=True: rebind the name df to the frame returned by dropna.
df = df.dropna(thresh=4)

In [34]:
df

Unnamed: 0,A,B,C,D,E,F
0,6.0,2.0,6.0,3.0,3.0,2
1,,2.0,4.0,,1.0,1
2,3.0,0.0,3.0,6.0,1.0,8
4,,0.0,1.0,,4.0,8
5,1.0,8.0,4.0,7.0,1.0,8
6,5.0,8.0,6.0,7.0,0.0,3


## 13. Dropping missing values - subset parameter

In [35]:
# Fresh 8x6 random frame for the subset examples.
df = pd.DataFrame(np.random.randint(10, size=(8, 6)), columns=list("ABCDEF"))

# Column position -> row positions that should become missing.
# Columns E and F (positions 4 and 5) are left complete.
nan_rows_by_column_position = {0: [1, 4], 1: [4, 5], 2: [3, 6], 3: [1, 7]}
for column_position, row_positions in nan_rows_by_column_position.items():
    df.iloc[row_positions, [column_position]] = np.nan

df

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,,1.0,3.0,,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
4,,,5.0,0.0,8,9
5,0.0,,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


In [36]:
# Only columns A and B are inspected: rows 1, 4 and 5 miss a value in one of them and are
# dropped. Missing values in other columns (e.g. C in row 3) do not matter.
df.dropna(subset=["A", "B"])

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


In [37]:
# Only column A is inspected: rows 1 and 4 are dropped.
df.dropna(subset=["A"])

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
5,0.0,,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


## 14. Dropping missing values - subset and thresh

In [38]:
df

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,,1.0,3.0,,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
4,,,5.0,0.0,8,9
5,0.0,,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


In [39]:
# Keep rows with at least 1 non-missing value among A and B: only row 4, where both are
# missing, is dropped.
df.dropna(subset=["A", "B"], thresh=1)

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,,1.0,3.0,,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
5,0.0,,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


## 15. Filling missing values - constant

In [40]:
df

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,,1.0,3.0,,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
4,,,5.0,0.0,8,9
5,0.0,,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


In [41]:
# Replace every missing value with the constant 0. A new frame is returned; df is unchanged.
df.fillna(value=0)

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,0.0,1.0,3.0,0.0,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,0.0,4.0,5,1
4,0.0,0.0,5.0,0.0,8,9
5,0.0,0.0,4.0,7.0,6,3
6,9.0,2.0,0.0,8.0,2,4
7,6.0,5.0,5.0,0.0,8,8


## 16. Filling missing values - constant - 2

In [42]:
# A dict gives one fill value per column: A -> 0, B -> 10.
# Columns not listed in the dict (C and D here) keep their missing values.
df.fillna(value={"A": 0, "B": 10})

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,0.0,1.0,3.0,,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
4,0.0,10.0,5.0,0.0,8,9
5,0.0,10.0,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


## 17. Filling missing values - basic statistics

In [43]:
df

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,,1.0,3.0,,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
4,,,5.0,0.0,8,9
5,0.0,,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


In [44]:
# Fill column A's missing values with the mean of its non-missing values and write the
# result back. Note: this mutates df, so later cells see the filled column.
df["A"] = df["A"].fillna(value=df["A"].mean())

df

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,5.666667,1.0,3.0,,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
4,5.666667,,5.0,0.0,8,9
5,0.0,,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


In [45]:
# Fill column B's missing values with the column minimum and write the result back.
df["B"] = df["B"].fillna(value=df["B"].min())

df

Unnamed: 0,A,B,C,D,E,F
0,5.0,4.0,8.0,5.0,5,7
1,5.666667,1.0,3.0,,4,9
2,5.0,7.0,7.0,3.0,0,0
3,9.0,4.0,,4.0,5,1
4,5.666667,1.0,5.0,0.0,8,9
5,0.0,1.0,4.0,7.0,6,3
6,9.0,2.0,,8.0,2,4
7,6.0,5.0,5.0,,8,8


## 18. Filling missing values - with another DataFrame

In [46]:
# 4x5 frame of random integers in [1, 10) with default integer column labels 0..4.
df1 = pd.DataFrame(np.random.randint(low=1, high=10, size=(4, 5)))

# Blank out rows 0 and 2 in columns 1 and 3.
rows_to_blank, columns_to_blank = [0, 2], [1, 3]
df1.iloc[rows_to_blank, columns_to_blank] = np.nan

df1

Unnamed: 0,0,1,2,3,4
0,7,,1,,3
1,4,4.0,2,4.0,7
2,3,,2,,9
3,4,1.0,1,5.0,2


In [47]:
# Second 4x5 frame of random integers in [1, 10), without any missing values.
# It has the same shape and labels as df1.
replacement_values = np.random.randint(low=1, high=10, size=(4, 5))
df2 = pd.DataFrame(replacement_values)

df2

Unnamed: 0,0,1,2,3,4
0,3,5,2,1,9
1,9,9,5,3,8
2,5,6,3,8,8
3,8,2,3,9,2


In [48]:
# Passing a DataFrame fills each missing cell of df1 with the value at the same row/column
# label in df2. Cells of df1 that already hold a value are left untouched.
df1 = df1.fillna(df2)

df1

Unnamed: 0,0,1,2,3,4
0,7,5.0,1,1.0,3
1,4,4.0,2,4.0,7
2,3,6.0,2,8.0,9
3,4,1.0,1,5.0,2


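## 19. Filling missing values - method parameter

* Note: the `method` parameter of `fillna` is deprecated since pandas 2.1. The `ffill()` and `bfill()` methods shown in sections 23 and 24 are the supported equivalents.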

In [49]:
# Ten consecutive daily timestamps starting on 2022-12-05 serve as the index.
daily_index = pd.date_range(start="2022-12-05", periods=10, freq="D")
column_names = ["col1", "col2", "col3", "col4", "col5"]

# 10x5 frame of random integers in [1, 50).
df = pd.DataFrame(
    np.random.randint(1, 50, size=(10, 5)),
    index=daily_index,
    columns=column_names,
)

# Gaps of different lengths: three consecutive days in col1 (positions 3-5),
# two in col2 (positions 3-4) and two in col3 (positions 5-6).
df.iloc[[3, 4], [0, 1]] = np.nan
df.iloc[[5], [0]] = np.nan
df.iloc[[5, 6], [2]] = np.nan

df

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,,,48.0,25,16
2022-12-09,,,22.0,18,35
2022-12-10,,41.0,,31,9
2022-12-11,21.0,39.0,,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


In [50]:
# Forward fill: each missing value takes the last valid value above it in the same column.
# `fillna(method="ffill")` is deprecated since pandas 2.1; `DataFrame.ffill()` is the
# equivalent, supported spelling and returns the same result.
df.ffill()

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,22.0,5.0,48.0,25,16
2022-12-09,22.0,5.0,22.0,18,35
2022-12-10,22.0,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


## 20. Filling missing values - method parameter 2 

In [51]:
# Forward-fill only col3 and write the result back; col1 and col2 keep their gaps.
# `Series.ffill()` replaces `fillna(method="ffill")`, which is deprecated since pandas 2.1.
df["col3"] = df["col3"].ffill()

df

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,,,48.0,25,16
2022-12-09,,,22.0,18,35
2022-12-10,,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


## 21. Filling missing values - method parameter 3

In [52]:
df

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,,,48.0,25,16
2022-12-09,,,22.0,18,35
2022-12-10,,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


In [53]:
# Backward fill: each missing value takes the next valid value below it in the same column.
# `fillna(method="bfill")` is deprecated since pandas 2.1; `DataFrame.bfill()` is the
# equivalent, supported spelling and returns the same result.
df.bfill()

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,21.0,41.0,48.0,25,16
2022-12-09,21.0,41.0,22.0,18,35
2022-12-10,21.0,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


## 22. Filling missing values - method and limit

In [54]:
df

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,,,48.0,25,16
2022-12-09,,,22.0,18,35
2022-12-10,,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


In [55]:
# limit=2 fills at most two consecutive missing values per gap, so the third missing day
# in col1 (2022-12-10) stays empty.
# `ffill(limit=...)` replaces `fillna(method="ffill", limit=...)`, whose `method`
# parameter is deprecated since pandas 2.1.
df.ffill(limit=2)

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,22.0,5.0,48.0,25,16
2022-12-09,22.0,5.0,22.0,18,35
2022-12-10,,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


## 23. The ffill function

In [56]:
df

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,,,48.0,25,16
2022-12-09,,,22.0,18,35
2022-12-10,,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


In [57]:
# Forward-fill at most 2 consecutive missing values per gap; the third missing value in
# col1 (2022-12-10) therefore stays missing.
df.ffill(limit=2)

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,22.0,5.0,48.0,25,16
2022-12-09,22.0,5.0,22.0,18,35
2022-12-10,,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


## 24. The bfill function

In [58]:
# Backward-fill at most 2 consecutive missing values per gap; the first missing value of
# col1's three-day gap (2022-12-08) therefore stays missing.
df.bfill(limit=2)

Unnamed: 0,col1,col2,col3,col4,col5
2022-12-05,42.0,3.0,3.0,49,24
2022-12-06,11.0,37.0,35.0,27,36
2022-12-07,22.0,5.0,15.0,24,30
2022-12-08,,41.0,48.0,25,16
2022-12-09,21.0,41.0,22.0,18,35
2022-12-10,21.0,41.0,22.0,31,9
2022-12-11,21.0,39.0,22.0,14,26
2022-12-12,48.0,32.0,4.0,44,35
2022-12-13,1.0,20.0,39.0,9,5
2022-12-14,33.0,32.0,20.0,4,37


## 25. na_values parameter of the read_csv function

In [59]:
# Read the CSV with default settings: empty fields in col1/col2 are recognised as missing,
# but the "?" placeholders in col3 are kept as-is instead of being treated as missing.
# The path is relative to the notebook's working directory.
df = pd.read_csv("Data/sample_dataset.csv")

df

Unnamed: 0,col1,col2,col3,col4,col5
0,23.0,16.0,45,17,2
1,46.0,16.0,24,2,31
2,2.0,29.0,2,46,24
3,,,25,23,7
4,,,30,34,29
5,,30.0,?,5,6
6,35.0,37.0,?,26,39
7,9.0,5.0,35,11,41
8,13.0,39.0,25,5,39
9,40.0,15.0,32,47,24


In [60]:
# na_values lists extra strings that read_csv should treat as missing. With "?" included,
# the two placeholder entries in col3 become missing and the column is parsed as numeric.
df = pd.read_csv("Data/sample_dataset.csv", na_values=["?"])

df

Unnamed: 0,col1,col2,col3,col4,col5
0,23.0,16.0,45.0,17,2
1,46.0,16.0,24.0,2,31
2,2.0,29.0,2.0,46,24
3,,,25.0,23,7
4,,,30.0,34,29
5,,30.0,,5,6
6,35.0,37.0,,26,39
7,9.0,5.0,35.0,11,41
8,13.0,39.0,25.0,5,39
9,40.0,15.0,32.0,47,24
