# Replacing NaN Values

##### 1. Finding NaN Values

##### 2. Dropping NaN Values

##### 3. Filling NaN Values: 0s (or any other value)

##### 4. Filling NaN Values: String

##### 5. Filling NaN Values: Most Frequent Observation of entire data or specific group

##### 6. Filling NaN Values: Mean of entire data or specific Group

##### 7. Filling NaN Values: Median of entire data or specific Group

##### 8. Filling NaN Values: forward & backward fill (sequential data)

##### 9. Filling NaN Values: Interpolation

In [148]:
import pandas as pd
import numpy as np

# Sample frame used throughout: 'value' is numeric with three missing
# entries, 'name' is the label later used for per-group fills.
df = pd.DataFrame(
    dict(
        value=[1, np.nan, np.nan, 2, 3, 1, 3, np.nan, 3],
        name=list('AABBBBCCC'),
    )
)
df

Unnamed: 0,value,name
0,1.0,A
1,,A
2,,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,,C
8,3.0,C


## 1. Finding NaN Values

In [101]:
# Number of missing entries in each column
df.isnull().sum(axis=0)

value    3
name     0
dtype: int64

## 2. Dropping NaN Values

In [127]:
# Keep only the rows that have no missing entry. dropna() returns a new
# frame, so `df` itself is left untouched.
df1 = df.dropna(axis=0, how='any')
df1

Unnamed: 0,value,name
0,1.0,A
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
8,3.0,C


## 3. Filling NaN Values: 0s (or any other value)

In [128]:
# Substitute a constant (0 here) for every missing entry. fillna() returns
# a new frame, so `df` itself is left untouched.
df2 = df.fillna(value=0)
df2

Unnamed: 0,value,name
0,1.0,A
1,0.0,A
2,0.0,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,0.0,C
8,3.0,C


## 4. Filling NaN Values: String

In [129]:
# Substitute a placeholder string for every missing entry. The numeric
# 'value' column then holds the string next to the remaining numbers.
df3 = df.fillna(value='Empty')
df3

Unnamed: 0,value,name
0,1.0,A
1,Empty,A
2,Empty,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,Empty,C
8,3.0,C


## 5. Filling NaN Values: Most Frequent Observation of entire data or specific group

In [134]:
# Entire data
df4 = df.copy()

# value_counts() ignores NaN and sorts by frequency (descending), so the
# first index label is the most frequent observed value.
most_frequent = df4['value'].value_counts().index[0]

# Assign the filled column back instead of calling
# `df4.value.fillna(..., inplace=True)`: the in-place call operates on an
# intermediate Series, which triggers a chained-assignment warning on recent
# pandas and leaves `df4` unchanged once Copy-on-Write is enabled.
df4['value'] = df4['value'].fillna(most_frequent)
df4

Unnamed: 0,value,name
0,1.0,A
1,3.0,A
2,3.0,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,3.0,C
8,3.0,C


In [113]:
# Specific group
df4 = df.copy()

# Select the 'value' column explicitly so transform() returns a Series
# aligned with df4's index. Transforming the whole grouped frame only works
# while 'value' is the single non-key column.
# Within each group, NaN is replaced by that group's most frequent observed
# value. A group with no observed values is returned unchanged, because
# value_counts() would be empty and `.index[0]` would raise IndexError.
df4["value"] = df4.groupby("name")["value"].transform(
    lambda s: s.fillna(s.value_counts().index[0]) if s.notna().any() else s
)
df4

Unnamed: 0,value,name
0,1.0,A
1,1.0,A
2,2.0,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,3.0,C
8,3.0,C


## 6. Filling NaN Values: Mean of entire data or specific group

In [131]:
# Entire data
df5 = df.copy()

# Series.mean() skips NaN by default, so this is the mean of the observed
# values only.
mean = df5['value'].mean()

# Assign the filled column back instead of calling
# `df5.value.fillna(..., inplace=True)`: the in-place call operates on an
# intermediate Series, which triggers a chained-assignment warning on recent
# pandas and leaves `df5` unchanged once Copy-on-Write is enabled.
df5['value'] = df5['value'].fillna(mean)
df5

Unnamed: 0,value,name
0,1.0,A
1,2.166667,A
2,2.166667,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,2.166667,C
8,3.0,C


In [117]:
# Specific group
df5 = df.copy()

# transform('mean') broadcasts each group's mean (NaN skipped) back onto the
# rows of that group, so fillna() can pick the matching value per row.
# Selecting 'value' explicitly keeps this correct even if the frame gains
# further columns. Assigning a whole transformed frame to one column only
# works while 'value' is the single non-key column.
df5['value'] = df5['value'].fillna(
    df5.groupby('name')['value'].transform('mean')
)
df5

Unnamed: 0,value,name
0,1.0,A
1,1.0,A
2,2.0,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,3.0,C
8,3.0,C


## 7. Filling NaN Values: Median of entire data or specific group

In [132]:
# Entire data
df6 = df.copy()

# Series.median() skips NaN by default, so this is the median of the
# observed values only.
median = df6['value'].median()

# Assign the filled column back instead of calling
# `df6.value.fillna(..., inplace=True)`: the in-place call operates on an
# intermediate Series, which triggers a chained-assignment warning on recent
# pandas and leaves `df6` unchanged once Copy-on-Write is enabled.
df6['value'] = df6['value'].fillna(median)
df6

Unnamed: 0,value,name
0,1.0,A
1,2.5,A
2,2.5,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,2.5,C
8,3.0,C


In [121]:
# Specific group
df6 = df.copy()

# transform('median') broadcasts each group's median (NaN skipped) back onto
# the rows of that group, so fillna() can pick the matching value per row.
# Selecting 'value' explicitly keeps this correct even if the frame gains
# further columns. Assigning a whole transformed frame to one column only
# works while 'value' is the single non-key column.
df6['value'] = df6['value'].fillna(
    df6.groupby('name')['value'].transform('median')
)
df6

Unnamed: 0,value,name
0,1.0,A
1,1.0,A
2,2.0,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,3.0,C
8,3.0,C


## 8. Filling NaN Values: forward & backward fill (sequential data)

In [124]:
# Forward fill: each gap takes the closest preceding observed value in the
# same column. ffill() returns a new frame, so `df` is left untouched.
df7 = df.ffill(axis=0)
df7

Unnamed: 0,value,name
0,1.0,A
1,1.0,A
2,1.0,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,3.0,C
8,3.0,C


In [126]:
# Backward fill: each gap takes the closest following observed value in the
# same column. bfill() returns a new frame, so `df` is left untouched.
df7 = df.bfill(axis=0)
df7

Unnamed: 0,value,name
0,1.0,A
1,2.0,A
2,2.0,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,3.0,C
8,3.0,C


## 9. Filling NaN Values: Interpolation

In [153]:
# Linear interpolation
# Interpolation depends on row order, so sort with a stable algorithm: rows
# sharing the same name keep their original relative order. The default
# quicksort does not guarantee that. sort_values() already returns a new
# frame, so no explicit copy of `df` is needed.
df8 = df.sort_values('name', kind='mergesort')

# Each gap is filled on a straight line between its neighbouring observed
# values. The interpolation runs over the whole column, not per name group.
df8['value'] = df8['value'].interpolate(method='linear')
df8

Unnamed: 0,value,name
0,1.0,A
1,1.333333,A
2,1.666667,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,3.0,C
8,3.0,C


In [155]:
# Spline interpolation
# Interpolation depends on row order, so sort with a stable algorithm: rows
# sharing the same name keep their original relative order. The default
# quicksort does not guarantee that. sort_values() already returns a new
# frame, so no explicit copy of `df` is needed.
df8 = df.sort_values('name', kind='mergesort')

# Fit a spline of order 2 through the observed values and evaluate it at the
# gaps. This method requires SciPy to be installed.
df8['value'] = df8['value'].interpolate(method='spline', order=2)
df8

Unnamed: 0,value,name
0,1.0,A
1,1.375325,A
2,1.645455,B
3,2.0,B
4,3.0,B
5,1.0,B
6,3.0,C
7,2.762338,C
8,3.0,C
