# Handling Missing data using Replace

In [1]:
import pandas as pd
import numpy as np

In [11]:
# Load the raw weather observations; the file is expected next to the notebook.
data = pd.read_csv(filepath_or_buffer="weather_data.csv")
data

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32F,6mph,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,No event
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31C,4mph,Sunny
6,1/6/2017,34,5,No event


### Replace one value with NaN

In [3]:
# Replace the -99999 sentinel with NaN throughout the dataset.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
# NOTE(review): an integer sentinel only matches numeric cells; if a column
# was parsed as text, the sentinel would have to be matched as the string
# '-99999' instead -- confirm against the dtypes of the loaded frame.
df1 = data.replace(-99999, np.nan)
df1

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,0
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,4.0,Sunny
6,1/6/2017,34.0,5.0,0


### Replace multiple values with NaN

In [5]:
# Treat both sentinel codes (-99999 and -88888) as missing everywhere in the frame.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df2 = data.replace([-99999, -88888], np.nan)
df2

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,0
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,,Sunny
6,1/6/2017,34.0,5.0,0


### Replace different sets of values in different columns with NaN

In [7]:
# Per-column sentinels: each key is a column name and each value is the
# sentinel (or list of sentinels) to turn into NaN in that column only.
df3 = data.replace(
    {
        'temperature': -99999,
        'windspeed': [-88888, -99999],
        'event': '0',
    },
    np.nan,  # np.nan, not np.NaN: the alias was removed in NumPy 2.0
)
df3

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,,Sunny
6,1/6/2017,34.0,5.0,


### Replace multiple values with NaN and some other values

In [10]:
# A single {old: new} mapping applied to every column: the numeric sentinel
# becomes NaN, while the 'No event' label is rewritten to "Sunny".
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df4 = data.replace({
    -99999: np.nan,
    'No event': "Sunny",
})
df4

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,No event
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,4.0,Sunny
6,1/6/2017,34.0,5.0,No event


### Remove measurement units from the dataset using regular expressions

In [12]:
# With regex=True the first argument is a pattern, not a literal value:
# every ASCII letter in every string cell is replaced by the empty string.
df5 = data.replace(to_replace='[A-Za-z]', value="", regex=True)
df5

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,
1,1/2/2017,-99999,7,
2,1/3/2017,28,-99999,
3,1/4/2017,-99999,7,
4,1/5/2017,32,-99999,
5,1/6/2017,31,4,
6,1/6/2017,34,5,


The code above also removed the values from the categorical `event` column. This can be avoided by passing a dictionary that restricts the pattern to specific columns.

In [13]:
# Restrict the letter-stripping pattern to the two measurement columns;
# columns not named in the mapping are left untouched.
df5 = data.replace(
    to_replace={'temperature': '[A-Za-z]', 'windspeed': '[A-Za-z]'},
    value="",
    regex=True,
)
df5

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,No event
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,4,Sunny
6,1/6/2017,34,5,No event


### Replace list with another list

In [14]:
# Small example frame: one qualitative score label per student.
df = pd.DataFrame(
    data={
        'score': ['exceptional', 'average', 'good', 'poor', 'average', 'exceptional'],
        'student': ['rob', 'maya', 'parthiv', 'tom', 'julian', 'erica'],
    },
    columns=['score', 'student'],
)
df

Unnamed: 0,score,student
0,exceptional,rob
1,average,maya
2,good,parthiv
3,poor,tom
4,average,julian
5,exceptional,erica


In [15]:
# Positional list-to-list mapping: the i-th label is replaced by the i-th number.
df.replace(
    to_replace=['poor', 'average', 'good', 'exceptional'],
    value=[1, 2, 3, 4],
)

Unnamed: 0,score,student
0,4,rob
1,2,maya
2,3,parthiv
3,1,tom
4,2,julian
5,4,erica


<b>Conclusion: </b> The `replace` method is a very useful tool for handling missing values.