# Python Pandas

#### Handing Missing Values: Replace Function

In [11]:
import pandas as pd
import numpy as np
df=pd.read_csv('weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,01-01-17,32,6,Rainy
1,02-01-17,-99999,7,Sunny
2,03-01-17,28,-99999,Snow
3,04-01-17,-99999,7,0
4,05-01-17,32,-88888,Rainy
5,06-01-17,31,2,Sunny
6,06-01-17,34,5,0


In [12]:
# new_df=df.replace(-99999,np.NaN) when replace a single value for all
# new_df=df.replace([-99999,-88888],np.NaN)    # replace multiple value in a list, this replace is going throug all columns

# replace based on columns, providing dictionary

new_df = df.replace({
    'temperature': -99999,
    'windspeed': -88888,
    'event':'0'
},np.NaN)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,01-01-17,32.0,6.0,Rainy
1,02-01-17,,7.0,Sunny
2,03-01-17,28.0,-99999.0,Snow
3,04-01-17,,7.0,
4,05-01-17,32.0,,Rainy
5,06-01-17,31.0,2.0,Sunny
6,06-01-17,34.0,5.0,


In [13]:
df2=pd.read_csv('weather_data2.csv')
df2

Unnamed: 0,day,temperature,windspeed,event
0,01-01-17,32,6,Rainy
1,02-01-17,-99999,7,Sunny
2,03-01-17,28,-99999,Snow
3,04-01-17,-99999,7,No Event
4,05-01-17,32,-99999,Rainy
5,06-01-17,31,2,Sunny
6,06-01-17,34,5,No Event


In [4]:
# Mapping Values
# Want to replace 'No Event' by 'Sunny'. Dictionary

new_df2 = df2.replace({
    -99999:np.NaN,
    'No Event':'Sunny'
})
new_df2

Unnamed: 0,day,temperature,windspeed,event
0,01-01-17,32.0,6.0,Rainy
1,02-01-17,,7.0,Sunny
2,03-01-17,28.0,,Snow
3,04-01-17,,7.0,Sunny
4,05-01-17,32.0,,Rainy
5,06-01-17,31.0,2.0,Sunny
6,06-01-17,34.0,5.0,Sunny


In [14]:
df3=pd.read_csv('weather_data3.csv')
df3

Unnamed: 0,day,temperature,windspeed,event
0,01-01-17,32 F,6 mph,Rainy
1,02-01-17,-99999,7 mph,Sunny
2,03-01-17,28,-99999,Snow
3,04-01-17,-99999,7,No Event
4,05-01-17,32 C,-99999,Rainy
5,06-01-17,31,2,Sunny
6,06-01-17,34,5,No Event


In [15]:
# Replace unit of measure by Ragex Application

In [7]:
new_df3 = df3.replace('[A-Za-z]','', regex=True) # the range between upper & lower case A-Z, removed, with event value, want to keep event value
new_df3

Unnamed: 0,day,temperature,windspeed,event
0,01-01-17,32,6,
1,02-01-17,-99999,7,
2,03-01-17,28,-99999,
3,04-01-17,-99999,7,
4,05-01-17,32,-99999,
5,06-01-17,31,2,
6,06-01-17,34,5,


In [16]:
# replace first two column wiht '' nothing when A-Za-z, Keeping event as it is
df3 = df3.replace({
    'temperature':'[A-Za-z]',
    'windspeed':'[A-Za-z]'
},'', regex=True)
df3

Unnamed: 0,day,temperature,windspeed,event
0,01-01-17,32,6,Rainy
1,02-01-17,-99999,7,Sunny
2,03-01-17,28,-99999,Snow
3,04-01-17,-99999,7,No Event
4,05-01-17,32,-99999,Rainy
5,06-01-17,31,2,Sunny
6,06-01-17,34,5,No Event


# Replace list of value on other list of values

#### Construction a different DataFrame

In [18]:
df4 = pd.DataFrame({
    'score':['exceptional', 'average','good','poor','average','exceptional'],
    'student':['rob','maya','partriv','tom','julian','erica']
})
df4

# let say I have a internal mapping for score e.g poor=1, exceptional=4

Unnamed: 0,score,student
0,exceptional,rob
1,average,maya
2,good,partriv
3,poor,tom
4,average,julian
5,exceptional,erica


In [21]:
new_df4 = df4.replace(['poor','average','good','exceptional'], [1,2,3,4])
new_df4

Unnamed: 0,score,student
0,4,rob
1,2,maya
2,3,partriv
3,1,tom
4,2,julian
5,4,erica
