# Handling missing values with replace

## Replace values given in `to_replace` with `value`.
## By default: `data.replace(to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad')`







##to_replace
str, regex, list, dict, Series, int, float, or None

How to find the values that will be replaced.

##value
scalar, dict, list, str, regex

default None

Value to replace any values matching `to_replace` with.

## inplace
bool, default False

Whether to modify the DataFrame in place instead of returning a new one.

##regex :
bool or same types as `to_replace`,

default False

Whether to interpret `to_replace` and/or `value` as regular expressions.

In [1]:
import pandas as pd

In [3]:
# Load the sample CSV into a DataFrame; the bare `data` expression
# displays it as the cell output.
data = pd.read_csv('/content/data_#na2.csv')
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace 'Priyang' with 'Anil'

In [None]:
# Every cell equal to 'Priyang' becomes 'Anil'. With the default
# inplace=False a new DataFrame is returned and `data` is left unchanged.
data.replace(to_replace='Priyang', value='Anil')

Unnamed: 0,Name,Marks,Grades
0,Anil,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,not available
7,Anil,78.0,-99


## Replace the names 'Priyang' and 'Aadhya' with 'Anil'

In [None]:
# A list in `to_replace` combined with a scalar `value` maps every
# listed name to the same replacement.
data.replace(
    to_replace=['Priyang', 'Aadhya'],
    value='Anil',
)

Unnamed: 0,Name,Marks,Grades
0,Anil,98.0,
1,Anil,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,not available
7,Anil,78.0,-99


## Replace NaN with 40

In [None]:
# NumPy is imported so that NaN (the missing-value marker) can be referenced.
import numpy as np

# Replace every NaN in the DataFrame, regardless of column, with 40.
data.replace(to_replace=np.nan, value=40)

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,40
1,Aadhya,40.0,AB
2,Krisha,99.0,AA
3,Vedant,87.0,40
4,Parshv,90.0,AC
5,Mittal,40.0,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace marks column NaN with 40 and grades column NaN with FF

In [None]:
# Nested dict form {column: {old: new}}: each column gets its own
# replacement for NaN.
data.replace(
    to_replace={
        'Marks': {np.nan: 40},
        'Grades': {np.nan: 'FF'},
    }
)

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,FF
1,Aadhya,40.0,AB
2,Krisha,99.0,AA
3,Vedant,87.0,FF
4,Parshv,90.0,AC
5,Mittal,40.0,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace 'not available', np.nan and '-99' in the Grades column with 'FF'

In [None]:
# With inplace=True the changes are applied to `data` itself rather than
# to a returned copy, so the DataFrame is displayed afterwards.
data.replace(
    to_replace={'Grades': ['not available', np.nan, '-99']},
    value='FF',
    inplace=True,
)
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,FF
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,FF
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,FF
7,Priyang,78.0,FF


## Replace Grades values
- FF: Fail
- AB: Very good
- AA: Excellent
- BA: Good

In [None]:
# Parallel lists: each code in `to_replace` is swapped for the label at
# the same index in `value`.
data.replace(
    to_replace=['FF', 'AB', 'AA', 'BA'],
    value=['Fail', 'Very good', 'Excellent', 'Good'],
)

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,Fail
1,Aadhya,,Very good
2,Krisha,99.0,Excellent
3,Vedant,87.0,Fail
4,Parshv,90.0,AC
5,Mittal,,Good
6,Archana,82.0,Fail
7,Priyang,78.0,Fail


## Clean the Marks column using regex



In [4]:
# Load the CSV whose Marks values carry a trailing letter (e.g. '98M');
# the bare `data` expression displays it as the cell output.
data = pd.read_csv('/content/data_#na_new.csv')
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98M,
1,Aadhya,,AB
2,Krisha,99M,AA
3,Vedant,,
4,Parshv,90M,
5,Mittal,,BA
6,Archana,82M,not available
7,Priyang,78M,-99


In [None]:
# Use a regular expression to strip the letters (the trailing 'M') from the
# Marks column. The replacement is the empty string so the letters are
# actually removed; replacing them with ' ' would leave values such as '98 '
# with trailing whitespace. The dict limits the pattern to the Marks column.
data.replace(to_replace={'Marks': r'[A-Za-z]'}, value='', regex=True)

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,,
4,Parshv,90.0,
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace NaN values of Marks column with 0

In [17]:
# Reload the sample CSV into a fresh DataFrame and display it.
data = pd.read_csv('/content/data_#na.csv')
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


In [None]:
# The dict restricts the search to the Marks column: NaN found there is
# replaced by 0, while other columns are not searched.
data.replace(to_replace={'Marks': np.nan}, value=0)

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,0.0,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,0.0,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace NaN value (First only) of Grades column with 'FF' using at

In [18]:
# `.at` addresses a single cell by row label and column label; this writes
# 'FF' into the Grades cell of row 0 directly on `data`.
data.at[0, 'Grades'] = 'FF'
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,FF
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace NaN value (First only) of Marks column with 0 using iat

In [21]:
# `.iat` addresses a single cell by integer position (row 1, column 1).
# NOTE(review): column position 1 is assumed to be Marks — confirm against
# the columns of the loaded CSV.
data.iat[1, 1] = 0
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,FF
1,Aadhya,0.0,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace the remaining NaN value of Marks column (row 5) with 0 using at

In [26]:
# Write 0 into the Marks cell of row label 5 using the label-based `.at`
# accessor, then display the DataFrame.
data.at[5, 'Marks'] = 0
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,FF
1,Aadhya,0.0,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,0.0,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace consecutive NaN values of Grades column with 'FF' using iloc

In [30]:
# Reload the CSV so the earlier cell assignments are discarded, then display it.
data = pd.read_csv('/content/data_#na.csv')
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


In [33]:
# `.iloc` slices by integer position with an exclusive end, so 3:5 selects
# row positions 3 and 4; column position 2 receives 'FF'.
# NOTE(review): column position 2 is assumed to be Grades — confirm against
# the columns of the loaded CSV.
data.iloc[3:5, 2] = 'FF'
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,FF
4,Parshv,90.0,FF
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


## Replace consecutive NaN values of Grades column with 'FF' using loc

In [34]:
# Reload the CSV so the previous assignment is discarded, then display it.
data = pd.read_csv('/content/data_#na.csv')
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99


In [36]:
# `.loc` slices by label and includes both endpoints, so 3:4 selects row
# labels 3 and 4; their Grades cells are set to 'FF'.
data.loc[3:4, 'Grades'] = 'FF'
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,FF
4,Parshv,90.0,FF
5,Mittal,,BA
6,Archana,82.0,not available
7,Priyang,78.0,-99
