# Handling missing values

Default: `data.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)`

# **1. Do you want to drop rows or columns having missing values?** `(axis)`

## 0, or 'index' : Drop rows which contain missing values.
## 1, or 'columns' : Drop columns which contain missing values.

# **2. Do you want to drop rows or columns having at least one missing value or all missing values?** `(how)`
## 'any' : If any NA values are present, drop that row or column.
## 'all' : If all values are NA, drop that row or column.

# **3. Do you want to set some threshold value to drop missing values?** `(thresh)`

## thresh : int, optional
##    Require that many non-NA values. Cannot be combined with `how`.

# **4. Do you want to drop rows of some selected columns only?** `(subset)`

## subset:  Define in which column(s) to look for missing values.

# **5. Do you want to drop rows or columns temporarily or permanently?** `(inplace)`

## inplace : bool, default False
##    If True, do operation inplace and return None.

In [1]:
import pandas as pd

In [33]:
# Load the sample dataset into a DataFrame.
# NOTE(review): '/content/...' looks like a Colab-style absolute path — adjust when running elsewhere.
data = pd.read_csv('/content/data_m.csv')
# Bare expression as the last line of the cell so the notebook displays the frame.
data

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,BB
7,,,


## Drop the rows where at least one value is missing

In [10]:
# Simplest form: rely on the defaults (axis=0, how='any'), i.e. drop every
# row that contains at least one missing value.
data.dropna()

Unnamed: 0,Name,Marks,Grades
2,Krisha,99.0,AA
4,Parshv,90.0,AC
6,Archana,82.0,BB


In [11]:
# Another way: the same call with the defaults spelled out explicitly
# (axis=0 -> rows, how='any' -> at least one missing value).
data.dropna(axis=0, how='any')

Unnamed: 0,Name,Marks,Grades
2,Krisha,99.0,AA
4,Parshv,90.0,AC
6,Archana,82.0,BB


In [14]:
# Another way: 'index' is the string alias for axis=0 (rows).
data.dropna(axis='index', how='any')

Unnamed: 0,Name,Marks,Grades
2,Krisha,99.0,AA
4,Parshv,90.0,AC
6,Archana,82.0,BB


## Drop the columns where at least one value is missing

In [13]:
# axis=1 targets columns; how defaults to 'any', so every column that
# contains at least one missing value is dropped.
data.dropna(axis=1)

0
1
2
3
4
5
6
7


In [15]:
# Another way: equivalent to data.dropna(axis=1), with how='any' written out.
data.dropna(axis=1, how='any')

0
1
2
3
4
5
6
7


## Drop the rows where all the values are missing

In [16]:
# how="all": a row is dropped only when every value in it is missing.
data.dropna(axis=0, how="all")

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,BB


## Drop the columns where all values are missing

In [17]:
# how='all' on axis=1: a column is dropped only when every value in it is missing.
data.dropna(axis=1, how='all')

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,BB
7,,,


In [19]:
# It can also be written this way: 'columns' is the string alias for axis=1.
data.dropna(axis='columns', how='all')

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,BB
7,,,


## Keep only the rows with at least 2 non-NA values

In [20]:
# thresh=2: keep only the rows that have at least 2 non-NA values.
data.dropna(axis='index', thresh=2)

Unnamed: 0,Name,Marks,Grades
0,Priyang,98.0,
1,Aadhya,,AB
2,Krisha,99.0,AA
3,Vedant,87.0,
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,BB


## Define in which columns to look for missing/NA values

In [30]:
# Remove the rows that have a missing value in any of the listed columns;
# missing values in columns outside `subset` are not considered.
data.dropna(subset=['Name', 'Grades'])

Unnamed: 0,Name,Marks,Grades
1,Aadhya,,AB
2,Krisha,99.0,AA
4,Parshv,90.0,AC
5,Mittal,,BA
6,Archana,82.0,BB


## Inplace

In [35]:
# inplace=True makes the removal permanent: `data` itself is modified and
# the call returns None, so the frame is displayed on a separate line.
data.dropna(inplace=True)
data

Unnamed: 0,Name,Marks,Grades
2,Krisha,99.0,AA
4,Parshv,90.0,AC
6,Archana,82.0,BB
