# 03 - Deleting & handling missing values - Iris

### Step 1. Import the necessary libraries

In [4]:
import pandas as pd
import numpy as np

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 

### Step 3. Assign it to a variable called iris

In [5]:
iris = pd.read_csv("iris.data")
iris

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


### Step 4. Call the columns for the dataset as follows:
1. sepal_length (in cm)
2. sepal_width (in cm)
3. petal_length (in cm)
4. petal_width (in cm)
5. class

In [6]:
iris = pd.read_csv("iris.data", names=["sepal_length (in cm)", "sepal_width (in cm)","petal_length (in cm)","petal_width (in cm)", "class"])
iris

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


### Step 5.  Is there any missing value in the dataframe?

In [7]:
iris.isnull()
pd.isna(iris)

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
...,...,...,...,...,...
145,False,False,False,False,False
146,False,False,False,False,False
147,False,False,False,False,False
148,False,False,False,False,False


### Step 6.  Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN

In [8]:
iris.loc[10:30, "petal_legnth (in cm)"] = np.NaN
iris[10:30]


Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class,petal_legnth (in cm)
10,5.4,3.7,1.5,0.2,Iris-setosa,
11,4.8,3.4,1.6,0.2,Iris-setosa,
12,4.8,3.0,1.4,0.1,Iris-setosa,
13,4.3,3.0,1.1,0.1,Iris-setosa,
14,5.8,4.0,1.2,0.2,Iris-setosa,
15,5.7,4.4,1.5,0.4,Iris-setosa,
16,5.4,3.9,1.3,0.4,Iris-setosa,
17,5.1,3.5,1.4,0.3,Iris-setosa,
18,5.7,3.8,1.7,0.3,Iris-setosa,
19,5.1,3.8,1.5,0.3,Iris-setosa,


### Step 7. Good, now lets substitute the NaN values to 1.0

In [9]:
values = {"petal_legnth (in cm)": 1.0}
iris = iris.fillna(value=values)
iris[10:30]

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class,petal_legnth (in cm)
10,5.4,3.7,1.5,0.2,Iris-setosa,1.0
11,4.8,3.4,1.6,0.2,Iris-setosa,1.0
12,4.8,3.0,1.4,0.1,Iris-setosa,1.0
13,4.3,3.0,1.1,0.1,Iris-setosa,1.0
14,5.8,4.0,1.2,0.2,Iris-setosa,1.0
15,5.7,4.4,1.5,0.4,Iris-setosa,1.0
16,5.4,3.9,1.3,0.4,Iris-setosa,1.0
17,5.1,3.5,1.4,0.3,Iris-setosa,1.0
18,5.7,3.8,1.7,0.3,Iris-setosa,1.0
19,5.1,3.8,1.5,0.3,Iris-setosa,1.0


### Step 8. Now let's delete the column class

In [10]:
iris.drop(columns="class")

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),petal_legnth (in cm)
0,5.1,3.5,1.4,0.2,1.0
1,4.9,3.0,1.4,0.2,1.0
2,4.7,3.2,1.3,0.2,1.0
3,4.6,3.1,1.5,0.2,1.0
4,5.0,3.6,1.4,0.2,1.0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,1.0
146,6.3,2.5,5.0,1.9,1.0
147,6.5,3.0,5.2,2.0,1.0
148,6.2,3.4,5.4,2.3,1.0


### Step 9.  Set the first 3 rows as NaN

In [11]:
iris.iloc[:3] = np.nan
iris

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class,petal_legnth (in cm)
0,,,,,,
1,,,,,,
2,,,,,,
3,4.6,3.1,1.5,0.2,Iris-setosa,1.0
4,5.0,3.6,1.4,0.2,Iris-setosa,1.0
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica,1.0
146,6.3,2.5,5.0,1.9,Iris-virginica,1.0
147,6.5,3.0,5.2,2.0,Iris-virginica,1.0
148,6.2,3.4,5.4,2.3,Iris-virginica,1.0


### Step 10.  Delete the rows that have NaN

In [12]:
iris = iris.dropna()
iris

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class,petal_legnth (in cm)
3,4.6,3.1,1.5,0.2,Iris-setosa,1.0
4,5.0,3.6,1.4,0.2,Iris-setosa,1.0
5,5.4,3.9,1.7,0.4,Iris-setosa,1.0
6,4.6,3.4,1.4,0.3,Iris-setosa,1.0
7,5.0,3.4,1.5,0.2,Iris-setosa,1.0
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica,1.0
146,6.3,2.5,5.0,1.9,Iris-virginica,1.0
147,6.5,3.0,5.2,2.0,Iris-virginica,1.0
148,6.2,3.4,5.4,2.3,Iris-virginica,1.0


### Step 11. Reset the index so it begins with 0 again

In [13]:
iris.reset_index(drop=True)

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class,petal_legnth (in cm)
0,4.6,3.1,1.5,0.2,Iris-setosa,1.0
1,5.0,3.6,1.4,0.2,Iris-setosa,1.0
2,5.4,3.9,1.7,0.4,Iris-setosa,1.0
3,4.6,3.4,1.4,0.3,Iris-setosa,1.0
4,5.0,3.4,1.5,0.2,Iris-setosa,1.0
...,...,...,...,...,...,...
142,6.7,3.0,5.2,2.3,Iris-virginica,1.0
143,6.3,2.5,5.0,1.9,Iris-virginica,1.0
144,6.5,3.0,5.2,2.0,Iris-virginica,1.0
145,6.2,3.4,5.4,2.3,Iris-virginica,1.0
