# 03 - Deleting & handling missing values - Iris

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 

In [2]:
iris = pd.read_csv("iris.data")

### Step 3. Assign it to a variable called iris

In [3]:
iris.head()

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


### Step 4. Call the columns for the dataset as follows:
1. sepal_length (in cm)
2. sepal_width (in cm)
3. petal_length (in cm)
4. petal_width (in cm)
5. class

In [4]:
iris.rename(columns = {"5.1":"sepal_length (in cm)", "3.5":"sepal_width (in cm)", "1.4" :"petal_length (in cm)", "0.2":"petal_width (in cm)", "Iris-setosa":"class" }, inplace = True)

In [5]:
iris

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


### Step 5.  Is there any missing value in the dataframe?

In [6]:
iris.isnull().sum()

sepal_length (in cm)    0
sepal_width (in cm)     0
petal_length (in cm)    0
petal_width (in cm)     0
class                   0
dtype: int64

### Step 6.  Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN

In [7]:
iris.iloc[10:30,:]["petal_length (in cm)"] = pd.np.nan

  iris.iloc[10:30,:]["petal_length (in cm)"] = pd.np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iris.iloc[10:30,:]["petal_length (in cm)"] = pd.np.nan


In [8]:
iris.iloc[10:30,:]["petal_length (in cm)"]

10   NaN
11   NaN
12   NaN
13   NaN
14   NaN
15   NaN
16   NaN
17   NaN
18   NaN
19   NaN
20   NaN
21   NaN
22   NaN
23   NaN
24   NaN
25   NaN
26   NaN
27   NaN
28   NaN
29   NaN
Name: petal_length (in cm), dtype: float64

In [9]:
iris.isnull().sum()

sepal_length (in cm)     0
sepal_width (in cm)      0
petal_length (in cm)    20
petal_width (in cm)      0
class                    0
dtype: int64

### Step 7. Good, now lets substitute the NaN values to 1.0

In [10]:
iris.fillna(1.0)

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


### Step 8. Now let's delete the column class

In [11]:
iris.drop("class", axis =1)

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm)
0,4.9,3.0,1.4,0.2
1,4.7,3.2,1.3,0.2
2,4.6,3.1,1.5,0.2
3,5.0,3.6,1.4,0.2
4,5.4,3.9,1.7,0.4
...,...,...,...,...
144,6.7,3.0,5.2,2.3
145,6.3,2.5,5.0,1.9
146,6.5,3.0,5.2,2.0
147,6.2,3.4,5.4,2.3


### Step 9.  Set the first 3 rows as NaN

In [12]:
iris[0:3]= None
iris

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class
0,,,,,
1,,,,,
2,,,,,
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


### Step 10.  Delete the rows that have NaN

In [13]:
iris.dropna(axis=0)

Unnamed: 0,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
5,4.6,3.4,1.4,0.3,Iris-setosa
6,5.0,3.4,1.5,0.2,Iris-setosa
7,4.4,2.9,1.4,0.2,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


### Step 11. Reset the index so it begins with 0 again

In [16]:
iris.reset_index()

Unnamed: 0,index,sepal_length (in cm),sepal_width (in cm),petal_length (in cm),petal_width (in cm),class
0,0,,,,,
1,1,,,,,
2,2,,,,,
3,3,5.0,3.6,1.4,0.2,Iris-setosa
4,4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...,...
144,144,6.7,3.0,5.2,2.3,Iris-virginica
145,145,6.3,2.5,5.0,1.9,Iris-virginica
146,146,6.5,3.0,5.2,2.0,Iris-virginica
147,147,6.2,3.4,5.4,2.3,Iris-virginica
