# Iris

### Introduction:

This exercise may seem a little bit strange, but keep doing it.

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 

### Step 3. Assign it to a variable called iris

In [2]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = pd.read_csv(url)
iris.head()

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


### Step 4. Create columns for the dataset

In [3]:
# 1. sepal_length (in cm)
# 2. sepal_width (in cm)
# 3. petal_length (in cm)
# 4. petal_width (in cm)
# 5. class
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None, names=columns)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### Step 5.  Is there any missing value in the dataframe?

In [4]:
iris.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
class           0
dtype: int64

### Step 6.  Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN

In [5]:
iris.iloc[9:29] = np.nan
iris.iloc[9:29]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
9,,,,,
10,,,,,
11,,,,,
12,,,,,
13,,,,,
14,,,,,
15,,,,,
16,,,,,
17,,,,,
18,,,,,


### Step 7. Good, now lets substitute the NaN values to 1.0

In [6]:
iris.iloc[9:29] = 1.0
iris.iloc[9:29]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
9,1.0,1.0,1.0,1.0,1.0
10,1.0,1.0,1.0,1.0,1.0
11,1.0,1.0,1.0,1.0,1.0
12,1.0,1.0,1.0,1.0,1.0
13,1.0,1.0,1.0,1.0,1.0
14,1.0,1.0,1.0,1.0,1.0
15,1.0,1.0,1.0,1.0,1.0
16,1.0,1.0,1.0,1.0,1.0
17,1.0,1.0,1.0,1.0,1.0
18,1.0,1.0,1.0,1.0,1.0


### Step 8. Now let's delete the column class

In [7]:
iris = iris.drop('class', axis=1)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


### Step 9.  Set the first 3 rows as NaN

In [10]:
iris.iloc[:3] = np.nan
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
3,,,,
4,,,,
5,,,,
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2


### Step 10.  Delete the rows that have NaN

In [11]:
iris.dropna(inplace=True)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,1.0,1.0,1.0,1.0
10,1.0,1.0,1.0,1.0


### Step 11. Reset the index so it begins with 0 again

In [12]:
iris.reset_index(drop=True, inplace=True)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,4.6,3.4,1.4,0.3
1,5.0,3.4,1.5,0.2
2,4.4,2.9,1.4,0.2
3,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0
