# Iris

### Introduction:

This exercise may seem a little bit strange, but keep doing it.

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 

### Step 3. Assign it to a variable called iris

In [5]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = pd.read_csv(url, header=None)

iris.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
len(iris)

150

### Step 4. Create columns for the dataset

In [7]:
# 1. sepal_length (in cm)
# 2. sepal_width (in cm)
# 3. petal_length (in cm)
# 4. petal_width (in cm)
# 5. class

iris.columns = ['sepal_length','sepal_width', 'petal_length', 'petal_width', 'class']
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


### Step 5.  Is there any missing value in the dataframe?

In [8]:
pd.isnull(iris).sum()
# nice no missing value

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
class           0
dtype: int64

### Step 6.  Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN

In [9]:
iris.iloc[10:30,2:3] = np.nan
iris.head(20)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa
8,4.4,2.9,1.4,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa


### Step 7. Good, now lets substitute the NaN values to 1.0

In [10]:
iris.petal_length.fillna(1, inplace = True)
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


### Step 8. Now let's delete the column class

In [40]:
del iris['class']
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,4.9,3.0,1.4,0.2
1,4.7,3.2,1.3,0.2
2,4.6,3.1,1.5,0.2
3,5.0,3.6,1.4,0.2
4,5.4,3.9,1.7,0.4


### Step 9.  Set the first 3 rows as NaN

In [14]:
iris.head(30)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,,,,,
1,,,,,
2,,,,,
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa
8,4.4,2.9,1.4,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa


In [15]:
iris.iloc[0:3 ,:] = np.nan

### Step 10.  Delete the rows that have NaN

In [18]:
help(iris.dropna())

   SparseDataFrame : Container for sparse tabular data.
 |      
 |      Notes
 |      -----
 |      Sparse data should have the same dtypes as its dense representation.
 |      
 |      Examples
 |      --------
 |      >>> arr = np.random.RandomState(0).randn(100, 4)
 |      >>> arr[arr < .8] = np.nan
 |      >>> pd.DataFrame(arr).ftypes
 |      0    float64:dense
 |      1    float64:dense
 |      2    float64:dense
 |      3    float64:dense
 |      dtype: object
 |      
 |      >>> pd.SparseDataFrame(arr).ftypes  # doctest: +SKIP
 |      0    float64:sparse
 |      1    float64:sparse
 |      2    float64:sparse
 |      3    float64:sparse
 |      dtype: object
 |  
 |  iat
 |      Access a single value for a row/column pair by integer position.
 |      
 |      Similar to ``iloc``, in that both provide integer-based lookups. Use
 |      ``iat`` if you only need to get or set a single value in a DataFrame
 |      or Series.
 |      
 |      Raises
 |      ------
 |      IndexErro

In [16]:
iris = iris.dropna(how='any')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa


### Step 11. Reset the index so it begins with 0 again

In [20]:
iris2 = iris.reset_index(drop = True)
iris2.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,4.6,3.1,1.5,0.2,Iris-setosa
1,5.0,3.6,1.4,0.2,Iris-setosa
2,5.4,3.9,1.7,0.4,Iris-setosa
3,4.6,3.4,1.4,0.3,Iris-setosa
4,5.0,3.4,1.5,0.2,Iris-setosa
