# Iris

### Introduction:

This exercise may seem a little bit strange, but keep doing it.

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np


### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 

### Step 3. Assign it to a variable called iris

In [2]:
iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data')
iris

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


### Step 4. Create columns for the dataset

In [3]:
iris.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
#to create the columns we apply .columns to the data set and set it equal to the list of names
#1.sepal_length (in cm), 2.sepal_width (in cm), 3.petal_length (in cm), 4.petal_width (in cm), 5.class
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


### Step 5.  Is there any missing value in the dataframe?

In [4]:
iris.isnull().sum()
#we can find missing data values in the dataframe using .isnull
#.sum() will give us the total missing data values

#Based on the results, there are no missing values

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
class           0
dtype: int64

### Step 6.  Lets set the values of the rows 10 to 29 of the column 'petal_length' to NaN

In [5]:
iris.loc[10:29, ['petal_length']]= np.nan
#using .loc, specifying the range and column 
#and setting it equal to np.nan we can replace the values with NaN
iris.head(31) 
#set head to 31 to make sure 10-29 read NaN

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
5,4.6,3.4,1.4,0.3,Iris-setosa
6,5.0,3.4,1.5,0.2,Iris-setosa
7,4.4,2.9,1.4,0.2,Iris-setosa
8,4.9,3.1,1.5,0.1,Iris-setosa
9,5.4,3.7,1.5,0.2,Iris-setosa


### Step 7. Good, now lets substitute the NaN values to 1.0

In [14]:
iris.loc[10:29, ['petal_length']]=[1.0]
#same operation as above just switch NaN with 1.0
iris.head(31) 

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.0,3.6,1.4,0.2
1,5.4,3.9,1.7,0.4
2,4.6,3.4,1.4,0.3
3,5.0,3.4,1.5,0.2
4,4.4,2.9,1.4,0.2
5,4.9,3.1,1.5,0.1
6,5.4,3.7,1.5,0.2
7,4.8,3.4,1.0,0.2
8,4.8,3.0,1.0,0.1
9,4.3,3.0,1.0,0.1


### Step 8. Now let's delete the column class

In [7]:
iris = iris.drop("class", axis =1)
#to remove any desired column we simply use .drop() and settting it equal to the data set
#inside the function we can specify the column name and axis of 1, which refers to columns
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,4.9,3.0,1.4,0.2
1,4.7,3.2,1.3,0.2
2,4.6,3.1,1.5,0.2
3,5.0,3.6,1.4,0.2
4,5.4,3.9,1.7,0.4


### Step 9.  Set the first 3 rows as NaN

In [8]:
iris.iloc[0:3 ,:]=np.nan
#similar to step 6 and 7, except we use iloc 
#iloc is able to look at intergers as well
#we are not looking at a specific column, so that does not have to be specified in the brackets
iris.head(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,,,,
1,,,,
2,,,,
3,5.0,3.6,1.4,0.2
4,5.4,3.9,1.7,0.4


### Step 10.  Delete the rows that have NaN

In [9]:
iris.dropna(how = "any", inplace=True)
#to get rid of NaN values we can use .dropna
#setting how equal to any will look across the entire data set and get rid of all Nan values
#in place being true maintain the change as we continue to code
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
3,5.0,3.6,1.4,0.2
4,5.4,3.9,1.7,0.4
5,4.6,3.4,1.4,0.3
6,5.0,3.4,1.5,0.2
7,4.4,2.9,1.4,0.2


### Step 11. Reset the index so it begins with 0 again

In [10]:
iris.reset_index(drop = True, inplace=True)
#to reset the index we use .reset_index()
#settign drop equal to true gets rid of the previous index
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.0,3.6,1.4,0.2
1,5.4,3.9,1.7,0.4
2,4.6,3.4,1.4,0.3
3,5.0,3.4,1.5,0.2
4,4.4,2.9,1.4,0.2


### BONUS: Create your own question and answer it.

### Step 12. Find Averages for all columns.

In [11]:
iris[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].mean()

sepal_length    5.871233
sepal_width     3.050000
petal_length    3.756164
petal_width     1.226027
dtype: float64