# Iris dataset using Random Forest Model 

### Import Modules

In [180]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

### Read csv file

In [165]:
iris = pd.read_csv("iris.csv",names=["sepal length (cm)","sepal width (cm)","petal length (cm)","petal width (cm)","names"])
print(iris.head(5))

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

         names  
0  Iris-setosa  
1  Iris-setosa  
2  Iris-setosa  
3  Iris-setosa  
4  Iris-setosa  


### Check the number of rows and columns

In [166]:
iris.shape

(150, 5)

In [167]:
print(iris.columns)

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'names'],
      dtype='object')


### Determine Ouput and Input labels

In [168]:
#Split dataset. X as the input variable while Y is the ouput variable 
x= iris.drop(columns=["names"])
y= iris["names"]

In [169]:
#inspect dimension. 150 flowers and 4 features
print("X: ",x.shape)
print("Y: ",y.shape)

X:  (150, 4)
Y:  (150,)


### Building Model

In [170]:
#Import model
model= RandomForestClassifier() 

#Train test split 80/20 rule
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20)

#Fit train data model
model.fit(x_train,y_train)

RandomForestClassifier()

### Predicting class labels

In [171]:
#Predict class label using single sample data
sample = np.array([17.5, 21.5, 7.4,5]).reshape(1,-1)
model.predict(sample)

array(['Iris-virginica'], dtype=object)

In [172]:
#Predict class labels using test data
result = model.predict(x_test)
print(result)

['Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'
 'Iris-versicolor' 'Iris-virginica' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa'
 'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-virginica']


### Feature Importance

In [173]:
#Shows the most sinificant feature. Petal width is the most significant feature followed by petal lenght.
print(model.feature_importances_)

[0.09675581 0.02066624 0.42344201 0.45913594]


### Evaluating model accuracy

In [174]:
#Meaning model is 96% accurate using test data
print(model.score(x_test,y_test))

1.0


In [175]:
#Check the most probable significant feature. Petal lenght is the most significant feature base from sample data 
print(model.predict_proba(sample))

[[0. 0. 1.]]


In [176]:
#Converting into data frame
df=pd.DataFrame({"Actual":y_test,"Predicted":result})
df

Unnamed: 0,Actual,Predicted
19,Iris-setosa,Iris-setosa
129,Iris-virginica,Iris-virginica
131,Iris-virginica,Iris-virginica
31,Iris-setosa,Iris-setosa
41,Iris-setosa,Iris-setosa
116,Iris-virginica,Iris-virginica
105,Iris-virginica,Iris-virginica
24,Iris-setosa,Iris-setosa
49,Iris-setosa,Iris-setosa
123,Iris-virginica,Iris-virginica
