In [1]:
#importing required libraries
from sklearn.datasets import load_iris 
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
np.random.seed(0)

In [2]:
# Load the Iris dataset and put its measurements into a DataFrame
iris = load_iris()
df = pd.DataFrame(
    data=iris.data,
    columns=iris.feature_names,
)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [3]:
# Attach each sample's species name to the data frame as a categorical column
df['species'] = pd.Categorical.from_codes(
    codes=iris.target,
    categories=iris.target_names,
)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
# Flag roughly 70% of the rows as training data.
# A dedicated RandomState seeded with 0 produces the same draws as the global
# generator does immediately after np.random.seed(0), but it makes this cell
# idempotent: re-running it always yields the same split instead of consuming
# further values from the shared global random stream.
df['is_train'] = np.random.RandomState(0).uniform(0, 1, len(df)) <= 0.70
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species,is_train
0,5.1,3.5,1.4,0.2,setosa,True
1,4.9,3.0,1.4,0.2,setosa,False
2,4.7,3.2,1.3,0.2,setosa,True
3,4.6,3.1,1.5,0.2,setosa,True
4,5.0,3.6,1.4,0.2,setosa,True


In [5]:
# Split the frame into train and test subsets.
# 'is_train' is already a boolean column, so it is used directly as the mask
# (and negated for the test rows) instead of being compared to True/False.
train, test = df[df['is_train']], df[~df['is_train']]
print("number of observation in train data = ",len(train))
print("number of observation in test data = ",len(test))

number of observation in train data =  111
number of observation in test data =  39


In [6]:
# Use the four measurement columns as model inputs.
# The names are taken from the dataset itself rather than by slicing the first
# four DataFrame columns by position, so the selection stays correct even if
# columns are added to or reordered in df.
features = pd.Index(iris.feature_names)
features

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)'],
      dtype='object')

In [7]:
# Encode the species labels as integers for the classifier.
# The categorical codes are used instead of pd.factorize: factorize numbers
# labels by order of first appearance in `train`, whereas the codes always
# follow the category order (iris.target_names). The later decoding of
# predictions via iris.target_names[...] relies on exactly that order.
y = train['species'].cat.codes.to_numpy(dtype=np.int64)
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2], dtype=int64)

In [8]:
# Train a random forest on the training rows (fixed seed, two parallel jobs)
clf = RandomForestClassifier(random_state=0, n_jobs=2)
clf.fit(train.loc[:, features], y)

RandomForestClassifier(n_jobs=2, random_state=0)

In [9]:
# Predict the encoded class of every row in the test set
clf.predict(test.loc[:, features])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 1, 1, 1,
       1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [10]:
# Show the per-class probabilities predicted for the first ten test rows
clf.predict_proba(test.loc[:, features])[:10]

array([[0.99, 0.01, 0.  ],
       [1.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  ],
       [0.99, 0.01, 0.  ],
       [1.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  ]])

In [11]:
# Translate the predicted class codes back into species names
preds = iris.target_names[clf.predict(test.loc[:, features])]
preds[:5]

array(['setosa', 'setosa', 'setosa', 'setosa', 'setosa'], dtype='<U10')

In [12]:
# Sanity check: actual species of the first few test rows
test.loc[:, 'species'].head()

1     setosa
7     setosa
8     setosa
10    setosa
13    setosa
Name: species, dtype: category
Categories (3, object): ['setosa', 'versicolor', 'virginica']

In [13]:
# Cross-tabulate actual against predicted species to compare them
pd.crosstab(
    index=test["species"],
    columns=preds,
    rownames=['Actual'],
    colnames=['Predicted'],
)

Predicted,setosa,versicolor,virginica
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
setosa,14,0,0
versicolor,0,7,2
virginica,0,0,16


In [14]:
# Derive the counts from the test set and the predictions instead of copying
# the numbers by hand from the crosstab, so they stay correct whenever the
# split or the model changes.
print("Total numbers of observation : ",len(test))
print("Total numbers matched observation : ",int((test['species'].to_numpy() == preds).sum()))

Total numbers of observation :  39
Total numbers matched observation :  37


In [15]:
# Compute the accuracy from the predictions rather than hard-coding 37/39,
# so the reported figure always reflects the current split and model.
matched = int((test['species'].to_numpy() == preds).sum())
print("Accuracy of Model : ",(matched/len(test))*100)


Accuracy of Model :  94.87179487179486


In [16]:
# Input data here for predictions: one row per sample, values in the same
# order as `features`.
# The samples are wrapped in a DataFrame carrying the training column names
# because the model was fitted on a DataFrame; recent scikit-learn versions
# warn about missing feature names when handed a bare nested list.
# NOTE: this rebinds `preds`, replacing the test-set predictions computed
# earlier; recompute them before re-running the evaluation cells.
new_samples = pd.DataFrame(
    [[5.0,3.6,1.2,0.2],[5.0,3.6,1.2,0.2]],
    columns=features,
)
preds = iris.target_names[clf.predict(new_samples)]
preds

array(['setosa', 'setosa'], dtype='<U10')