In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore",category = FutureWarning)

%matplotlib inline

Importing the dataset

In [41]:
iris = pd.read_csv('Iris.csv')

In [42]:
iris.head(5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [43]:
iris.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [96]:
X = iris.iloc[:,1:5].values
y = iris.iloc[:,5].values

In [97]:
X.shape

(150, 4)

In [98]:
onehot_encoder = OneHotEncoder(sparse=False)
reshaped = y.reshape(len(y),1)
y_onehot = onehot_encoder.fit_transform(reshaped)

In [99]:
print(y.shape)
print(y_onehot.shape)

(150,)
(150, 3)


Splitting the data into train set and test set

In [100]:
X_train,X_test,y_train,y_test = train_test_split(X,y_onehot,test_size = 0.2)

In [101]:
print(X_train.shape)

(120, 4)


In [102]:
print(X_test.shape)

(30, 4)


In [103]:
print(y_train.shape)

(120, 3)


In [104]:
print(y_test.shape)

(30, 3)


In [105]:
mnmx = MinMaxScaler()
X_train = mnmx.fit_transform(X_train)
X_test = mnmx.transform(X_test)

Decision Tree Classifier

In [106]:
dtree = DecisionTreeClassifier(criterion='entropy')
dtree.fit(X_train,y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [107]:
dtree_pred = dtree.predict(X_test)

In [108]:
print("AUC Score ", roc_auc_score(y_test,dtree_pred))
print("Accuracy Score ", accuracy_score(y_test,dtree_pred))

AUC Score  0.9708333333333333
Accuracy Score  0.9666666666666667


In [109]:
target_names = ['class 0','class 1','class 2']
print(classification_report(y_test,dtree_pred,target_names = target_names))

              precision    recall  f1-score   support

     class 0       1.00      1.00      1.00        12
     class 1       1.00      0.88      0.93         8
     class 2       0.91      1.00      0.95        10

   micro avg       0.97      0.97      0.97        30
   macro avg       0.97      0.96      0.96        30
weighted avg       0.97      0.97      0.97        30
 samples avg       0.97      0.97      0.97        30



Random Forest Classifier

In [110]:
rtree  = RandomForestClassifier(n_estimators = 10,criterion = 'entropy')
rtree.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [111]:
rtree_pred = rtree.predict(X_test)

In [112]:
print("AUC Score ",  roc_auc_score(y_test,rtree_pred))
print("Accuracy Score ",  accuracy_score(y_test,rtree_pred))

AUC Score  0.9833333333333334
Accuracy Score  0.9666666666666667


In [113]:
print(classification_report(y_test,rtree_pred,target_names=target_names))

              precision    recall  f1-score   support

     class 0       1.00      1.00      1.00        12
     class 1       1.00      1.00      1.00         8
     class 2       1.00      0.90      0.95        10

   micro avg       1.00      0.97      0.98        30
   macro avg       1.00      0.97      0.98        30
weighted avg       1.00      0.97      0.98        30
 samples avg       0.97      0.97      0.97        30



  _warn_prf(average, modifier, msg_start, len(result))


Support Vector Machines

In [114]:
clf = OneVsRestClassifier(SVC()).fit(X_train,y_train)

In [115]:
clf_pred  = clf.predict(X_test)

In [116]:
print("AUC Score ",roc_auc_score(y_test,clf_pred))
print("Accuracy Score ",accuracy_score(y_test,clf_pred))

AUC Score  0.9757575757575757
Accuracy Score  0.9666666666666667


In [118]:
print(classification_report(y_test,clf_pred,target_names = target_names))

              precision    recall  f1-score   support

     class 0       1.00      1.00      1.00        12
     class 1       0.89      1.00      0.94         8
     class 2       1.00      0.90      0.95        10

   micro avg       0.97      0.97      0.97        30
   macro avg       0.96      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30
 samples avg       0.97      0.97      0.97        30



MultiLayer Perceptron

In [119]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [120]:
model = Sequential([
    Dense(1024,activation='relu',input_dim = X_train.shape[1]),
    Dense(1024,activation='relu'),
    Dense(3,activation='softmax')
])
model.compile(loss='categorical_crossentropy',optimizer = 'adam',metrics=['accuracy'])

In [121]:
model.fit(X_train,y_train,epochs=200,batch_size = 30)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x3003b6fd88>

In [122]:
loss_accuracy = model.evaluate(X_test,y_test,batch_size = 50)



In [123]:
print("Accuracy ",loss_accuracy[1])

Accuracy  1.0


In [124]:
print("Loss ",loss_accuracy[0])

Loss  0.0072133527137339115


Making Predictions

Decision Tree Classifier

In [156]:
arr = np.array([[5.67,3.5,1.7,0.9]])
np.resize(arr,(4,4))
arr.shape
dtree.predict(arr)


array([[0., 0., 1.]])

Random Forest Classifier

In [157]:
rtree.predict(arr)

array([[0., 0., 1.]])

Support Vector Machines

In [158]:
clf.predict(arr)

array([[0, 0, 0]])

Multi Layer Perceptron

In [159]:
model.predict(arr)

array([[3.656902e-21, 1.000000e+00, 3.483319e-23]], dtype=float32)