### Classification Problem: Iris

In [None]:
import io

import numpy as np
import pandas as pd
import requests
from sklearn import metrics
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.models import Sequential

# Load the iris data set; 'NA' and '?' entries are parsed as missing values.
df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/iris.csv",
    na_values=['NA', '?'])

# Convert to numpy - Classification
x = df[['sepal_l', 'sepal_w', 'petal_l', 'petal_w']].values
# One-hot encode the target column. The dtype is pinned explicitly because
# pandas >= 2.0 produces bool dummy columns by default, whereas the rest of
# this notebook expects numeric 0/1 values in y.
dummies = pd.get_dummies(df['species'], dtype=np.uint8) # Classification
species = dummies.columns  # class labels, in the same order as y's columns
y = dummies.values


# Build neural network: two ReLU hidden layers feeding a softmax output
# layer that has one unit per one-hot column of y.
model = Sequential([
    Dense(50, input_dim=x.shape[1], activation='relu'),  # Hidden 1
    Dense(25, activation='relu'),                        # Hidden 2
    Dense(y.shape[1], activation='softmax'),             # Output
])

# Categorical cross-entropy is used with the one-hot encoded targets in y.
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(x, y, epochs=100, verbose=2)

Train on 150 samples
Epoch 1/100
150/150 - 0s - loss: 1.1343
Epoch 2/100
150/150 - 0s - loss: 1.0067
Epoch 3/100
150/150 - 0s - loss: 0.9150
Epoch 4/100
150/150 - 0s - loss: 0.8430
Epoch 5/100
150/150 - 0s - loss: 0.7892
Epoch 6/100
150/150 - 0s - loss: 0.7348
Epoch 7/100
150/150 - 0s - loss: 0.6847
Epoch 8/100
150/150 - 0s - loss: 0.6382
Epoch 9/100
150/150 - 0s - loss: 0.6041
Epoch 10/100
150/150 - 0s - loss: 0.5686
Epoch 11/100
150/150 - 0s - loss: 0.5374
Epoch 12/100
150/150 - 0s - loss: 0.5166
Epoch 13/100
150/150 - 0s - loss: 0.4913
Epoch 14/100
150/150 - 0s - loss: 0.4706
Epoch 15/100
150/150 - 0s - loss: 0.4519
Epoch 16/100
150/150 - 0s - loss: 0.4337
Epoch 17/100
150/150 - 0s - loss: 0.4207
Epoch 18/100
150/150 - 0s - loss: 0.4026
Epoch 19/100
150/150 - 0s - loss: 0.3894
Epoch 20/100
150/150 - 0s - loss: 0.3764
Epoch 21/100
150/150 - 0s - loss: 0.3630
Epoch 22/100
150/150 - 0s - loss: 0.3527
Epoch 23/100
150/150 - 0s - loss: 0.3409
Epoch 24/100
150/150 - 0s - loss: 0.3296
Epoc

<tensorflow.python.keras.callbacks.History at 0x275f1d9bdc8>

In [None]:
# Print the species labels found (the column labels of the one-hot encoding):
print(species)

Index(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype='object')


In [None]:
# Run the trained network over every training row; each output row holds
# one softmax value per class.
pred = model.predict(x)
print(f"Shape: {pred.shape}")
# Show only the first ten rows rather than the full array.
print(pred[:10])

Shape: (150, 3)
[[9.98257935e-01 1.74211117e-03 1.47250105e-08]
 [9.94762123e-01 5.23775769e-03 9.35751956e-08]
 [9.97034669e-01 2.96533993e-03 5.68744980e-08]
 [9.94459629e-01 5.54029271e-03 1.55113412e-07]
 [9.98531222e-01 1.46873493e-03 1.33291875e-08]
 [9.98098075e-01 1.90198515e-03 1.37531018e-08]
 [9.96991158e-01 3.00874189e-03 7.64488419e-08]
 [9.97346044e-01 2.65395525e-03 3.05425694e-08]
 [9.92665589e-01 7.33401440e-03 3.12981911e-07]
 [9.95905280e-01 4.09475202e-03 5.66798697e-08]]


If you would like to turn off scientific notation, the following line can be used:

In [None]:
# suppress=True makes NumPy print small floats in fixed-point form
# instead of scientific notation.
np.set_printoptions(suppress=True)

Now we see these values rounded up.

In [None]:
# First ten rows of the one-hot encoded targets.
print(y[:10])

[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]]


In [None]:
# Collapse each row to the index of its largest entry: for pred this is the
# predicted class, for the one-hot y it is the expected class.
predict_classes = pred.argmax(axis=1)
expected_classes = y.argmax(axis=1)
print(f"Predictions: {predict_classes}")
print(f"Expected: {expected_classes}")

Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
Expected: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [None]:
# Map predicted class indices back to their species labels.
# Note: the slice 1:10 starts at the second sample, so nine labels are shown.
print(species[predict_classes[1:10]])

Index(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa'],
      dtype='object')


In [None]:
# Fraction of samples whose predicted class index equals the expected one.
# accuracy_score is imported with the other imports at the top of the notebook.
correct = accuracy_score(expected_classes,predict_classes)
print(f"Accuracy: {correct}")

Accuracy: 0.9733333333333334


The code below performs two ad hoc predictions.  The first prediction is simply a single iris flower, and the second predicts two iris flowers.  Notice that the argmax in the second prediction requires **axis=1**?  Since we have a 2D array now, we must specify which axis to take the argmax over.  The value **axis=1** specifies we want the max column index for each row.

In [None]:
# A single flower is still passed as a 2-D array: one row, four measurements.
sample_flower = np.array([[5.0, 3.0, 4.0, 2.0]], dtype=float)
pred = model.predict(sample_flower)
print(pred)
# No axis is given, so argmax works on the flattened array; with only one
# row that flat index is also the column (class) index.
pred = pred.argmax()
print(f"Predict that {sample_flower} is: {species[pred]}")

[[0.00208851 0.19842853 0.799483  ]]
Predict that [[5. 3. 4. 2.]] is: Iris-virginica


In [None]:
# Two flowers at once: one row per flower, four measurements each.
sample_flower = np.array(
    [[5.0, 3.0, 4.0, 2.0],
     [5.2, 3.5, 1.5, 0.8]],
    dtype=float)
pred = model.predict(sample_flower)
print(pred)
# axis=1 picks the winning column index separately for each row.
pred = pred.argmax(axis=1)
print(f"Predict that these two flowers {sample_flower} ")
print(f"are: {species[pred]}")

[[0.00208851 0.19842838 0.79948306]
 [0.9900221  0.00997756 0.00000035]]
Predict that these two flowers [[5.  3.  4.  2. ]
 [5.2 3.5 1.5 0.8]] 
are: Index(['Iris-virginica', 'Iris-setosa'], dtype='object')
