# Iris Flower Dataset business case

### Importing the Packages & the Date

In [1]:
import numpy as np
from sklearn import preprocessing
import tensorflow as tf

raw_csv_data = np.loadtxt('IRIS.csv', delimiter = ',',skiprows=1,dtype=str)
raw_csv_data[:5,:]

array([['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'],
       ['4.9', '3', '1.4', '0.2', 'Iris-setosa'],
       ['4.7', '3.2', '1.3', '0.2', 'Iris-setosa'],
       ['4.6', '3.1', '1.5', '0.2', 'Iris-setosa'],
       ['5', '3.6', '1.4', '0.2', 'Iris-setosa']], dtype='<U15')

### Checking for Incomplete Data

In [2]:
np.isnan(raw_csv_data[:,:-1].astype(float)).sum()

0

### splitting the Data

In [3]:
inputs_all = raw_csv_data[:,1:-1]
targets_all = raw_csv_data[:,-1]

### coverting the target to Numeric

In [4]:
np.unique(targets_all,return_counts=True)

(array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype='<U15'),
 array([50, 50, 50]))

In [5]:
keys = list(np.unique(targets_all))
values = list(range(0, np.unique(targets_all).shape[0]))
dict_target = dict(zip(keys, values))

In [6]:
dict_target

{'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

In [7]:
for i in np.unique(targets_all):
       targets_all = np.where(targets_all == i,
                                          dict_target[i],
                                          targets_all)

In [8]:
np.unique(targets_all,return_counts=True)

(array(['0', '1', '2'], dtype='<U15'), array([50, 50, 50]))

### coverting to numbers

In [9]:
inputs_all = inputs_all.astype(float)
targets_all = targets_all.astype(int)

### Standardize the inputs

In [10]:
scaled_inputs = preprocessing.scale(inputs_all)

### Shuffle the data

In [11]:
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets_all[shuffled_indices]

### Split the dataset into train, validation, and test

In [12]:
samples_count = shuffled_inputs.shape[0]

train_samples_count = int(0.8*samples_count)
validation_samples_count = int(0.1*samples_count)
test_samples_count = samples_count - train_samples_count - validation_samples_count

train_inputs = shuffled_inputs[:train_samples_count]
train_targets = shuffled_targets[:train_samples_count]

validation_inputs = shuffled_inputs[train_samples_count:train_samples_count+validation_samples_count]
validation_targets = shuffled_targets[train_samples_count:train_samples_count+validation_samples_count]

test_inputs = shuffled_inputs[train_samples_count+validation_samples_count:]
test_targets = shuffled_targets[train_samples_count+validation_samples_count:]

print(np.sum(train_targets), train_samples_count)
print(np.sum(validation_targets), validation_samples_count)
print(np.sum(test_targets), test_samples_count)

121 120
18 15
11 15


### converting the dataset into .npz files

In [13]:
np.savez('IRIS_data_train', inputs=train_inputs, targets=train_targets)
np.savez('IRIS_data_validation', inputs=validation_inputs, targets=validation_targets)
np.savez('IRIS_data_test', inputs=test_inputs, targets=test_targets)

In [14]:
npz = np.load('IRIS_data_train.npz')

train_inputs = npz['inputs'].astype(float)
train_targets = npz['targets'].astype(int)

npz = np.load('IRIS_data_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(float), npz['targets'].astype(int)

npz = np.load('IRIS_data_test.npz')
test_inputs, test_targets = npz['inputs'].astype(float), npz['targets'].astype(int)

### Model
#### Outline, optimizers, loss, early stopping and training

In [15]:
input_size = 4
output_size = 3
hidden_layer_size = 50

model = tf.keras.Sequential([

    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='sigmoid'),
    tf.keras.layers.Dense(output_size, activation='softmax')
])



model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


batch_size = 100


max_epochs = 100

early_stopping = tf.keras.callbacks.EarlyStopping(patience=0)


model.fit(train_inputs,
          train_targets,
          batch_size=batch_size,
          epochs=max_epochs,
          callbacks=[early_stopping],
          validation_data=(validation_inputs, validation_targets),
          verbose = 2
          )

Epoch 1/100
2/2 - 2s - loss: 1.3480 - accuracy: 0.3250 - val_loss: 1.4284 - val_accuracy: 0.2000 - 2s/epoch - 1s/step
Epoch 2/100
2/2 - 0s - loss: 1.3015 - accuracy: 0.3250 - val_loss: 1.3751 - val_accuracy: 0.2000 - 166ms/epoch - 83ms/step
Epoch 3/100
2/2 - 0s - loss: 1.2595 - accuracy: 0.3250 - val_loss: 1.3258 - val_accuracy: 0.2000 - 96ms/epoch - 48ms/step
Epoch 4/100
2/2 - 0s - loss: 1.2201 - accuracy: 0.3250 - val_loss: 1.2808 - val_accuracy: 0.2000 - 184ms/epoch - 92ms/step
Epoch 5/100
2/2 - 0s - loss: 1.1824 - accuracy: 0.3250 - val_loss: 1.2387 - val_accuracy: 0.2000 - 232ms/epoch - 116ms/step
Epoch 6/100
2/2 - 0s - loss: 1.1476 - accuracy: 0.3333 - val_loss: 1.1991 - val_accuracy: 0.2667 - 103ms/epoch - 52ms/step
Epoch 7/100
2/2 - 0s - loss: 1.1128 - accuracy: 0.3750 - val_loss: 1.1622 - val_accuracy: 0.3333 - 180ms/epoch - 90ms/step
Epoch 8/100
2/2 - 0s - loss: 1.0814 - accuracy: 0.3917 - val_loss: 1.1267 - val_accuracy: 0.3333 - 154ms/epoch - 77ms/step
Epoch 9/100
2/2 - 0s 

<keras.src.callbacks.History at 0x7ea9db9cbf10>

## Test the model

In [16]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)



In [17]:
print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))


Test loss: 0.25. Test accuracy: 86.67%
