# Import libraries

In [1]:
import pandas as pd
import tensorflow as tf

# Get the data

In [2]:
df = pd.read_csv('Iris.csv')

# Take a look at the data

In [3]:
print(df.head())

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa


# Convert string values to integer values

In [4]:
label_names = df['Species'].unique()

print(label_names)

['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


In [5]:
index_and_label = list(enumerate(label_names))

print(index_and_label)

[(0, 'Iris-setosa'), (1, 'Iris-versicolor'), (2, 'Iris-virginica')]


In [6]:
label_to_index = dict((label, index) for index, label in index_and_label)

print(label_to_index)

{'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}


In [7]:
df = df.replace(label_to_index)

print(df.head())

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Species
0   1            5.1           3.5            1.4           0.2        0
1   2            4.9           3.0            1.4           0.2        0
2   3            4.7           3.2            1.3           0.2        0
3   4            4.6           3.1            1.5           0.2        0
4   5            5.0           3.6            1.4           0.2        0


# Normalize the numerical inputs

In [8]:
def normalize_column(df, column):
    max_value = df[column].max()
    min_value = df[column].min()
    df[column] = (df[column] - min_value)/(max_value - min_value)
    return df

In [9]:
df = normalize_column(df, 'SepalLengthCm')
df = normalize_column(df, 'SepalWidthCm')
df = normalize_column(df, 'PetalLengthCm')
df = normalize_column(df, 'PetalWidthCm')

# Shuffle the dataset

In [10]:
df = df.sample(frac=1)

In [11]:
print(df.head())

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Species
111  112       0.583333      0.291667       0.728814      0.750000        2
146  147       0.555556      0.208333       0.677966      0.750000        2
114  115       0.416667      0.333333       0.694915      0.958333        2
147  148       0.611111      0.416667       0.711864      0.791667        2
148  149       0.527778      0.583333       0.745763      0.916667        2


# Split the dataset into training set and test set

In [12]:
x_train = df.iloc[:120, 1:-1].to_numpy()
y_train = df.iloc[:120, -1:].to_numpy()

In [13]:
x_test = df.iloc[120:, 1:-1].to_numpy()
y_test = df.iloc[120:, -1:].to_numpy()

In [14]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(120, 4)
(120, 1)
(30, 4)
(30, 1)


# Build the Model

In [15]:
model = tf.keras.Sequential()

model.add(tf.keras.layers.Input(shape=[4]))
model.add(tf.keras.layers.Dense(units=64, activation='relu'))
model.add(tf.keras.layers.Dense(units=3, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                320       
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 195       
Total params: 515
Trainable params: 515
Non-trainable params: 0
_________________________________________________________________


# Choose Loss function and optimizer

In [16]:
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Start the training process

In [17]:
model.fit(x_train, y_train, epochs=100)

W0727 14:11:24.559357 140243979364160 deprecation.py:323] From /home/rishi/Programs/ML/mlvenv/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 120 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7f8cd15e9588>

# Evaluate with Test set

In [18]:
test_loss, test_acc = model.evaluate(x_test, y_test)

print(test_acc)

1.0
