In [30]:
import numpy as np
import pandas as pd

In [31]:
iris_data = pd.read_csv('./iris.csv')

In [32]:
iris_data.drop(['Id'], axis=1, inplace=True)

In [33]:
iris_data

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [34]:
Species_column = iris_data['Species']

In [35]:
Species_column

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: Species, Length: 150, dtype: object

In [36]:
Species_column.replace(to_replace=['Iris-setosa','Iris-versicolor', 'Iris-virginica'], value=[0, 1, 2], inplace=True)

In [37]:
iris_data

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [38]:
shuffle_data = iris_data.sample(frac=1, replace=True)

In [39]:
shuffle_data.reset_index(drop=True)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.8,2.7,5.1,1.9,2
1,6.8,2.8,4.8,1.4,1
2,6.7,3.0,5.2,2.3,2
3,6.4,2.7,5.3,1.9,2
4,4.9,3.1,1.5,0.1,0
...,...,...,...,...,...
145,5.8,2.8,5.1,2.4,2
146,4.9,3.0,1.4,0.2,0
147,5.0,3.4,1.5,0.2,0
148,5.4,3.0,4.5,1.5,1


In [40]:
# Let's say we want to split the data in 80:10:10 for train:valid:test dataset
from sklearn.model_selection import train_test_split
train_size=0.8

X = iris_data.drop(columns = ['Species']).copy()
y = iris_data['Species']

# In the first step we will split the data in training and remaining dataset
train_data, X_rem, train_labels, y_rem = train_test_split(X,y, train_size=0.8)

# Now since we want the valid and test size to be equal (10% each of overall data). 
# we have to define valid_size=0.5 (that is 50% of remaining data)
test_size = 0.5
valid_data, test_data, valid_labels, test_labels = train_test_split(X_rem,y_rem, test_size=0.5)

print(train_data.shape), print(train_labels.shape)
print(valid_data.shape), print(valid_labels.shape)
print(test_data.shape), print(test_labels.shape)

(120, 4)
(120,)
(15, 4)
(15,)
(15, 4)
(15,)


(None, None)

# Normalization

In [41]:
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std
test_data -= mean
test_data /= std
valid_data -= mean
valid_data /= std

In [42]:
valid_data

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
148,0.439442,0.830681,0.917738,1.443793
26,-0.979385,0.830681,-1.195841,-1.018322
65,1.03062,0.130341,0.361533,0.277528
147,0.794149,-0.103106,0.806497,1.055038
64,-0.269971,-0.336552,-0.083431,0.147943
73,0.321207,-0.569999,0.528395,0.018358
14,-0.0335,2.231362,-1.418323,-1.277492
27,-0.742914,1.064128,-1.251461,-1.277492
132,0.675914,-0.569999,1.028979,1.314208
145,1.03062,-0.103106,0.806497,1.443793


# Model

In [43]:
import tensorflow
from tensorflow.keras import models
from tensorflow.keras import layers

In [48]:
def build_model():
     # Because we will need to instantiate
     # the same model multiple time,
     # we use a function to construct it.
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu',
    input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1, activation = 'softmax'))
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [49]:
import numpy as np
k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []
val_mae = ''
for i in range(k):
    print('processing fold #', i)
    
    # Prepare the validation data: data from partition # k
    val_data = valid_data
    val_targets = valid_labels
    # Prepare the training data: data from all other partitions
    partial_train_data = np.concatenate(
    [train_data[:i * num_val_samples],
    train_data[(i + 1) * num_val_samples:]],
    axis=0)
    partial_train_targets = np.concatenate(
    [train_labels[:i * num_val_samples],
    train_labels[(i + 1) * num_val_samples:]],
    axis=0)
    # Build the Keras model (already compiled)
    model = build_model()
    # Train the model (in silent mode, verbose=0)
    model.fit(partial_train_data, partial_train_targets,
    epochs=num_epochs, batch_size=1, verbose=0)
    # Evaluate the model on the validation data
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3


In [50]:
all_scores

[0.3333333432674408,
 0.3333333432674408,
 0.3333333432674408,
 0.3333333432674408]