# Libraries

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models
from tensorflow.keras import layers

# Reading Data

In [8]:
data = pd.read_csv('iris.csv').drop('Id', axis = 1)
data

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


# Data Shape

In [9]:
data.shape

(150, 5)

# Check No. of Classes

In [10]:
data.groupby('Species').size()

Species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
dtype: int64

# Converting Labels to integar value

In [13]:
# Selecting labels
labels = data['Species']
# Replacing 'R' with 0 and 'M' with 1
labels.replace(to_replace = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], value = [0,1,2], inplace = True)
labels

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: Species, Length: 150, dtype: int64

# Shuffling Data

In [15]:
# Shuffling data with default indexing
shuffledData = data.sample(frac = 1, replace = True).reset_index(drop=True)
shuffledData

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,7.7,2.6,6.9,2.3,2
1,5.7,2.8,4.1,1.3,1
2,4.9,3.1,1.5,0.1,0
3,7.3,2.9,6.3,1.8,2
4,6.3,2.5,5.0,1.9,2
...,...,...,...,...,...
145,5.8,2.7,5.1,1.9,2
146,5.9,3.2,4.8,1.8,1
147,6.4,2.8,5.6,2.1,2
148,6.8,3.0,5.5,2.1,2


# Seperating Data(Train, Valid, Test)

In [18]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
# Let's say we want to split the data in 80:10:10 for train:valid:test dataset
train_size=0.8

X = shuffledData.drop(columns = 'Species').copy()
y = shuffledData['Species']

# In the first step we will split the data in training and remaining dataset
train_data, X_rem, train_labels, y_rem = train_test_split(X,y, train_size=0.8)

# Now since we want the valid and test size to be equal (10% each of overall data). 
# we have to define valid_size=0.5 (that is 50% of remaining data)
test_size = 0.5
valid_data, test_data, valid_labels, test_labels = train_test_split(X_rem,y_rem, test_size=0.5)

print(train_data.shape), print(train_labels.shape)
print(valid_data.shape), print(valid_labels.shape)
print(test_data.shape), print(test_labels.shape)

(120, 4)
(120,)
(15, 4)
(15,)
(15, 4)
(15,)


(None, None)

# Encoding Labels(one-hot-endcoding)

In [19]:
train_labels = to_categorical(train_labels)
valid_labels = to_categorical(valid_labels)
test_labels = to_categorical(test_labels)

# Defining Model

In [20]:
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(3, activation='softmax'))
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return model