## NN-IRIS

Create a neural network model to predict an Iris class

In [34]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras
import numpy as np

## 1. Prepare data

### 1.1 Read data

In [2]:
columns = pd.read_csv("data/iris names.txt", names=['names'])
columns

Unnamed: 0,names
0,sepal length in cm
1,sepal width in cm
2,petal length in cm
3,petal width in cm
4,class


In [3]:
df = pd.read_csv("data/iris2.data", sep="\t", names=columns["names"])
df

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm,class
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,3
146,6.3,2.5,5.0,1.9,3
147,6.5,3.0,5.2,2.0,3
148,6.2,3.4,5.4,2.3,3


### 1.2 Split data

Split data to train and test subsets using `train_test_split()`

In [4]:
X = df.drop("class", axis=1)
y = df["class"]

In [5]:
X

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [6]:
y = pd.get_dummies(y)
y

Unnamed: 0,1,2,3
0,True,False,False
1,True,False,False
2,True,False,False
3,True,False,False
4,True,False,False
...,...,...,...
145,False,False,True
146,False,False,True
147,False,False,True
148,False,False,True


In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

# Split the training data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15)

In [8]:
X_train

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm
84,5.4,3.0,4.5,1.5
120,6.9,3.2,5.7,2.3
106,4.9,2.5,4.5,1.7
9,4.9,3.1,1.5,0.1
94,5.6,2.7,4.2,1.3
...,...,...,...,...
114,5.8,2.8,5.1,2.4
107,7.3,2.9,6.3,1.8
118,7.7,2.6,6.9,2.3
98,5.1,2.5,3.0,1.1


In [9]:
y_train

Unnamed: 0,1,2,3
84,False,True,False
120,False,False,True
106,False,False,True
9,True,False,False
94,False,True,False
...,...,...,...
114,False,False,True
107,False,False,True
118,False,False,True
98,False,True,False


In [10]:
X_val

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm
56,6.3,3.3,4.7,1.6
11,4.8,3.4,1.6,0.2
4,5.0,3.6,1.4,0.2
144,6.7,3.3,5.7,2.5
103,6.3,2.9,5.6,1.8
36,5.5,3.5,1.3,0.2
89,5.5,2.5,4.0,1.3
85,6.0,3.4,4.5,1.6
60,5.0,2.0,3.5,1.0
110,6.5,3.2,5.1,2.0


In [11]:
y_val

Unnamed: 0,1,2,3
56,False,True,False
11,True,False,False
4,True,False,False
144,False,False,True
103,False,False,True
36,True,False,False
89,False,True,False
85,False,True,False
60,False,True,False
110,False,False,True


In [12]:
X_test

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm
71,6.1,2.8,4.0,1.3
54,6.5,2.8,4.6,1.5
142,5.8,2.7,5.1,1.9
102,7.1,3.0,5.9,2.1
39,5.1,3.4,1.5,0.2
48,5.3,3.7,1.5,0.2
81,5.5,2.4,3.7,1.0
51,6.4,3.2,4.5,1.5
128,6.4,2.8,5.6,2.1
1,4.9,3.0,1.4,0.2


In [13]:
y_test

Unnamed: 0,1,2,3
71,False,True,False
54,False,True,False
142,False,False,True
102,False,False,True
39,True,False,False
48,True,False,False
81,False,True,False
51,False,True,False
128,False,False,True
1,True,False,False


In [14]:
X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape

((107, 4), (107, 3), (20, 4), (20, 3), (23, 4), (23, 3))

## 2. Create model

### 2.1. Model definition

In [15]:
model = keras.Sequential()
model.add(keras.layers.Dense(16, activation='relu', input_shape=(4,)))
model.add(keras.layers.Dense(3, activation='softmax'))
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-04-04 16:29:03.218133: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-04 16:29:03.218868: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


### 2.2. Model compilation

In [28]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'recall', 'precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

### 2.3. Model training

In [29]:
# Train the model!
model.fit(X_train, y_train,
          batch_size=12,
          epochs=200,
          validation_data=(X_val, y_val))

Epoch 1/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 70ms/step - FalseNegatives: 1.7000 - FalsePositives: 1.7000 - TrueNegatives: 127.5000 - TruePositives: 62.9000 - accuracy: 0.9681 - loss: 0.1106 - precision: 0.9681 - recall: 0.9681 - val_FalseNegatives: 1.0000 - val_FalsePositives: 1.0000 - val_TrueNegatives: 39.0000 - val_TruePositives: 19.0000 - val_accuracy: 0.9500 - val_loss: 0.0882 - val_precision: 0.9500 - val_recall: 0.9500
Epoch 2/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - FalseNegatives: 1.4000 - FalsePositives: 1.4000 - TrueNegatives: 127.8000 - TruePositives: 63.2000 - accuracy: 0.9849 - loss: 0.0550 - precision: 0.9849 - recall: 0.9849 - val_FalseNegatives: 1.0000 - val_FalsePositives: 1.0000 - val_TrueNegatives: 39.0000 - val_TruePositives: 19.0000 - val_accuracy: 0.9500 - val_loss: 0.0887 - val_precision: 0.9500 - val_recall: 0.9500
Epoch 3/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 

<keras.src.callbacks.history.History at 0x7492fffc6a90>

### 2.4. Model evaluation

In [35]:
# Evaluate the model on the test data
_, FN, FP, TN, TP, accuracy, precision, recall = model.evaluate(X_test, y_test)
mcc = (TP * TN - FP * FN) / np.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
print(f"Test accuracy: {accuracy*100:.2f}%")
print(f"Test recall: {recall*100:.2f}%")
print(f"Test precision: {precision*100:.2f}%")
print(f"TP: {TP}")
print(f"TN: {TN}")
print(f"FP: {FP}")
print(f"FN: {FN}")
print(f"Test Matthew coefficient: {mcc * 100:.2f}%")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - FalseNegatives: 0.0000e+00 - FalsePositives: 0.0000e+00 - TrueNegatives: 46.0000 - TruePositives: 23.0000 - accuracy: 1.0000 - loss: 0.0069 - precision: 1.0000 - recall: 1.0000
Test accuracy: 100.00%
Test recall: 100.00%
Test precision: 100.00%
TP: 23.0
TN: 46.0
FP: 0.0
FN: 0.0
Test Matthew coefficient: 100.00%
