In [1]:
import tensorflow as tf
import datetime

In [2]:
input_dir = '/home/jovyan/work/data/'

In [3]:
from tensorflow.keras.preprocessing import image_dataset_from_directory


# training_data_dir = output_dir+'/train'
# validation_data_dir = output_dir+'/val'

image_shape = (250, 250) #image size to use, (height, width)
batch_size = 32 #taking 32 images in a batch at a time
seed = 123 #seed to recreate the same results every time we run this code

train_ds = image_dataset_from_directory(
        input_dir,
        image_size=image_shape,
        batch_size=batch_size,
        class_names = [
          'human', 'empty'
        ],
        label_mode = "categorical",
        seed=seed,
        validation_split=0.1,
        shuffle=True,
        subset='training'
)
valid_ds = image_dataset_from_directory(
        input_dir,
        image_size=image_shape,
        batch_size=batch_size,
        class_names = [
          'human', 'empty'
        ],
        label_mode = "categorical",
        seed=seed,
        validation_split=0.1,
        shuffle=True,
        subset='validation'
)

Found 52800 files belonging to 2 classes.
Using 47520 files for training.


2022-04-12 09:50:40.482134: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-04-12 09:50:40.482223: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-04-12 09:50:40.482269: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (9254522d3e31): /proc/driver/nvidia/version does not exist
2022-04-12 09:50:40.486914: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Found 52800 files belonging to 2 classes.
Using 5280 files for validation.


<BatchDataset shapes: ((None, 250, 250, 3), (None, 2)), types: (tf.float32, tf.float32)>

In [5]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = valid_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [4]:
from tensorflow.keras import layers
# To rescale an input in the [0, 255] range to be in the [0, 1] range,pass scale=1./255.
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255) 

In [5]:
from tensorflow.keras import Sequential
num_classes = 2

model = Sequential([
  layers.experimental.preprocessing.Rescaling(1./255, input_shape=(250,250,3)), #3 rgb color means, input_sahpe has htirs parameteras 3.
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes)
])

In [6]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), #this is categorical classification so, CategoricalCrossentropy is used
              metrics=['accuracy'])

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling_1 (Rescaling)      (None, 250, 250, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 250, 250, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 125, 125, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 125, 125, 32)      4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 62, 62, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 62, 62, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 31, 31, 64)        0

In [10]:
class MyCustomCallback(tf.keras.callbacks.Callback):

  def on_train_batch_begin(self, batch, logs=None):
    print('Training: batch {} begins at {}'.format(batch, datetime.datetime.now().time()))

  def on_train_batch_end(self, batch, logs=None):
    print('Training: batch {} ends at {}'.format(batch, datetime.datetime.now().time()))

  def on_test_batch_begin(self, batch, logs=None):
    print('Evaluating: batch {} begins at {}'.format(batch, datetime.datetime.now().time()))

  def on_test_batch_end(self, batch, logs=None):
    print('Evaluating: batch {} ends at {}'.format(batch, datetime.datetime.now().time()))

In [8]:
epochs=5
history = model.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=epochs
)

Epoch 1/5


2022-04-12 09:50:53.447043: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
# save model
saved_model_path = '/home/jovyan/work/saved_model/cnn_model'
model.save(saved_model_path)

2022-04-12 11:46:43.664561: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/jovyan/work/saved_model/cnn_model/assets


In [31]:
# Predict
import numpy as np
from keras.preprocessing import image
img = image.load_img(input_dir+'/empty/image11507.png', target_size=(250, 250))
img_array = image.img_to_array(img)
img = np.expand_dims(img_array, axis=0)
model.predict(img)


array([[-2.6806417 , -0.47557408]], dtype=float32)

In [32]:
import numpy as np

y_pred = []  # store predicted labels
y_true = []  # store true labels

# iterate over the dataset
for image_batch, label_batch in valid_ds:   # use dataset.unbatch() with repeat
  # append true labels
  y_true.append(np.argmax(label_batch, axis = - 1))
  # # compute predictions
  preds = model.predict(image_batch)
  # # append predicted labels
  y_pred.append(np.argmax(preds, axis = - 1))



In [33]:
# convert the true and predicted labels into tensors
correct_labels = tf.concat([item for item in y_true], axis = 0)
predicted_labels = tf.concat([item for item in y_pred], axis = 0)

In [34]:


from sklearn.metrics import confusion_matrix
confusion_matrix(predicted_labels, correct_labels)



array([[1077,   56],
       [ 344, 3803]])