## imports

In [0]:
import tensorflow as tf
tf.__version__

'1.13.1'

In [0]:
ls

In [0]:
import keras 
keras.__version__

Using TensorFlow backend.


'2.2.4'

In [0]:
import matplotlib
print('matplotlib: {}'.format(matplotlib.__version__))

matplotlib: 3.0.3


In [0]:
import pandas as pd
pd.__version__

'0.24.2'

In [0]:
import cv2
print(cv2.__version__)

3.4.3


In [0]:
import numpy as np
np.__version__

'1.16.3'

In [0]:
!python --version

Python 3.6.7


# Step 1: Download Dataset

**Shenzhen Hospital X-ray Set:** X-ray images in this data set have been collected by Shenzhen No.3 Hospital in Shenzhen, Guangdong province, China. The x-rays were acquired as part of the routine care at Shenzhen Hospital. The set contains images in PNG format (the `CXR_png` directory used below).

In [0]:
!wget https://openi.nlm.nih.gov/imgs/collections/ChinaSet_AllFiles.zip

In [0]:
ls

In [0]:
!unzip ChinaSet_AllFiles.zip -d images/ 

In [0]:
ls images/ChinaSet_AllFiles

In [0]:
rm ChinaSet_AllFiles.zip

In [0]:
rm images/ChinaSet_AllFiles/CXR_png/Thumbs.db

In [0]:
!rm -rf images/ChinaSet_AllFiles/ClinicalReadings/

In [0]:
!rm images/ChinaSet_AllFiles/NLM-ChinaCXRSet-ReadMe.docx

# Step 2: Prepare Dataset csv file

In [0]:
ls  

In [0]:
# prepare dataset.csv file

import os
import csv

def parseTxtFile(filename):
  """Append one ``[filename, label]`` row to ``dataset.csv``.

  The label is the third underscore-separated field of the file name once the
  extension has been removed, e.g. ``CHNCXR_0001_0.png`` -> ``"0"``.

  Args:
    filename: Base name of an image file, ``<prefix>_<id>_<label>.<ext>``.

  Raises:
    IndexError: If the name has fewer than three underscore-separated fields.
  """
  print("filename is ====> ", filename)
  # splitext removes only the final extension, so a dot elsewhere in the
  # name cannot truncate the stem.
  stem, _ = os.path.splitext(filename)
  label = stem.split("_")[2]

  # newline='' is what the csv module asks for when opening files for writing;
  # the with-block closes the file, so no explicit close() is needed.
  with open('dataset.csv', 'a', newline='') as csvFile:
    csv.writer(csvFile).writerow([filename, label])

        
             

datasetDir = os.path.join(os.getcwd(), "images/ChinaSet_AllFiles/CXR_png/")

# parseTxtFile appends to dataset.csv, so start from an empty file; otherwise
# re-running this cell would write every row a second time.
if os.path.exists('dataset.csv'):
  os.remove('dataset.csv')

# One csv row per file, in directory-listing order
for file_name in os.listdir(datasetDir):
  parseTxtFile(file_name)

In [0]:
import pandas as pd
# dataset.csv is written without a header row, so columns are addressed by
# position: 0 = file name, 1 = label.
original_dataset = pd.read_csv("dataset.csv", header=None)
print(original_dataset.shape)
print("\n\n",original_dataset.head())

In [0]:
X = original_dataset[0]
y = original_dataset[1]

In [0]:
X.head(10)

In [0]:
y.head()

In [0]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Then take 25% of the remaining 80% (i.e. 20% of all rows) for validation,
# which gives a 60/20/20 train/val/test split overall.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1)

In [0]:
print("X_train: ",X_train.size, y_train.size)

In [0]:
print("X_test: ",X_test.size, y_test.size)

In [0]:
print("X_val: ",X_val.size, y_val.size)

# Step 3: Preprocess image

In [0]:
ls images/ChinaSet_AllFiles/

In [0]:
import random
import pathlib

# Root directory that holds the extracted image folders
datasetDir = os.path.join(os.getcwd(), "images/ChinaSet_AllFiles/")
data_root = pathlib.Path(datasetDir)
print(data_root,"=======")

# Every entry exactly one level below the root, as plain string paths
all_image_paths = [str(entry) for entry in data_root.glob('*/*')]

image_count = len(all_image_paths)
image_count

In [0]:
all_image_paths[:10]

In [0]:
all_image_paths[2]

### Code to read image in grayscale format

In [0]:
# # Resize image and normalizes it

# def preprocess_image(image):
#   # convert raw image into tensor
#   img_tensor = tf.image.decode_jpeg(image, channels=3)
#   img_tensor = tf.image.rgb_to_grayscale(img_tensor)
  
#   # resize image to 128*128
#   image = tf.image.resize_images(img_tensor, [128, 128])

#   # Removes dimensions of size 1 from the shape of a tensor.
#   image = tf.squeeze(image)
#   # normalize to [0,1] range
#   image /= 255.0  
#   return image

### Code to read image in *RGB* format

In [0]:
# Resize image and normalizes it

def preprocess_image(image, target_size=(224, 224)):
  """Decode raw image bytes, resize them, and scale pixel values to [0, 1].

  Args:
    image: Scalar string tensor holding the encoded file contents.
    target_size: ``(height, width)`` to resize to. Defaults to 224x224, the
      input size the MobileNetV2 base model in this notebook is built with.

  Returns:
    Tensor of shape ``(height, width, 3)`` with values in [0, 1].
  """
  # channels=3 forces a 3-channel result regardless of how the file is stored.
  # NOTE(review): the files read here live under CXR_png; the TF docs state
  # decode_jpeg also decodes PNG, but decode_png would be clearer - confirm.
  img_tensor = tf.image.decode_jpeg(image, channels=3)

  # resize image to target_size (224*224 by default)
  image = tf.image.resize_images(img_tensor, list(target_size))
  # normalize to [0,1] range
  image /= 255.0
  return image

### Load and preprocess function

In [0]:
# read the file at `path` and pass its raw bytes on to preprocess_image
def load_and_preprocess_image(path):
  """Return the preprocessed image tensor for the file at `path`."""
  raw_bytes = tf.read_file(path)
  return preprocess_image(raw_bytes)

## Test preprocessing functions if it's working or not

In [0]:
img_tensor = load_and_preprocess_image(all_image_paths[0])

In [0]:
sess = tf.InteractiveSession()

In [0]:
print(repr(img_tensor.shape))
print(img_tensor.dtype)

# Run the graph once and reuse the array; every .eval() call would read and
# decode the file again.
img_tensor_values = img_tensor.eval()
print(img_tensor_values.min())
print(img_tensor_values.max())


In [0]:
%matplotlib inline

import matplotlib.pyplot as plt

image_path = all_image_paths[0]
img = load_and_preprocess_image(image_path)

# Evaluate the tensor once and reuse the resulting array for both the
# dimension check and the plot.
img_values = img.eval()

print(img.shape)
print(img_values.ndim)

plt.imshow(img_values)
plt.grid(False)

In [0]:
sess.close()

### RGB plot V/S Grayscale Plot

In [0]:
import cv2
img = cv2.imread(all_image_paths[0], 0)
print(img.shape)

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

plt.imshow(img, cmap = plt.cm.gray)
plt.grid(False)

In [0]:
import cv2
img = cv2.imread(all_image_paths[0])

print(img.shape)

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

plt.imshow(img)
plt.grid(False)

# Step 4: Build a `tf.data.Dataset`

https://www.tensorflow.org/guide/performance/datasets

GPUs and TPUs can radically reduce the time required to execute a single training step, the CPU processing is prone to becoming the bottleneck. Achieving peak performance requires an efficient input pipeline that delivers data for the next step before the current step has finished. The tf.data API helps to build flexible and efficient input pipelines. feed-dict is the slowest possible way to pass information to TensorFlow and it must be avoided


**Input Pipeline Structure**

A typical TensorFlow training input pipeline can be framed as an ETL process:

1. **Extract**: Read data from persistent storage -- either local (e.g. HDD or SSD) or remote (e.g. GCS or HDFS).
2. **Transform**: Use CPU cores to parse and perform preprocessing operations on the data such as image decompression, data augmentation transformations (such as random crop, flips, and color distortions), shuffling, and batching.
3. **Load**: Load the transformed data onto the accelerator device(s) (for example, GPU(s) or TPU(s)) that execute the machine learning model.

This pattern effectively utilizes the CPU, while reserving the accelerator for the heavy lifting of training your model

## A dataset of images

The easiest way to build a `tf.data.Dataset` is using the `from_tensor_slices` method.

Slicing the array of strings, results in a dataset of strings:

In [0]:
type(all_image_paths)

In [0]:
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)

The `output_shapes` and `output_types` fields describe the content of each item in the dataset. In this case it is a set of scalar binary-strings

In [0]:
print('shape: ', repr(path_ds.output_shapes))
print('type: ', path_ds.output_types)
print()
print(path_ds)

Now create a new dataset that loads and formats images on the fly by mapping `load_and_preprocess_image` over the dataset of paths.

In [0]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [0]:
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)

### test if dataset api indeed return images

In [0]:
type(image_ds)

In [0]:
# create a one-shot iterator
iterator = path_ds.make_one_shot_iterator()
# extract an element
next_element = iterator.get_next()
img = load_and_preprocess_image(next_element)
print(img)

In [0]:
import tensorflow.contrib.eager as tfe
import matplotlib.pyplot as plt

with tf.Session() as sess:
    # `img` is built on iterator.get_next(), so every eval()/run() pulls the
    # NEXT path from the iterator. Evaluate once so that max, min and the
    # plot all describe the same image.
    img_values = sess.run(img)
    print(img_values.max())
    print(img_values.min())
    print(img.shape)
    plt.imshow(img_values)

## Preparing label dataset

In [0]:
# ensure label from image name matches all_image_labels

all_image_labels = original_dataset[1]
# Label encoded in a path: the last "_"-separated field of the file name,
# with the extension removed, as an int.
img_filename = lambda i: int(all_image_paths[i].split('/')[-1].split('_')[-1].split('.')[0])

# Paths and csv rows are paired by position later on, so they must have the
# same length and agree element by element.
assert len(all_image_paths) == len(all_image_labels), (
    "{} image paths but {} labels".format(len(all_image_paths), len(all_image_labels)))

for i in range(len(all_image_paths)):
  if i<10:
    print(img_filename(i),"-", all_image_labels[i])
  # `assert(a, b)` tests a non-empty tuple and can never fail; compare the
  # two values explicitly instead.
  assert img_filename(i) == all_image_labels[i], (
      "label mismatch at index {}: {}".format(i, all_image_paths[i]))

Ok now as we dont have assertion error we are good to go

In [0]:
type(all_image_labels)

In [0]:
# convert pandas series object to numpy array
all_image_labels = all_image_labels.values

In [0]:
# Using the same from_tensor_slices method we can build a dataset of labels
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(all_image_labels, tf.int64))

In [0]:
# test if label_ds works
iterator = label_ds.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
  for i in range(10):
    print(next_element.eval())

## A dataset of (image, label) pairs

In [0]:
ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# The tuples are unpacked into the positional arguments of the mapped function
def load_and_preprocess_from_path_label(path, label):
  """Load and preprocess the image at `path`; `label` is passed through unchanged."""
  return load_and_preprocess_image(path), label

# Decode and resize in parallel, the same way the image-only `image_ds`
# pipeline is mapped earlier in this notebook.
image_label_ds = ds.map(load_and_preprocess_from_path_label,
                        num_parallel_calls=AUTOTUNE)
# image_label_ds = image_label_ds.cache()
image_label_ds

# Step 5:Train validation and test split

using a 70/15/15 train/val/test split 

In [0]:
DATASET_SIZE = original_dataset.shape[0]
DATASET_SIZE

In [0]:
# 70/15/15 split. int() truncates, so the three sizes can add up to slightly
# less than DATASET_SIZE.
train_size, val_size, test_size = (
    int(fraction * DATASET_SIZE) for fraction in (0.7, 0.15, 0.15))

print(train_size, val_size, test_size)

https://stackoverflow.com/questions/51125266/how-do-i-split-tensorflow-datasets

**Take:**

Creates a Dataset with at most count elements from this dataset.

**Skip:**

Creates a Dataset that skips count elements from this dataset.

**Note that skip actually iterates over the dataset so it can cause big latency on large dataset**

In [0]:
# The first train_size elements train the model; everything after them is
# shared between the test and validation splits.
train_dataset = image_label_ds.take(train_size)
holdout_dataset = image_label_ds.skip(train_size)
test_dataset = holdout_dataset.take(test_size)
# Validation starts where the test slice ends and keeps whatever is left.
# (Skipping val_size here was only correct because val_size == test_size.)
val_dataset = holdout_dataset.skip(test_size)

### calculate dataset size

In [0]:
itrtor = train_dataset.make_one_shot_iterator()
nxt_elmnt = itrtor.get_next()
train_count = 0
train_positive_label = 0
train_neg_label = 0
with tf.Session() as sess:
#   sess.run(itrtor.initializer)
  while(1):
    try:
      img, label = sess.run(nxt_elmnt)
#     print(img.shape)
#     print(label.shape)
#       print(label)
      train_count += 1
      if label ==1:
        train_positive_label += 1
      else:
        train_neg_label += 1
    except tf.errors.OutOfRangeError:
      print("End of dataset")  # "End of dataset"
      break

print("train total images: ",train_count)
print("train positive_label: ",train_positive_label)
print("train neg_label: ",train_neg_label)

In [0]:
itrtor = test_dataset.make_one_shot_iterator()
nxt_elmnt = itrtor.get_next()
test_count = 0
test_positive_label = 0
test_neg_label = 0
with tf.Session() as sess:
#   sess.run(itrtor.initializer)
  while(1):
    try:
      img, label = sess.run(nxt_elmnt)
#     print(img.shape)
#     print(label.shape)
#       print(label)
      test_count += 1
      if label ==1:
        test_positive_label += 1
      else:
        test_neg_label += 1
    except tf.errors.OutOfRangeError:
      print("End of dataset")  # "End of dataset"
      break

print("test total images: ",test_count)
print("test positive_label: ",test_positive_label)
print("test neg_label: ",test_neg_label)

In [0]:
itrtor = val_dataset.make_one_shot_iterator()
nxt_elmnt = itrtor.get_next()
val_count = 0
val_positive_label = 0
val_neg_label = 0
with tf.Session() as sess:
#   sess.run(itrtor.initializer)
  while(1):
    try:
      img, label = sess.run(nxt_elmnt)
#     print(img.shape)
#     print(label.shape)
#       print(label)
      val_count += 1
      if label ==1:
        val_positive_label += 1
      else:
        val_neg_label += 1
    except tf.errors.OutOfRangeError:
      print("End of dataset")  # "End of dataset"
      break

print("val total images: ",val_count)
print("val positive_label: ",val_positive_label)
print("val neg_label: ",val_neg_label)

# Step 6: Augmentation pipeline on train dataset

https://www.wouterbulten.nl/blog/tech/data-augmentation-using-tensorflow-data-dataset/

To augment the dataset it can be beneficial to write augmenter functions: functions that receive an image (a `tf.Tensor`) and return a new, augmented image. By defining one function per augmentation operation we can easily attach them to datasets and control when they are evaluated.



### Rotation and flipping


To get the number of times the image is rotated by 90 degrees, we need to use a random function from Tensorflow itself

In [0]:
# def rotate(x: tf.Tensor) -> tf.Tensor:
#     """Rotation augmentation

#     Args:
#         x: Image

#     Returns:
#         Augmented image
#     """

#     # Rotate 0, 90, 180, 270 degrees
#     return tf.image.rot90(x, tf.random_uniform(shape=[], minval=0, maxval=4, dtype=tf.int32))


Tensorflow has a built-in function that does this for us: random_flip_left_right and random_flip_up_down.



In [0]:
# def flip(x: tf.Tensor) -> tf.Tensor:
#     """Flip augmentation

#     Args:
#         x: Image to flip

#     Returns:
#         Augmented image
#     """
#     x = tf.image.random_flip_left_right(x)
#     x = tf.image.random_flip_up_down(x)

#     return x

### Augmenting the Dataset


With all functions defined we can combine them in to a single pipeline. Applying these functions to a Tensorflow Dataset is very easy using the map function. The map function takes a function and returns a new and augmented dataset. 

To drastically increase the speed of these operations we can execute them in parallel, practically all Tensorflow operations support this. With the tf.Data API this is done using the num_parallel_calls parameter of the map function. When this parameter is higher than one functions will be executed in parallel. It is advised to set this parameter to the number of CPUs available.

Note: Some of these operations can result in images that have values outside the normal range of [0, 1]. To make sure that these ranges are not exceeded a clipping function such as tf.clip_by_value is recommended.



In [0]:
# AUTOTUNE = tf.data.experimental.AUTOTUNE

In [0]:
# # Add augmentations
# augmentations = [flip, rotate]

# # Add the augmentations to the dataset
# for f in augmentations:
#     # Apply the augmentation, run 4 jobs in parallel.
#     aug_dataset = train_dataset.map(f, num_parallel_calls=4)

# # Make sure that the values are still in [0, 1]
# aug_dataset = aug_dataset.map(lambda x: tf.clip_by_value(x, 0, 1), num_parallel_calls=AUTOTUNE)


# Step 6.5: Data prep in batches

To train a model with this dataset you will want the data:

* To be well shuffled.
* To be batched.
* To repeat forever.
* Batches to be available as soon as possible.

These features can be easily added using the `tf.data api`.



Make sure to call tf.data.Dataset.shuffle() before applying the heavy transformations (like reading the images, processing them, batching...).



**shuffling dataset and problems related**



1.   https://github.com/tensorflow/tensorflow/issues/14857
2.   https://stackoverflow.com/questions/46444018/meaning-of-buffer-size-in-dataset-map-dataset-prefetch-and-dataset-shuffle



In [0]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [0]:
# NOTE(review): this overwrites the image_count computed from the file listing
# earlier in the notebook; below it is only read by commented-out cells.
image_count = 700
# Number of (image, label) pairs per batch for all three splits
BATCH_SIZE = 32

In [0]:
# this code section has alternative snipet in below code cell

# # Setting a shuffle buffer size as large as the dataset ensures that the data is
# # completely shuffled.
# ds = image_label_ds.shuffle(buffer_size=image_count)
# ds = ds.repeat()
# ds = ds.batch(BATCH_SIZE)
# # `prefetch` lets the dataset fetch batches, in the background while the model is training.
# ds = ds.prefetch(buffer_size=AUTOTUNE)
# ds

There are a few things to note here:

1. The order is important.

  * A `.shuffle` after a `.repeat` would shuffle items across epoch boundaries (some items will be seen twice before others are seen at all).
  * A `.shuffle` after a `.batch` would shuffle the order of the batches, but not shuffle the items across batches.
  
2. We use a `buffer_size` the same size as the dataset for a full shuffle. Up to the dataset size, large values provide better randomization, but use more memory.

3. The shuffle buffer is filled before any elements are pulled from it. So a large `buffer_size` may cause a delay when your Dataset is starting.

4. The shuffled dataset doesn't report the end of a dataset until the shuffle-buffer is completely empty. The Dataset is restarted by `.repeat`, causing another wait for the shuffle-buffer to be filled.


This last point can be addressed by using the `tf.data.Dataset.apply` method with the fused `tf.data.experimental.shuffle_and_repeat` function:



In [0]:
#this code prepares full dataset

# ds = image_label_ds.apply(
#   tf.data.experimental.shuffle_and_repeat(buffer_size=image_count))
# ds = ds.batch(BATCH_SIZE)
# ds = ds.prefetch(buffer_size=AUTOTUNE)
# ds

### preparing batches/ shuffling

In [0]:
print(train_size, val_size, test_size)

In [0]:
# Train dataset 

# Shuffle over the whole training split and repeat forever, then batch and
# let tf.data prepare upcoming batches in the background.
train_ds = (
    train_dataset
    .apply(tf.data.experimental.shuffle_and_repeat(buffer_size=train_size))
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)
train_ds

In [0]:
# valid dataset 

# Same shuffle/repeat/batch/prefetch chain as the training split, with the
# shuffle buffer sized for the validation split.
valid_ds = (
    val_dataset
    .apply(tf.data.experimental.shuffle_and_repeat(buffer_size=val_size))
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)
valid_ds

In [0]:
# test dataset 

# Size the shuffle buffer with the test split's own size (this used val_size,
# which only happened to have the same value).
test_ds = test_dataset.apply(
  tf.data.experimental.shuffle_and_repeat(buffer_size=test_size))
test_ds = test_ds.batch(BATCH_SIZE)
# Prepare upcoming batches in the background
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)
test_ds

### Normalization to [-1, 1]

**The "Inception" preprocessing used with MobileNet converts the RGB values from [0, 255] to [-1, 1].** So before passing the images to the MobileNet model, we need to convert the input from a range of [0, 1] to [-1, 1].




In [0]:
def change_range(image,label):
  """Rescale `image` from [0, 1] to [-1, 1]; `label` is returned unchanged."""
  rescaled = image * 2 - 1
  return rescaled, label

# Apply the same [0, 1] -> [-1, 1] rescaling to every split.
# NOTE: run this cell only once; each run maps change_range on top of the
# previous result.
train_ds, valid_ds, test_ds = (
    split.map(change_range) for split in (train_ds, valid_ds, test_ds))

### **Cache**

Use `tf.data.Dataset.cache` to easily cache calculations across epochs. This is especially performant if the data fits in memory.

Here the images are cached, after being preprocessed (decoded and resized):



In [0]:
# train_dataset = train_dataset.cache()
# val_dataset = val_dataset.cache()

# Step 7: Training

## Pipe the dataset to a model using mobilenetV2

In [0]:
# MobileNetV2 with ImageNet weights and without its classification head
# (include_top=False). input_shape matches the 224x224, 3-channel tensors
# produced by preprocess_image.
mobile_net = tf.keras.applications.MobileNetV2(input_shape=(224, 224, 3),
                                               include_top=False,
                                               weights='imagenet')

In [0]:
# mobile_net = tf.keras.applications.MobileNetV2(input_shape=(128, 128, 3), include_top=False)
mobile_net.trainable=False

### misc

In [0]:
# # this code prepares full dataset

# ds = image_label_ds.apply(
#   tf.data.experimental.shuffle_and_repeat(buffer_size=image_count))
# ds = ds.batch(BATCH_SIZE)
# ds = ds.prefetch(buffer_size=AUTOTUNE)
# ds

# keras_ds = ds.map(change_range)

The MobileNet returns a 7x7 spatial grid of features for each 224x224 image.

Pass it a batch of images to see:



In [0]:
# iterator = train_ds.make_initializable_iterator()
# next_element = iterator.get_next()
# with tf.Session() as sess:
#         sess.run(iterator.initializer)
#         image_batch, label_batch = sess.run(next_element)
#         print(image_batch.shape)
#         print(label_batch.shape)
#         feature_map_batch = mobile_net(image_batch)
#         print(feature_map_batch.shape)

### Build a model wrapped around MobileNet

### temp model to check the shape of output

In [0]:
# model_temp = tf.keras.Sequential([
#   mobile_net,
#   tf.keras.layers.GlobalAveragePooling2D(),
#   tf.keras.layers.Dense(1)])

Now it produces outputs of the expected shape:



In [0]:
# iterator = keras_ds.make_initializable_iterator()
# next_element = iterator.get_next()
# with tf.Session() as sess:
#         sess.run(iterator.initializer)
#         sess.run(tf.global_variables_initializer())
#         image_batch, label_batch = sess.run(next_element)
#         print(image_batch.shape)
#         print(label_batch.shape)
        
#         logit_batch = model_temp(image_batch).eval()

#         print("min logit:", logit_batch.min())
#         print("max logit:", logit_batch.max())
#         print()

#         print("Shape:", logit_batch.shape)


## MobileNetV2 Model

tf.keras.layers.GlobalAveragePooling2D layer to convert the features to a single n-element vector per image.



In [0]:
model = tf.keras.Sequential([
  mobile_net,
  # Collapse the spatial feature grid into one feature vector per image
  tf.keras.layers.GlobalAveragePooling2D(),
#   tf.keras.layers.Dense(32, activation=tf.nn.relu),
#   tf.keras.layers.Dense(16, activation=tf.nn.relu),
  # Sigmoid output: the model is compiled with loss='binary_crossentropy' and
  # the 'accuracy' metric, which both treat the output as a probability in
  # [0, 1] rather than as a raw logit.
  tf.keras.layers.Dense(1, activation='sigmoid')])

Compile the model to describe the training procedure:



In [0]:
# model.compile(optimizer=tf.train.AdamOptimizer(), 
#               loss='binary_crossentropy',
#               metrics=["accuracy"])

Since there are two classes, use a binary cross-entropy loss.

In [0]:
# Learning rate for the first training phase; mobile_net.trainable was set to
# False above, so only the layers added on top of it are updated.
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
              loss='binary_crossentropy',
              metrics=['accuracy'])

There are 2 trainable variables: the Dense weights and bias:

In [0]:
len(model.trainable_variables) 

In [0]:
model.summary()

**Train the model.**

Because the datasets repeat indefinitely, `steps_per_epoch` has to be given explicitly; below it is set to the number of batches needed to cover the training split once.



In [0]:
print(train_size, val_size, test_size)

In [0]:
import numpy as np
# Batches needed to see every training example once (last batch may be partial)
train_steps_per_epoch = np.int64(np.ceil(train_size / BATCH_SIZE))
train_steps_per_epoch

In [0]:
# Batches needed to see every validation example once (last batch may be partial)
valid_steps_per_epoch = np.int64(np.ceil(val_size / BATCH_SIZE))
valid_steps_per_epoch

In [0]:
initial_epochs = 10


In [0]:
loss0, accuracy0 = model.evaluate(valid_ds, steps = valid_steps_per_epoch-1)

In [0]:
print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(accuracy0))

In [0]:
# train_ds and valid_ds repeat forever (shuffle_and_repeat), so the number of
# batches per epoch and per validation pass is bounded explicitly here.
# NOTE(review): the `-1` on validation_steps runs one batch fewer than
# ceil(val_size / BATCH_SIZE) - confirm this is intended.
history = model.fit(train_ds, 
                    epochs=initial_epochs, 
                    steps_per_epoch=train_steps_per_epoch, 
                    validation_data = valid_ds, 
                    validation_steps=valid_steps_per_epoch-1)

In [0]:
history.history

In [0]:
# Batches needed to see every test example once (last batch may be partial)
test_steps_per_epoch = np.int64(np.ceil(test_size / BATCH_SIZE))
test_steps_per_epoch

In [0]:
test_loss, test_acc = model.evaluate(test_ds, steps=test_steps_per_epoch-1)

In [0]:
print('Test accuracy:', test_acc)

### Plots: Learning curves
Let's take a look at the learning curves of the training and validation accuracy/loss when using the MobileNet V2 base model as a fixed feature extractor.

In [0]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by model.fit
acc = history.history['acc']
val_acc = history.history['val_acc']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Two stacked panels: accuracy on top, loss below
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatic lower limit but pin the top of the axis at 1
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 1.0])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')
plt.show()

### predict values

In [0]:
pred_vector = model.predict(test_ds, steps=test_steps_per_epoch-1)

In [0]:
pred_vector.shape

In [0]:
np.squeeze(pred_vector)

In [0]:
# Indices predicted as class 1, i.e. outputs above the 0.5 decision threshold.
# No np.absolute: it would count strongly negative outputs as positive.
np.where(np.squeeze(pred_vector) > 0.5)

In [0]:
# Number of predictions above the 0.5 decision threshold (class 1).
# No np.absolute: it would count strongly negative outputs as positive.
np.sum(np.squeeze(pred_vector) > 0.5)

In [0]:
# create an initializable iterator (it must be initialized inside the session)
# NOTE(review): test_ds is shuffled, so this batch presumably does not line up
# with the rows of pred_vector - confirm before comparing them.
iterator = test_ds.make_initializable_iterator()
# extract an element
next_element = iterator.get_next()
with tf.Session() as sess:
        sess.run(iterator.initializer)
        image_batch, label_batch = sess.run(next_element)
#         print(image_batch.shape)
        print(np.squeeze(label_batch))
## Finetuning

In [0]:
mobile_net.trainable = True

In [0]:
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(mobile_net.layers))

# Fine tune from this layer onwards
fine_tune_at = 50

# Freeze all the layers before the `fine_tune_at` layer
for layer in mobile_net.layers[:fine_tune_at]:
  layer.trainable =  False

In [0]:
model.compile(loss='binary_crossentropy',
              optimizer = tf.keras.optimizers.RMSprop(lr=base_learning_rate/10),
              metrics=['accuracy'])

In [0]:
len(model.trainable_variables)

In [0]:
fine_tune_epochs = 10
total_epochs =  initial_epochs + fine_tune_epochs

In [0]:
history = model.fit(train_ds, 
                    epochs = total_epochs,
                    initial_epoch = initial_epochs,
                    steps_per_epoch = train_steps_per_epoch, 
                    validation_data = valid_ds, 
                    validation_steps = valid_steps_per_epoch-1)

In [0]:
# Use the computed number of test batches (as in the first evaluation on
# test_ds) instead of a hard-coded step count.
test_loss, test_acc = model.evaluate(test_ds, steps=test_steps_per_epoch-1)

In [0]:
print('Test accuracy:', test_acc)

In [0]:
model.summary()

# Manually save weights

### checkpoints

In [0]:
model.save_weights('./checkpoints/my_checkpoint')

In [0]:
ls checkpoints/

 **Restore the weights**

In [0]:
# NOTE(review): this binds a second name to the SAME model object; it does not
# create a fresh model, so loading weights into it cannot show that the
# checkpoint restores an untrained network.
new_model = model

In [0]:
new_model.summary()

In [0]:
new_model.load_weights('./checkpoints/my_checkpoint')

In [0]:
# Use the computed number of test batches (as in the first evaluation on
# test_ds) instead of a hard-coded step count.
test_loss, test_acc = new_model.evaluate(test_ds, steps=test_steps_per_epoch-1)

### save architecture and weights with hdf5

In [0]:
# serialize model to JSON and write it to model.json in the working directory;
# the weights are written by a separate save_weights call
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

In [0]:
# Save weights to disk
model.save_weights('path_to_my_weights.h5')



In [0]:
from google.colab import files
files.upload()

In [0]:
ls

In [0]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

In [0]:
# load json and create model; the with-block closes the file even if the
# read fails
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = keras.models.model_from_json(loaded_model_json)


In [0]:
# load weights into new model
loaded_model.load_weights('path_to_my_weights.h5')

In [0]:
base_learning_rate = 0.0001
loaded_model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [0]:
test_loss, test_acc = loaded_model.evaluate(test_ds, steps=3)

### predict function

In [0]:
from scipy.special import softmax


def out_probability(path_to_img):
  """Run `loaded_model` on a single image file.

  Args:
    path_to_img: Path of the image file to score.

  Returns:
    The array returned by `loaded_model.predict` for a batch that contains
    only this image.
  """
  img = load_and_preprocess_image(path_to_img)
  # Use the session as a context manager so it is closed after the run; a
  # bare tf.Session() created on every call is never released.
  with tf.Session() as sess:
    img_arr = sess.run(img)
  # The training, validation and test batches are rescaled from [0, 1] to
  # [-1, 1] before reaching the model; apply the same transform here so
  # inference sees the same input range.
  img_arr = 2 * img_arr - 1
  # Add the leading batch dimension expected by predict
  img_arr = img_arr.reshape((-1, 224, 224, 3))
  vectr = loaded_model.predict(img_arr, steps=1)
  return vectr

In [0]:
out_probability('/content/images/ChinaSet_AllFiles/CXR_png/CHNCXR_0494_1.png')

In [0]:
results = []

import os, os.path
len(os.listdir('/content/images/ChinaSet_AllFiles/CXR_png') )

In [0]:
image_dir = '/content/images/ChinaSet_AllFiles/CXR_png'

# Start both lists empty in this cell so that re-running it does not append a
# second copy of every prediction.
filenames = []
results = []
for i in os.listdir(image_dir):
  filenames.append(i)
  results.append(out_probability(os.path.join(image_dir, i)))

In [0]:
# Count the images predicted as class 0. Keras' binary accuracy rounds the
# model output, i.e. it thresholds at 0.5, so use the same cut-off here
# rather than 0.
np.sum(np.squeeze(np.array(results)) < 0.5)

In [0]:
ls

In [0]:
!zip -r model.zip checkpoints

I am following [this](https://www.tensorflow.org/alpha/tutorials/images/transfer_learning#add_a_classification_head) tutorial for binary class classification. While defining the model it is defined as follows and quotes:

> Apply a tf.keras.layers.Dense layer to convert these features into a single prediction per image. You don't need an activation function here because this prediction will be treated as logit or a raw prediction value. Positive numbers predict class 1, negative numbers predict class 0.




```
model = tf.keras.Sequential([
  base_model,
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(1)
])
```
and then its compiled as 
```
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
              loss='binary_crossentropy',
              metrics=['accuracy'])
```

I have seen a similar model definition [here](https://www.tensorflow.org/tutorials/load_data/images#pipe_the_dataset_to_a_model) as follows:

```
model = tf.keras.Sequential([
  mobile_net,
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(len(label_names))])

model.compile(optimizer=tf.train.AdamOptimizer(), 
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])
```

In the above cases where no activation function is used, I observed predicted values take any real value(not in the range of [0,1]) and not a single negative value for example.

```
model = tf.keras.Sequential([
  mobile_net,
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(1)])

base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=base_learning_rate),
              loss='binary_crossentropy',
              metrics=['accuracy'])

np.squeeze(model.predict(test_ds, steps=test_steps_per_epoch))

# array([0.8656062 , 1.1738479 , 1.3243774 , 0.43144074, 1.3459874 ,
       0.8830215 , 0.27673364, 0.61824167, 0.6811296 , 0.31660053,
       0.66832197, 0.9944696 , 1.1472682 , 0.643435  , 1.6108004 ,
       0.46332538, 1.0919437 , 0.9578197 , 1.176657  , 1.1019497 ,
       1.2280573 , 1.3852577 , 1.0576394 , 0.89174306, 0.75531614,
       0.77309614, 0.2964771 , 1.4851328 , 0.52786475, 0.8349319 ,
       0.6725186 , 0.850648  , 1.5454502 , 1.5105858 , 0.8132403 ,
       0.8769205 , 0.8270436 , 0.5637488 , 1.0141921 , 1.7030811 ,
       1.4353518 , 1.4161562 , 1.378978  , 0.501247  , 0.6213258 ,
       0.9437766 , 2.429086  , 1.2481798 , 0.6229276 , 0.37893608,
       1.3877648 , 1.0904361 , 1.0879816 , 0.42403704, 0.79637295,
       2.8160148 , 0.8214861 , 0.8503458 , 0.80563146, 1.4901325 ,
       1.0303755 , 0.77981436, 1.088749  , 0.71522933, 1.3340217 ,
       2.0090134 , 1.0075089 , 0.8950774 , 0.6173111 , 0.7857665 ,
       1.7411164 , 1.3057053 , 0.33380216, 0.76223296, 1.5859761 ,
       0.96682435, 0.6254643 , 1.4843993 , 1.1031054 , 0.6320849 ,
       0.01859415, 0.72086346, 1.1440296 , 0.29395923, 1.5440805 ,
       0.380056  , 1.7602444 , 0.6369114 , 0.7867059 , 1.1418453 ,
       1.8237758 , 0.2560327 , 2.6044023 , 1.5562654 , 0.737739  ,
       0.40826577], dtype=float32)

```  
**QUESTION: 1**

**so, how does tensorflow calculate accuracy based on such values? because these values are not 0 or 1, so what threshold value it uses to decide whether a sample is of class 1 or class 0**

---

In another [tutorial](https://www.tensorflow.org/tutorials/keras/basic_classification#setup_the_layers), I have seen the use of sigmoid or softmax activation function for the last layer.

```
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
```
similarly, I defined my model as follows:

```
model = tf.keras.Sequential([
  mobile_net,
  keras.layers.GlobalAveragePooling2D(),
  keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

```

and observed values get in range of [0,1]
```
np.squeeze(model.predict(test_ds, steps=test_steps_per_epoch))

# array([0.5962706 , 0.41386074, 0.7369955 , 0.4375754 , 0.4081418 ,
       0.5233598 , 0.54559284, 0.58932847, 0.46750832, 0.73593813,
       0.49894634, 0.49055347, 0.37505004, 0.6098627 , 0.5756561 ,
       0.5219231 , 0.37050545, 0.5673407 , 0.5554987 , 0.531324  ,
       0.28257015, 0.74096835, 0.57002604, 0.46783662, 0.7368346 ,
       0.5332815 , 0.5606995 , 0.5541738 , 0.57862717, 0.40553188,
       0.46588784, 0.30736524, 0.43870398, 0.74726176, 0.71659195,
       0.27446586, 0.50352675, 0.43134567, 0.68349624, 0.38074452,
       0.5150338 , 0.7177907 , 0.61012363, 0.63375396, 0.43830383,
       0.5749217 , 0.4520418 , 0.42618847, 0.53284496, 0.55864084,
       0.55283684, 0.56968784, 0.5476512 , 0.47232378, 0.43477964,
       0.424371  , 0.5257551 , 0.4982109 , 0.6054718 , 0.45364827,
       0.5447099 , 0.5589619 , 0.6879043 , 0.43605927, 0.49726096,
       0.5986774 , 0.46806905, 0.45553213, 0.4558573 , 0.2709099 ,
       0.29398417, 0.42126212, 0.4208623 , 0.25966096, 0.5174277 ,
       0.5691663 , 0.6820154 , 0.66986185, 0.29530805, 0.5368336 ,
       0.6704497 , 0.4770817 , 0.58965963, 0.66673934, 0.44505033,
       0.3894297 , 0.53820807, 0.47612685, 0.3273378 , 0.6933465 ,
       0.54334545, 0.49939007, 0.5978731 , 0.49409997, 0.4585469 ,
       0.43943945], dtype=float32)
```

**QUESTION: 2**

**how accuracy in this case is calculated by tensorflow?**

---

**QUESTION: 3**

**so what is the difference between using sigmoid activation and not using it in the last layer? when I used sigmoid activation function, accuracy of model somehow decreased by 10% than when I didn't used sigmoid function. Is this coincident or does it has to do anything with the use of activation function.**

# caps


In [0]:
def margin_loss(y_true, y_pred):
    """
    Margin loss: squared hinge terms around 0.9 and 0.1, summed over axis 1
    and averaged over the batch.
    :param y_true: [None, n_classes]
    :param y_pred: [None, num_capsule]
    :return: a scalar loss value.
    """
    # Penalty where y_true is set and the prediction falls below 0.9
    present_term = y_true * K.square(K.maximum(0., 0.9 - y_pred))
    # Penalty, weighted by 0.5, where y_true is unset and the prediction exceeds 0.1
    absent_term = 0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))

    per_sample = K.sum(present_term + absent_term, 1)
    return K.mean(per_sample)

In [0]:
def PrimaryCap(inputs, dim_vector, n_channels, kernel_size, strides, padding):
    """
    Build the primary capsule layer: one convolution producing
    `dim_vector * n_channels` filters, reshaped into capsule vectors and squashed.
    :param inputs: 4D tensor, shape=[None, width, height, channels]
    :param dim_vector: the dim of the output vector of capsule
    :param n_channels: the number of types of capsules
    :param kernel_size: passed through to Conv2D
    :param strides: passed through to Conv2D
    :param padding: passed through to Conv2D
    :return: output tensor, shape=[None, num_capsule, dim_vector]
    """
    conv_layer = layers.Conv2D(
        filters=dim_vector * n_channels,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
    )
    conv_out = conv_layer(inputs)
    # Regroup the conv features into vectors of length dim_vector
    capsules = layers.Reshape(target_shape=[-1, dim_vector])(conv_out)
    return layers.Lambda(squash)(capsules)


In [0]:
def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    # K.epsilon() keeps the square root away from zero, so an all-zero vector
    # is mapped to zero instead of NaN (0 / 0).
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors
  
  