In [None]:
# Mount Google Drive at /content/drive; the cells below read the requirements
# file, model checkpoints, label files and images from
# /content/drive/MyDrive/covidnet.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -r /content/drive/MyDrive/covidnet/requirements.txt

Collecting absl-py==0.10.0
  Downloading absl_py-0.10.0-py3-none-any.whl (127 kB)
[K     |████████████████████████████████| 127 kB 12.8 MB/s 
[?25hCollecting aiohttp==3.6.2
  Downloading aiohttp-3.6.2-cp37-cp37m-manylinux1_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 58.7 MB/s 
[?25hCollecting anyio==2.1.0
  Downloading anyio-2.1.0-py3-none-any.whl (64 kB)
[K     |████████████████████████████████| 64 kB 3.8 MB/s 
[?25hCollecting argon2-cffi==20.1.0
  Downloading argon2_cffi-20.1.0-cp35-abi3-manylinux1_x86_64.whl (97 kB)
[K     |████████████████████████████████| 97 kB 9.7 MB/s 
Collecting async-generator==1.10
  Downloading async_generator-1.10-py3-none-any.whl (18 kB)
Collecting async-timeout==3.0.1
  Downloading async_timeout-3.0.1-py3-none-any.whl (8.2 kB)
Collecting attrs==20.2.0
  Downloading attrs-20.2.0-py2.py3-none-any.whl (48 kB)
[K     |████████████████████████████████| 48 kB 6.7 MB/s 
[?25hCollecting Babel==2.9.0
  Downloading Babel-2.9.0-py2.

In [None]:
from __future__ import print_function
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from tensorflow import keras

import numpy as np
import os, argparse, pathlib
import cv2

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import confusion_matrix

Instructions for updating:
non-resource variables are not supported in the long term


In [None]:
# original code from: https://github.com/lindawangg/COVID-Net

In [None]:
def crop_top(img, percent=0.15):
    """Return ``img`` without its top ``percent`` fraction of rows.

    The number of removed rows is ``int(img.shape[0] * percent)``, i.e. the
    fraction is truncated towards zero.
    """
    total_rows = img.shape[0]
    rows_to_drop = int(total_rows * percent)
    return img[rows_to_drop:]

def central_crop(img):
    """Cut the largest centred square out of ``img``.

    The square's side equals the shorter of the first two dimensions; the
    surplus along the longer dimension is split between both ends (the
    leading end gets the smaller half when the surplus is odd).
    """
    height, width = img.shape[0], img.shape[1]
    side = height if height < width else width
    # Both surpluses are non-negative, so floor division equals int(x / 2).
    top = (height - side) // 2
    left = (width - side) // 2
    return img[top:top + side, left:left + side]

def process_image_file(filepath, top_percent, size):
    """Load an image and turn it into a ``size`` x ``size`` array.

    The image is read with ``cv2.imread``, the top ``top_percent`` fraction of
    rows is removed, the largest centred square is cut out and the result is
    resized to ``(size, size)``.

    Args:
        filepath: Path of the image file to read.
        top_percent: Fraction of rows to drop from the top (see ``crop_top``).
        size: Side length, in pixels, of the returned square image.

    Returns:
        The processed image as returned by ``cv2.resize``.

    Raises:
        OSError: If ``cv2.imread`` could not load ``filepath``.
    """
    img = cv2.imread(filepath)
    # cv2.imread signals failure (missing, unreadable or undecodable file) by
    # returning None instead of raising; fail here with the offending path
    # rather than with an AttributeError inside crop_top.
    if img is None:
        raise OSError('Could not read image file: {!r}'.format(filepath))
    img = crop_top(img, percent=top_percent)
    img = central_crop(img)
    img = cv2.resize(img, (size, size))
    return img

def random_ratio_resize(img, prob=0.3, delta=0.1):
    """Randomly squeeze ``img`` along one axis and pad it back to its size.

    With probability ``1 - prob`` the image is returned untouched. Otherwise a
    new aspect ratio is drawn uniformly from
    ``[max(ratio - delta, 0.01), ratio + delta]`` (``ratio`` being the
    current height / width), the image is resized accordingly and centred on a
    black canvas of the original height and width.

    The function is meant for square inputs (what ``process_image_file``
    produces); the output always has the same height and width as the input.

    Args:
        img: Image array whose first two axes are height and width.
        prob: Probability of applying the transformation.
        delta: Half-width of the interval the new aspect ratio is drawn from.

    Returns:
        ``img`` itself when the transformation is skipped, otherwise a new
        array with the same height and width as ``img``.

    Raises:
        ValueError: If the resized image would not fit inside the original
            canvas, or if the padded result does not match the input size.
    """
    if np.random.rand() >= prob:
        return img

    height, width = img.shape[0], img.shape[1]
    ratio = height / width
    ratio = np.random.uniform(max(ratio - delta, 0.01), ratio + delta)

    # size is (width, height), the order cv2.resize expects.
    if ratio * width <= width:
        size = (int(width * ratio), width)
    else:
        size = (height, int(height / ratio))

    dh = height - size[1]
    top, bot = dh // 2, dh - dh // 2
    dw = width - size[0]
    left, right = dw // 2, dw - dw // 2

    # Negative padding cannot be applied; report the offending geometry
    # instead of letting cv2.copyMakeBorder fail.
    if dh < 0 or dw < 0:
        raise ValueError(img.shape, size, ratio)

    img = cv2.resize(img, size)
    img = cv2.copyMakeBorder(img, top, bot, left, right, cv2.BORDER_CONSTANT,
                             (0, 0, 0))

    # Sanity check against the input's own size (previously hard-coded to
    # 480, which rejected every other input resolution).
    if img.shape[0] != height or img.shape[1] != width:
        raise ValueError(img.shape, size)
    return img

# _augmentation_transform = ImageDataGenerator(
#     featurewise_center=False,
#     featurewise_std_normalization=False,
#     rotation_range=10,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     horizontal_flip=True,
#     brightness_range=(0.9, 1.1),
#     zoom_range=(0.85, 1.15),
#     fill_mode='constant',
#     cval=0.,
# )

# Lazily created, module-wide ImageDataGenerator (see get_augmentation_transform).
_augmentation_transform = None


def get_augmentation_transform():
    """Return the shared ``ImageDataGenerator``, creating it on first use."""
    global _augmentation_transform
    if _augmentation_transform is not None:
        return _augmentation_transform

    settings = dict(
        featurewise_center=False,
        featurewise_std_normalization=False,
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        brightness_range=(0.9, 1.1),
        zoom_range=(0.85, 1.15),
        fill_mode='constant',
        cval=0.,
    )
    _augmentation_transform = ImageDataGenerator(**settings)
    return _augmentation_transform

def apply_augmentation(img):
    """Augment one image: random ratio resize, then a random Keras transform."""
    resized = random_ratio_resize(img)
    transformer = get_augmentation_transform()
    return transformer.random_transform(resized)

def _process_csv_file(file):
    with open(file, 'r') as fr:
        files = fr.readlines()
    return files


class BalanceCovidDataset(keras.utils.Sequence):
    """Keras ``Sequence`` producing batches with a fixed share of positives.

    Every batch is a consecutive slice of the negative samples in which
    ``covid_percent`` of the entries (at least one) are replaced by randomly
    drawn positive samples. One epoch therefore walks once over the negative
    samples.
    """

    def __init__(
            self,
            data_dir,
            csv_file,
            is_training=True,
            batch_size=8,
            input_shape=(224, 224),
            num_channels=3,
            mapping={
                'negative': 0,
                'positive': 1,
            },
            shuffle=True,
            augmentation=apply_augmentation,
            covid_percent=0.5,
            class_weights=[1., 1.],
            top_percent=0.08
    ):
        """Read the label file and split its lines by class.

        Args:
            data_dir: Root data folder; images are read from its ``train``
                sub-folder when ``is_training`` is true, else from ``test``.
            csv_file: Path of a whitespace-separated label file. The second
                field of a line is the image file name and the third its
                label; lines ending in ``sirm`` carry one extra leading field.
            is_training: Selects the ``train`` folder and enables augmentation.
            batch_size: Maximum number of samples per batch.
            input_shape: ``(height, width)`` of the produced images; images
                are resized to a square of side ``input_shape[0]``.
            num_channels: Number of channels of the produced images.
            mapping: Label name -> class index. Must contain the keys
                ``'negative'`` and ``'positive'``.
            shuffle: Whether to reshuffle the per-class lists every epoch.
            augmentation: Callable applied to each image while training, or
                ``None`` to disable augmentation.
            covid_percent: Fraction of each batch replaced by positive samples.
            class_weights: Per-sample weight for each class index.
            top_percent: Fraction of rows cropped from the top of each image.
        """
        self.datadir = data_dir
        # Blank lines carry no sample and would break the field parsing.
        self.dataset = [line for line in _process_csv_file(csv_file)
                        if line.strip()]
        self.is_training = is_training
        self.batch_size = batch_size
        self.N = len(self.dataset)
        self.input_shape = input_shape
        self.num_channels = num_channels
        # Copy the (mutable, shared) default arguments so that instances never
        # alias each other's configuration.
        self.mapping = dict(mapping)
        self.shuffle = shuffle
        self.covid_percent = covid_percent
        self.class_weights = list(class_weights)
        self.n = 0
        self.augmentation = augmentation
        self.top_percent = top_percent

        datasets = {key: [] for key in self.mapping}
        for line in self.dataset:
            _, label = self._parse_sample(line)
            datasets[label].append(line)

        # Index 0 holds the negative lines, index 1 the positive lines.
        self.datasets = [
            datasets['negative'], datasets['positive']
        ]
        print(len(self.datasets[0]), len(self.datasets[1]))

        self.on_epoch_end()

    @staticmethod
    def _parse_sample(line):
        """Return ``(file_name, label)`` for one line of the label file."""
        fields = line.split()
        # Remove first item from sirm samples for proper indexing as a result
        # of spacing in file name
        if fields[-1] == 'sirm':
            fields = fields[1:]
        return fields[1], fields[2]

    def __next__(self):
        """Return the next batch, reshuffling and wrapping at epoch end."""
        # Get one batch of data
        batch_x, batch_y, weights = self.__getitem__(self.n)
        # Batch index
        self.n += 1

        # If we have processed the entire dataset then
        if self.n >= self.__len__():
            self.on_epoch_end()
            self.n = 0

        return batch_x, batch_y, weights

    def __len__(self):
        """Number of batches per epoch (driven by the negative samples)."""
        return int(np.ceil(len(self.datasets[0]) / float(self.batch_size)))

    def on_epoch_end(self):
        """Shuffle the per-class sample lists in place if shuffling is on."""
        if self.shuffle:
            for samples in self.datasets:
                np.random.shuffle(samples)

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Returns:
            ``(batch_x, batch_y, weights)`` where ``batch_x`` holds the images
            scaled to ``[0, 1]``, ``batch_y`` the one-hot labels (2 classes)
            and ``weights`` the per-sample class weights. The last batch of an
            epoch may contain fewer than ``batch_size`` samples.

        Raises:
            IndexError: If ``idx`` does not select any sample.
        """
        # Slicing copies the list, so the replacement below does not touch
        # self.datasets.
        batch_files = self.datasets[0][idx * self.batch_size:(idx + 1) *
                                       self.batch_size]
        n_samples = len(batch_files)
        if n_samples == 0:
            raise IndexError('batch index {} is out of range'.format(idx))

        # Size the arrays to the real number of samples: padding the final,
        # shorter batch with zeros would feed all-black images labelled as
        # class 0 to the model.
        batch_x = np.zeros((n_samples, *self.input_shape, self.num_channels))
        batch_y = np.zeros(n_samples)

        # upsample covid cases
        covid_size = max(int(n_samples * self.covid_percent), 1)
        covid_inds = np.random.choice(np.arange(n_samples),
                                      size=covid_size,
                                      replace=False)
        covid_files = np.random.choice(self.datasets[1],
                                       size=covid_size,
                                       replace=False)
        for i in range(covid_size):
            batch_files[covid_inds[i]] = covid_files[i]

        folder = 'train' if self.is_training else 'test'

        for i, line in enumerate(batch_files):
            file_name, label = self._parse_sample(line)

            x = process_image_file(os.path.join(self.datadir, folder, file_name),
                                   self.top_percent,
                                   self.input_shape[0])

            # The attribute always exists; what matters is whether an
            # augmentation callable was actually supplied.
            if self.is_training and self.augmentation is not None:
                x = self.augmentation(x)

            x = x.astype('float32') / 255.0

            batch_x[i] = x
            batch_y[i] = self.mapping[label]

        weights = np.take(self.class_weights, batch_y.astype('int64'))

        return batch_x, keras.utils.to_categorical(batch_y, num_classes=2), weights

In [None]:
def inference():
    """Classify a single chest X-ray image with a restored checkpoint.

    All settings come from the argparse defaults below (the parser is fed an
    empty argument list so that it works inside a notebook). The model is
    loaded into its own graph and the session is closed on exit, so the
    function can be called repeatedly without piling duplicate nodes onto the
    default graph.
    """
    # To remove TF Warnings
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # example='/content/drive/MyDrive/covidnet/COVID-NET- CXR-2'
    # NOTE(review): train() defaults to '.../covidnet/COVID-Net CXR-2' while
    # this default is '.../covidnet/COVID-NET- CXR-2' -- confirm which folder
    # actually exists on the drive.

    parser = argparse.ArgumentParser(description='COVID-Net Inference')
    parser.add_argument('--weightspath', default='/content/drive/MyDrive/covidnet/COVID-NET- CXR-2', type=str, help='Path to model files, defaults to \'/content/drive/MyDrive/covidnet/COVID-NET- CXR-2\'')
    parser.add_argument('--metaname', default='model.meta', type=str, help='Name of ckpt meta file')
    parser.add_argument('--ckptname', default='model', type=str, help='Name of model ckpts')
    parser.add_argument('--imagepath', default='assets/ex-covid.jpeg', type=str, help='Full path to image to be inferenced')
    parser.add_argument('--in_tensorname', default='input_1:0', type=str, help='Name of input tensor to graph')
    parser.add_argument('--out_tensorname', default='norm_dense_2/Softmax:0', type=str, help='Name of output tensor from graph')
    parser.add_argument('--input_size', default=480, type=int, help='Size of input (ex: if 480x480, --input_size 480)')
    parser.add_argument('--top_percent', default=0.08, type=float, help='Percent top crop from top of image')

    args = parser.parse_args([])

    # For COVID-19 positive/negative detection
    mapping = {'negative': 0, 'positive': 1}
    inv_mapping = {0: 'negative', 1: 'positive'}

    # A fresh graph plus a session context manager (the same pattern train()
    # uses): the graph becomes the default inside the block, so
    # import_meta_graph loads into it, and the session is released afterwards.
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        saver = tf.train.import_meta_graph(os.path.join(args.weightspath, args.metaname))
        saver.restore(sess, os.path.join(args.weightspath, args.ckptname))

        image_tensor = graph.get_tensor_by_name(args.in_tensorname)
        pred_tensor = graph.get_tensor_by_name(args.out_tensorname)

        x = process_image_file(args.imagepath, args.top_percent, args.input_size)
        x = x.astype('float32') / 255.0
        pred = sess.run(pred_tensor, feed_dict={image_tensor: np.expand_dims(x, axis=0)})

    print('Prediction: {}'.format(inv_mapping[pred.argmax(axis=1)[0]]))
    print('Confidence')
    print(' '.join('{}: {:.3f}'.format(cls.capitalize(), pred[0][i]) for cls, i in mapping.items()))
    print('**DISCLAIMER**')
    print('Do not use this prediction for self-diagnosis. You should check with your local authorities for the latest advice on seeking medical assistance.')

In [None]:

def eval(sess, graph, testfile, testfolder, input_tensor, output_tensor, input_size, mapping, top_percent=0.08):
    """Run the model over a test set and print confusion matrix, Sens and PPV.

    Note: the name shadows the ``eval`` builtin; it is kept because other
    cells call it under this name.

    Args:
        sess: Session holding the restored model.
        graph: Graph from which the tensors are looked up by name.
        testfile: Iterable of label-file lines; the second whitespace-separated
            field is the image file name and the third its label.
        testfolder: Folder containing the test images.
        input_tensor: Name of the image input tensor.
        output_tensor: Name of the prediction tensor (argmax over axis 1 is
            taken as the predicted class).
        input_size: Side length the images are resized to.
        mapping: Label name -> class index.
        top_percent: Fraction of rows cropped from the top of each image.
            Defaults to 0.08, the value that used to be hard-coded here.
    """
    image_tensor = graph.get_tensor_by_name(input_tensor)
    pred_tensor = graph.get_tensor_by_name(output_tensor)

    y_test = []
    pred = []
    for entry in testfile:
        line = entry.split()
        if not line:
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            continue
        x = process_image_file(os.path.join(testfolder, line[1]), top_percent, input_size)
        x = x.astype('float32') / 255.0
        y_test.append(mapping[line[2]])
        scores = sess.run(pred_tensor, feed_dict={image_tensor: np.expand_dims(x, axis=0)})
        # Store a plain class index so that pred ends up one-dimensional.
        pred.append(int(np.array(scores).argmax(axis=1)[0]))
    y_test = np.array(y_test)
    pred = np.array(pred)

    # Pass the full label set explicitly: without it the matrix only covers
    # classes that occur in y_test/pred, and the per-class loops below would
    # index past its end when a class is absent.
    labels = sorted(mapping.values())
    position = {label: pos for pos, label in enumerate(labels)}

    matrix = confusion_matrix(y_test, pred, labels=labels)
    matrix = matrix.astype('float')
    print(matrix)

    # Sensitivity: diagonal over row sums (0 when a class has no true samples).
    class_acc = [matrix[i, i] / np.sum(matrix[i, :]) if np.sum(matrix[i, :]) else 0 for i in range(len(matrix))]
    print('Sens', ', '.join('{}: {:.3f}'.format(cls.capitalize(), class_acc[position[i]]) for cls, i in mapping.items()))

    # PPV: diagonal over column sums (0 when a class was never predicted).
    ppvs = [matrix[i, i] / np.sum(matrix[:, i]) if np.sum(matrix[:, i]) else 0 for i in range(len(matrix))]
    print('PPV', ', '.join('{}: {:.3f}'.format(cls.capitalize(), ppvs[position[i]]) for cls, i in mapping.items()))

def evaluate():
    """Restore a checkpoint and print its metrics on the test label file.

    All settings come from the argparse defaults below (the parser is fed an
    empty argument list so that it works inside a notebook). The model is
    loaded into its own graph and both the label file and the session are
    closed on exit, so the function can be called repeatedly.
    """
    # To remove TF Warnings
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # NOTE(review): train() defaults to '.../covidnet/COVID-Net CXR-2' while
    # this default is '.../covidnet/COVID-NET- CXR-2' -- confirm which folder
    # actually exists on the drive.
    parser = argparse.ArgumentParser(description='COVID-Net Evaluation')
    parser.add_argument('--weightspath', default='/content/drive/MyDrive/covidnet/COVID-NET- CXR-2', type=str, help='Path to model files, defaults to \'/content/drive/MyDrive/covidnet/COVID-NET- CXR-2\'')
    parser.add_argument('--metaname', default='model.meta', type=str, help='Name of ckpt meta file')
    parser.add_argument('--ckptname', default='model', type=str, help='Name of model ckpts')
    parser.add_argument('--testfile', default='/content/drive/MyDrive/covidnet/labels/test_COVIDx8B.txt', type=str, help='Name of testfile')
    parser.add_argument('--testfolder', default='/content/drive/MyDrive/covidnet/data/test', type=str, help='Folder where test data is located')
    parser.add_argument('--in_tensorname', default='input_1:0', type=str, help='Name of input tensor to graph')
    parser.add_argument('--out_tensorname', default='norm_dense_2/Softmax:0', type=str, help='Name of output tensor from graph')
    parser.add_argument('--input_size', default=480, type=int, help='Size of input (ex: if 480x480, --input_size 480)')

    args = parser.parse_args([])

    # Read the labels up front and close the handle immediately.
    with open(args.testfile, 'r') as file:
        testfile = file.readlines()

    # For COVID-19 positive/negative detection
    mapping = {
        'negative': 0,
        'positive': 1,
    }

    # A fresh graph plus a session context manager (the same pattern train()
    # uses): the graph becomes the default inside the block, so
    # import_meta_graph loads into it, and the session is released afterwards.
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        saver = tf.train.import_meta_graph(os.path.join(args.weightspath, args.metaname))
        saver.restore(sess, os.path.join(args.weightspath, args.ckptname))

        eval(sess, graph, testfile, args.testfolder, args.in_tensorname, args.out_tensorname, args.input_size, mapping)

In [None]:
def train():
    """Fine-tune the restored COVID-Net checkpoint and evaluate every epoch.

    All settings come from the argparse defaults below (the parser is fed an
    empty argument list so that it works inside a notebook). The function

    1. builds a ``BalanceCovidDataset`` over the training label file,
    2. imports the meta graph into a fresh graph, attaches a weighted
       softmax cross-entropy loss and an Adam optimizer,
    3. restores the checkpoint, saves it as the baseline and evaluates it,
    4. trains for ``--epochs`` epochs, printing the loss of the last
       mini-batch, evaluating on the test set and saving a checkpoint after
       every ``display_step`` epochs.
    """
    # To remove TF Warnings
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    covid_percent = 0.16 # in the provided train_COVIDx8B.txt there are ~13800 negative and ~2160 positive samples

    parser = argparse.ArgumentParser(description='COVID-Net Training Script')
    parser.add_argument('--epochs', default=15, type=int, help='Number of epochs')
    parser.add_argument('--lr', default=0.00005, type=float, help='Learning rate')
    parser.add_argument('--bs', default=32, type=int, help='Batch size')
    parser.add_argument('--weightspath', default='/content/drive/MyDrive/covidnet/COVID-Net CXR-2', type=str,
                        help='Path to model files, defaults to \'/content/drive/MyDrive/covidnet/COVID-Net CXR-2\'')
    parser.add_argument('--metaname', default='model.meta', type=str, help='Name of ckpt meta file')
    parser.add_argument('--ckptname', default='model', type=str, help='Name of model ckpts')
    parser.add_argument('--trainfile', default='/content/drive/MyDrive/covidnet/labels/train_COVIDx8B.txt', type=str, help='Path to train file')
    parser.add_argument('--testfile', default='/content/drive/MyDrive/covidnet/labels/test_COVIDx8B.txt', type=str, help='Path to test file')
    parser.add_argument('--name', default='training_checkpoints', type=str, help='Name of folder to store training checkpoints')
    parser.add_argument('--datadir', default='/content/drive/MyDrive/covidnet/data', type=str, help='Path to data folder')
    parser.add_argument('--covid_weight', default=1., type=float, help='Class weighting for covid')
    parser.add_argument('--covid_percent', default=covid_percent, type=float, help='Percentage of covid samples in batch')
    parser.add_argument('--input_size', default=480, type=int, help='Size of input (ex: if 480x480, --input_size 480)')
    parser.add_argument('--top_percent', default=0.08, type=float, help='Percent top crop from top of image')
    parser.add_argument('--in_tensorname', default='input_1:0', type=str, help='Name of input tensor to graph')
    parser.add_argument('--out_tensorname', default='norm_dense_2/Softmax:0', type=str,
                        help='Name of output tensor from graph')
    parser.add_argument('--logit_tensorname', default='norm_dense_2/MatMul:0', type=str,
                        help='Name of logit tensor for loss')
    parser.add_argument('--label_tensorname', default='norm_dense_1_target:0', type=str,
                        help='Name of label tensor for loss')
    parser.add_argument('--weights_tensorname', default='norm_dense_1_sample_weights:0', type=str,
                        help='Name of sample weights tensor for loss')
    parser.add_argument('--training_tensorname', default='keras_learning_phase:0', type=str,
                        help='Name of training placeholder tensor')

    args = parser.parse_args([])

    # Parameters
    learning_rate = args.lr
    batch_size = args.bs
    display_step = 1

    # output path
    outputPath = '/content/drive/MyDrive/covidnet/output/test/'
    runID = args.name + '-lr' + str(learning_rate)
    runPath = outputPath + runID
    pathlib.Path(runPath).mkdir(parents=True, exist_ok=True)
    print('Output: ' + runPath)

    # Only the test labels are needed here; the training label file is read
    # by BalanceCovidDataset itself.
    with open(args.testfile) as f:
        testfiles = f.readlines()

    # For COVID-19 positive/negative detection
    mapping = {
        'negative': 0,
        'positive': 1,
    }
    class_weights = [1., args.covid_weight]

    generator = BalanceCovidDataset(data_dir=args.datadir,
                                    csv_file=args.trainfile,
                                    batch_size=batch_size,
                                    input_shape=(args.input_size, args.input_size),
                                    mapping=mapping,
                                    covid_percent=args.covid_percent,
                                    class_weights=class_weights,
                                    top_percent=args.top_percent)
    graph = tf.Graph()
    # Entering the session makes `graph` the default graph, so the meta graph
    # and the ops created below all land in it.
    with tf.Session(graph=graph) as sess:
        saver = tf.train.import_meta_graph(os.path.join(args.weightspath, args.metaname))

        image_tensor = graph.get_tensor_by_name(args.in_tensorname)
        labels_tensor = graph.get_tensor_by_name(args.label_tensorname)
        sample_weights = graph.get_tensor_by_name(args.weights_tensorname)
        pred_tensor = graph.get_tensor_by_name(args.logit_tensorname)
        is_training = graph.get_tensor_by_name(args.training_tensorname)
        # loss expects unscaled logits since it performs a softmax on logits internally for efficiency

        # Define loss and optimizer
        loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=pred_tensor, labels=labels_tensor)*sample_weights)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss_op)

        # Initialize the variables (this also covers the optimizer variables
        # created by minimize() above) ...
        init = tf.global_variables_initializer()

        # Run the initializer
        sess.run(init)

        # ... then overwrite the model variables with the checkpoint. The
        # order matters: initializing after the restore would wipe the
        # restored weights.
        saver.restore(sess, os.path.join(args.weightspath, args.ckptname))

        # save base model
        saver.save(sess, os.path.join(runPath, 'model'))
        print('Saved baseline checkpoint')
        print('Baseline eval:')
        eval(sess, graph, testfiles, os.path.join(args.datadir,'test'),
             args.in_tensorname, args.out_tensorname, args.input_size, mapping)

        # Training cycle
        print('Training started')
        total_batch = len(generator)
        for epoch in range(args.epochs):
            # One progress bar per epoch, so its timing and state restart
            # with every epoch instead of spanning the whole run.
            progbar = tf.keras.utils.Progbar(total_batch)
            for i in range(total_batch):
                # Run optimization
                batch_x, batch_y, weights = next(generator)
                sess.run(train_op, feed_dict={image_tensor: batch_x,
                                              labels_tensor: batch_y,
                                              sample_weights: weights,
                                              is_training: True})
                progbar.update(i+1)

            if epoch % display_step == 0:
                # Loss of the last mini-batch of the epoch: compute the logits
                # first, then feed them back in to evaluate the loss.
                pred = sess.run(pred_tensor, feed_dict={image_tensor:batch_x})
                loss = sess.run(loss_op, feed_dict={pred_tensor: pred,
                                                    labels_tensor: batch_y,
                                                    sample_weights: weights})
                print("Epoch:", '%04d' % (epoch + 1), "Minibatch loss=", "{:.9f}".format(loss))
                eval(sess, graph, testfiles, os.path.join(args.datadir,'test'),
                     args.in_tensorname, args.out_tensorname, args.input_size, mapping)
                saver.save(sess, os.path.join(runPath, 'model'), global_step=epoch+1, write_meta_graph=False)
                print('Saving checkpoint at epoch {}'.format(epoch + 1))

    print("Optimization Finished!")

In [None]:
# Run the fine-tuning loop with the defaults defined inside train(); it prints
# a baseline evaluation first and then loss/metrics after every epoch.
train()

Output: /content/drive/MyDrive/covidnet/output/test/training_checkpoints-lr5e-05
13794 2158
Saved baseline checkpoint
Baseline eval:
[[194.   6.]
 [  9. 191.]]
Sens Negative: 0.970, Positive: 0.955
PPV Negative: 0.956, Positive: 0.970
Training started
Epoch: 0001 Minibatch loss= 0.003375309
[[178.  22.]
 [  6. 194.]]
Sens Negative: 0.890, Positive: 0.970
PPV Negative: 0.967, Positive: 0.898
Saving checkpoint at epoch 1
Epoch: 0002 Minibatch loss= 0.000081834
[[176.  24.]
 [  5. 195.]]
Sens Negative: 0.880, Positive: 0.975
PPV Negative: 0.972, Positive: 0.890
Saving checkpoint at epoch 2
Epoch: 0003 Minibatch loss= 0.000971529
[[159.  41.]
 [  2. 198.]]
Sens Negative: 0.795, Positive: 0.990
PPV Negative: 0.988, Positive: 0.828
Saving checkpoint at epoch 3
Epoch: 0004 Minibatch loss= 0.000350011
[[168.  32.]
 [ 26. 174.]]
Sens Negative: 0.840, Positive: 0.870
PPV Negative: 0.866, Positive: 0.845
Saving checkpoint at epoch 4
Epoch: 0005 Minibatch loss= 0.170102030
[[ 24. 176.]
 [  0. 200.