In [25]:
%config Completer.use_jedi = False
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import random
import os
import imageio
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from collections import Counter

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import accuracy_score, recall_score, precision_score, classification_report, confusion_matrix
from sklearn.model_selection import RandomizedSearchCV, cross_val_score, RepeatedStratifiedKFold

import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from keras.applications import resnet
from keras.applications.resnet import ResNet50
from keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.optimizers import Adam
import pathlib

In [26]:
# Choose the dataset root depending on whether the notebook runs inside Google Colab
on_colab = 'google.colab' in str(get_ipython())

if not on_colab:
    # Local run: the dataset is addressed with a path relative to the notebook
    print('Not running on CoLab')
    directory = r'../../data/cancer-detection-model/TEST lung cancer dataset'
else:
    # Colab run: Google Drive has to be mounted before the shared drive is readable
    from google.colab import drive
    drive.mount('/content/drive')
    print('Running on CoLab')
    directory = r'/content/drive/Shareddrives/cancer-detection-model/TEST lung cancer dataset'

# Diagnosis classes; the position in this list is used as the class index
categories = ['Benign cases', 'Malignant cases', 'Normal cases']

Not running on CoLab


In [27]:
# Seed TensorFlow, NumPy and the stdlib RNG with one value so runs are repeatable
seed = 42
for seed_rng in (tf.random.set_seed, np.random.seed, random.seed):
    seed_rng(seed)

In [28]:
def get_test_generator(directory, image_height, image_width, batch_size):
    '''Build an unshuffled, rescaled grayscale generator for the test images
        :directory: Location of the data (one sub-folder per class)
        :image_height: image height
        :image_width: image width
        :batch_size: The batch size
        :returns: The iterator produced by ImageDataGenerator.flow_from_directory
    '''

    # Only rescale pixel values to [0, 1]; no augmentation for test data
    datagen = ImageDataGenerator(dtype='float32',
        rescale=1.0 / 255.0)

    test_generator = datagen.flow_from_directory(
        directory,
        # target_size is (height, width) only; the single channel is added by
        # color_mode="grayscale", so a third element here is wrong
        target_size=(image_height, image_width),
        batch_size=batch_size,
        color_mode="grayscale",
        class_mode='categorical',  # 'categorical' for multi-class classification
        # Keep file order stable so predictions can be matched to the labels
        shuffle=False,
        # A list of sub-folder names; the list position is the class index
        # (Benign cases -> 0, Malignant cases -> 1, Normal cases -> 2)
        classes=['Benign cases',
                 'Malignant cases',
                 'Normal cases']
    )

    return test_generator

def process_normalize(directory, categories, image_size):
    '''Load, resize and normalize the grayscale images of every category
        :directory: Root directory of images, with one sub-folder per category
        :categories: Categories of diagnosis; the position in the list is the class index
        :image_size: Target image size to resize (images become image_size x image_size)
        :returns: Tuple (X, y); X has shape (n, image_size, image_size, 1) with values
            divided by 255, y is the one-hot label matrix with len(categories) columns
        :raises ValueError: If no readable image is found under directory
    '''
    image_extensions = {'.jpg', '.jpeg', '.png'}
    data = []

    for category_index, category in enumerate(categories):
        path = os.path.join(directory, category)

        # os.listdir order is arbitrary; sorting makes the seeded shuffle below
        # give the same order on every machine
        for file in sorted(os.listdir(path)):
            # Compare the lower-cased suffix so files such as 'scan.JPG' are not dropped
            if pathlib.Path(file).suffix.lower() not in image_extensions:
                print('Skipping {}'.format(file))
                continue

            filepath = os.path.join(path, file)
            # Flag 0 loads the image as single-channel grayscale
            img = cv2.imread(filepath, 0)
            if img is None:
                # cv2.imread signals an unreadable file with None rather than raising
                print('Skipping {}'.format(file))
                continue

            # resize the image
            img = cv2.resize(img, (image_size, image_size))
            data.append([img, category_index])

    if not data:
        raise ValueError('No images found under {}'.format(directory))

    random.shuffle(data)

    # Split out the image and category data
    image_data, category_data = map(list, zip(*data))
    # Convert the category index into a vector ala [0, 1, 0] because we do
    # not want to imply in the model that index 0 is not as good as 1 and 2 is better than 1 etc.
    # num_classes is pinned so an empty trailing category still gets its column.
    y = keras.utils.to_categorical(category_data, num_classes=len(categories))

    X = np.array(image_data).reshape(-1, image_size, image_size, 1)

    print('X counts:', X.shape)
    print('y counts:', y.shape)

    # Normalize image values to be between the values of 0 and 1
    X = X / 255.0

    return X, y

In [29]:
X, y = process_normalize(directory, categories, 256)

# Try the luna dataset
# Dedicated names keep `directory`/`categories` pointing at the standard test set;
# overwriting them made a re-run of this cell load the luna16 images into X, y.
# NOTE(review): this path is relative to a local checkout only - confirm the
# location to use when running on Colab.
luna_directory = r'../../data/cancer-detection-model/luna16-jpg for testing'
# The position in this list becomes the class index assigned to each folder
luna_categories = ['Benign', 'Malignant', 'Normal']
X_luna, y_luna = process_normalize(luna_directory, luna_categories, 256)

Skipping .gitignore
X counts: (315, 256, 256, 1)
y counts: (315, 3)
X counts: (4738, 256, 256, 1)
y counts: (4738, 3)


In [30]:
# Saved models to evaluate, keyed by their path on disk, with per-model settings
models = {'lung_cancer_detection_One.data_augmentation-model4':
          {'image_height': 256, 'image_width': 256, 'batch_size': 8}}

# Test sets to score every model on, keyed by the label used in the printed report
evaluation_sets = {'standard': (X, y), 'luna16': (X_luna, y_luna)}

for model, value in models.items():
    # Load the model architecture and weights once per model; evaluate() does not
    # change the weights, so the same instance can score every test set
    loaded_model = tf.keras.models.load_model(model)

    # Compile the loaded model so evaluate() reports loss and accuracy
    loaded_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

    for dataset_name, (X_eval, y_eval) in evaluation_sets.items():
        results = loaded_model.evaluate(X_eval, y_eval, batch_size=value['batch_size'])
        print(f"{model} for {dataset_name} Test Loss: {results[0]}, Test Accuracy: {results[1]}")



lung_cancer_detection_One.data_augmentation-model4 for standard Test Loss: 0.16715845465660095, Test Accuracy: 0.9492063522338867
lung_cancer_detection_One.data_augmentation-model4 for luna16 Test Loss: 6.179551601409912, Test Accuracy: 0.2477838695049286
