# Import Packages

In [2]:
import numpy as np
import os
from sklearn.metrics import confusion_matrix
import seaborn as sn; sn.set(font_scale=1.4)
from sklearn.utils import shuffle           
import matplotlib.pyplot as plt             
import cv2                                 
import tensorflow as tf                
from tqdm import tqdm

In [3]:
# Kidney-scan categories. A name's position in this list is its integer label,
# and each name is also the sub-folder name expected inside a dataset directory.
class_names = ['Cyst', 'Normal', 'Stone', 'Tumor']
class_names_label = dict(zip(class_names, range(len(class_names))))

nb_classes = len(class_names)

# Target size passed to cv2.resize for every loaded image.
IMAGE_SIZE = (150, 150)

# Loading the Data
We need a `load_data` function that loads the images and their labels from the dataset folders.

In [4]:
def load_data(datasets=None):
    """Load every labelled image from each dataset directory.

    Each dataset directory is expected to contain one sub-folder per class;
    the sub-folder name selects the integer label through
    ``class_names_label``. Images are read with OpenCV, converted from BGR to
    RGB and resized to ``IMAGE_SIZE``. Pixel values are left unscaled.

    Entries of a dataset directory that are not class folders (stray files,
    unknown folder names) and image files OpenCV cannot decode are skipped
    and reported instead of aborting the whole load.

    Args:
        datasets: Optional iterable of dataset directory paths. When omitted,
            the project's training and testing directories are used, in that
            order.

    Returns:
        A list with one ``(images, labels)`` tuple per dataset, in input
        order. ``images`` is a float32 array and ``labels`` an int32 array.
    """
    if datasets is None:
        datasets = [r"D:\Projects\HealthPulse  Data Driven strategies for Healthcare Optimization\chronic kidney disease\training", r"D:\Projects\HealthPulse  Data Driven strategies for Healthcare Optimization\chronic kidney disease\testing"]

    output = []

    # Iterate through the requested datasets (training and test by default)
    for dataset in datasets:

        images = []
        labels = []

        print("Loading {}".format(dataset))

        # Iterate through each folder corresponding to a category
        for folder in os.listdir(dataset):
            folder_path = os.path.join(dataset, folder)

            # Stray files (e.g. Thumbs.db) and folders that are not a known
            # class would otherwise raise KeyError on the label lookup.
            if not os.path.isdir(folder_path) or folder not in class_names_label:
                print("Skipping {}: not a known class folder".format(folder_path))
                continue

            label = class_names_label[folder]
            unreadable = 0

            # Iterate through each image in our folder
            for file in tqdm(os.listdir(folder_path)):
                img_path = os.path.join(folder_path, file)

                # cv2.imread signals failure by returning None rather than
                # raising, so check before handing the result to cvtColor.
                image = cv2.imread(img_path)
                if image is None:
                    unreadable += 1
                    continue

                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, IMAGE_SIZE)

                # Append the image and its corresponding label to the output
                images.append(image)
                labels.append(label)

            if unreadable:
                print("Skipped {} unreadable file(s) in {}".format(unreadable, folder_path))

        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')

        output.append((images, labels))

    return output

In [5]:
# load_data() returns one (images, labels) pair per dataset directory, in the
# order of its dataset list: training first, then testing.
(train_images, train_labels), (test_images, test_labels) = load_data()

Loading D:\Projects\HealthPulse  Data Driven strategies for Healthcare Optimization\chronic kidney disease\training


100%|██████████████████████████████████████████████████████████████████████████████| 2968/2968 [01:25<00:00, 34.63it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 4062/4062 [01:46<00:00, 38.30it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1113/1113 [00:31<00:00, 35.59it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1833/1833 [00:51<00:00, 35.59it/s]


Loading D:\Projects\HealthPulse  Data Driven strategies for Healthcare Optimization\chronic kidney disease\testing


100%|████████████████████████████████████████████████████████████████████████████████| 741/741 [00:13<00:00, 56.01it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1015/1015 [00:22<00:00, 45.59it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 264/264 [00:09<00:00, 26.62it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 450/450 [00:15<00:00, 29.88it/s]


In [6]:
# load_data() appends images one class folder at a time, so the arrays come
# back grouped by class. Shuffle images and labels with the same permutation to
# mix the classes; random_state=25 makes that permutation repeatable.
# NOTE(review): Keras documents validation_split as taking the last samples of
# the data before any shuffling, so this step is what keeps the validation
# slice used by model.fit from being dominated by one class — confirm against
# the installed Keras version.
train_images, train_labels = shuffle(train_images, train_labels, random_state=25)

In [7]:
# One label per example, so the label array lengths are the dataset sizes.
n_train = len(train_labels)
n_test = len(test_labels)

# Report the split sizes and the configured resize target.
for template, value in (
    ("Number of training examples: {}", n_train),
    ("Number of testing examples: {}", n_test),
    ("Each image is of size: {}", IMAGE_SIZE),
):
    print(template.format(value))

Number of training examples: 9976
Number of testing examples: 2470
Each image is of size: (150, 150)


In [8]:
# Rescale 8-bit pixel intensities from [0, 255] to [0, 1].
# NOTE: this cell rebinds its own inputs, so running it a second time without
# reloading the data divides by 255 again.
train_images = np.divide(train_images, 255.0)
test_images = np.divide(test_images, 255.0)

In [9]:
# Small CNN: two convolution + max-pooling stages followed by a dense
# classifier head with one softmax output per class.
# The input shape and the output width are derived from IMAGE_SIZE and
# nb_classes so the model cannot drift out of sync with the data-loading config.
# cv2.resize takes IMAGE_SIZE as (width, height) while the arrays it produces
# are (height, width, channels), hence the swapped indices below.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                           input_shape=(IMAGE_SIZE[1], IMAGE_SIZE[0], 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(nb_classes, activation=tf.nn.softmax)
])

In [10]:
# The labels are integer class indices (int32 from load_data), which is the
# form the sparse categorical cross-entropy loss expects.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Train for two epochs in batches of 256, holding out 20% of the supplied
# training samples for validation; the returned object is kept in `history`.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=256,
    epochs=2,
    validation_split=0.2,
)

Epoch 1/2
Epoch 2/2


In [19]:
# One row of class probabilities per test image.
predictions = model.predict(test_images)
# The predicted class is the index of the largest probability in each row.
pred_labels = predictions.argmax(axis=1)
pred_labels



array([0, 0, 2, ..., 2, 2, 2], dtype=int64)

In [36]:
import joblib

In [37]:
# Persist the trained weights to an HDF5 file in the working directory.
# NOTE(review): newer Keras releases require save_weights filenames to end in
# ".weights.h5" — confirm against the installed version before upgrading.
model.save_weights("model_kidney_scan.h5")

In [38]:
# Export the network architecture as JSON, written to a file in the working
# directory separately from the weights.
model_json = model.to_json()
with open("kidney_scan.json", mode="w") as json_file:
    json_file.write(model_json)