# 1. Import preprocessed data

In [1]:
# Install any necessary environment if code presents errors such as downgrading:
#!pip install pandas==1.5.1


In [2]:
# Standard imports
import pandas as pd
import numpy as np
import pickle
from tensorflow.keras import layers
from tensorflow.keras.models import Model

In [3]:
# Connect to my Google Drive to get to Pickle File
from google.colab import drive
drive.mount("/content/drive")


Mounted at /content/drive


In [4]:
# Load pickle data and ensure correctly aligned
with open("/content/drive/My Drive/Data_CNN_Ready.pkl", "rb") as file:
    data = pickle.load(file)

In [5]:
# Seperate pickled data into stucture ready for modeling
# Note: I am removing the "scaled" nomenclature used in the previously file--as I often forget that.
X_cat_train = np.array(data[0])
X_cat_test = np.array(data[1])
X_img_train = np.array(data[2])
X_img_test = np.array(data[3])
Y_train = np.array(data[4])
Y_test = np.array(data[5])

In [6]:
# Verify these should now be good to send into models.
print(X_cat_train.shape)
print(X_img_train.shape)
print(Y_train.shape)
print(X_cat_test.shape)
print(X_img_test.shape)
print(Y_test.shape)

(2507, 3)
(2507, 256, 256, 1)
(2507, 15)
(836, 3)
(836, 256, 256, 1)
(836, 15)


In [7]:
# Define Model (MLP) for categorical/numerical data portion
cat_input = layers.Input(shape=(3, ), name='cat_input')

cat_h1 = layers.Dense(30, activation="relu")(cat_input)
cat_h2 = layers.Dense(15, activation="relu")(cat_h1)

# we won't use an standard output layer yet as we will combine this with the image data
cat_model = Model(inputs=cat_input, outputs=cat_h2)

In [8]:
# Define Model (CNN) for image data portion
## CNN Model is based on code by pyimagesearch.com
img_input = layers.Input(shape=(256, 256, 1), name="img_input")

img_conv1 = layers.Conv2D(16, (3, 3), activation="relu")(img_input)
img_bn1 = layers.BatchNormalization()(img_conv1)
img_mp1 = layers.MaxPooling2D((2, 2))(img_bn1)
img_conv2 = layers.Conv2D(32, (3, 3), activation="relu")(img_mp1)
img_bn2 = layers.BatchNormalization()(img_conv2)
img_mp2 = layers.MaxPooling2D((2, 2))(img_bn2)
img_conv3 = layers.Conv2D(64, (3, 3), activation="relu")(img_mp2)
img_bn3 = layers.BatchNormalization()(img_conv3)
img_mp3 = layers.MaxPooling2D((2, 2))(img_bn3)
img_flat = layers.Flatten()(img_mp3)
img_dense = layers.Dense(16, activation="relu")(img_flat)
img_bn4 = layers.BatchNormalization()(img_dense)
img_drop = layers.Dropout(0.5)(img_bn4)
# final dense layer matches output of MLP model
img_match = layers.Dense(15, activation="relu")(img_drop)

img_model = Model(inputs=img_input, outputs=img_match)

In [9]:
# Now combine these two models for a final output
combined_input = layers.concatenate([cat_model.output, img_model.output])
combined_h1 = layers.Dense(15, activation="relu")(combined_input)
# using sigmoid to predictd each of 14 possible outcomes which are not mutually exclusive (and 1 technically is)
final_out = layers.Dense(15, activation="sigmoid")(combined_h1)

mixed_model = Model(inputs=[cat_model.input, img_model.input], outputs=final_out)

In [10]:
# Now fit and train the model
mixed_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
mixed_model.fit(x=[X_cat_train, X_img_train], y=Y_train, epochs=50, validation_split=0.2)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x7a343aea7bb0>

In [11]:
# Evaluate teh model
eval_loss, eval_accuracy = mixed_model.evaluate([X_cat_test, X_img_test], Y_test)
print(f"Loss: {eval_loss}, Accuracy: {eval_accuracy}")

Loss: 0.5657050013542175, Accuracy: 0.10765550285577774


In [12]:
# Finally save the pre-trained model for use elsewhere
mixed_model.save("/content/drive/My Drive/xrays-full.keras")