In [None]:
# Install TensorFlow with pip through a shell escape; no version is pinned here.
!pip install tensorflow

In [None]:
from google.colab import drive
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate, GlobalAveragePooling2D, Resizing
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import classification_report, roc_auc_score, precision_score, recall_score, f1_score
from tensorflow.keras.metrics import Accuracy, AUC, Precision, Recall

# Attach Google Drive at the path that the dataset and model locations
# used further down are rooted in.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the pickled dataset package from Drive.
# NOTE(review): unpickling can execute arbitrary code, so only load a package
# file that you produced yourself or otherwise trust.
with open("/content/drive/My Drive/Graduation Project/Graduation Project 2/df_all_balanced_package.pkl", "rb") as f:
    df_all = pickle.load(f)

# Each split provides three entries: the array fed to `cnn_input`, the array
# fed to `conn_input`, and the targets.
X_train_cnn, X_train_conn, y_train = (
    df_all[key] for key in ("X_train_cnn", "X_train_conn", "y_train")
)
X_val_cnn, X_val_conn, y_val = (
    df_all[key] for key in ("X_val_cnn", "X_val_conn", "y_val")
)
X_test_cnn, X_test_conn, y_test = (
    df_all[key] for key in ("X_test_cnn", "X_test_conn", "y_test")
)

In [None]:
# Dimensions shared by the model-building cell.
num_classes = 13                  # units in the final Dense layer; matches the 13 entries of class_names
conn_input_shape = (3,)           # per-sample shape of the `conn_input` vector
cnn_input_shape = (256, 256, 3)   # per-sample shape of `cnn_input` (presumably H x W x channels; confirm)

In [None]:
# ---- Image branch ----
# Entry point for the image tensor.
cnn_input = Input(shape=cnn_input_shape, name='cnn_input')

# Bring the images down to 224x224 before they reach the backbone.
resized_input = Resizing(224, 224)(cnn_input)

# ResNet50 backbone without its classification head, initialised from the
# ImageNet weights and wired directly onto the resized tensor.
# NOTE(review): the pretrained weights are normally paired with
# tf.keras.applications.resnet50.preprocess_input; no such step is visible in
# this notebook. Confirm the pickled images are already preprocessed.
base_model = ResNet50(include_top=False, weights='imagenet', input_tensor=resized_input)

# Collapse the spatial feature map to one vector per image, then regularise.
image_features = GlobalAveragePooling2D()(base_model.output)
image_features = Dropout(0.3)(image_features)

# ---- Auxiliary branch ----
# Second input (shape given by conn_input_shape) passed through a small dense layer.
conn_input = Input(shape=conn_input_shape, name='conn_input')
conn_features = Dense(64, activation='relu')(conn_input)
conn_features = Dropout(0.3)(conn_features)

# ---- Fusion head ----
# Join both branches and map them to one sigmoid unit per class, so every
# class gets an independent probability.
combined = Concatenate()([image_features, conn_features])
fused = Dense(64, activation='relu')(combined)
fused = Dropout(0.3)(fused)
output = Dense(num_classes, activation='sigmoid')(fused)

# Two-input, single-output model.
model = Model(inputs=[cnn_input, conn_input], outputs=output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Training configuration: Adam at a small learning rate, and binary
# cross-entropy (with light label smoothing) to match the sigmoid outputs.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
loss_fn = tf.keras.losses.BinaryCrossentropy(label_smoothing=0.05)

# Metrics reported per epoch. Their names determine the `val_*` keys that the
# callbacks monitor.
# NOTE(review): AUC() is left at its defaults (multi_label not set); check the
# Keras docs if a per-label average comparable to sklearn's macro AUC is wanted.
tracked_metrics = [
    AUC(name='auc'),
    Precision(name='precision'),
    Recall(name='recall'),
]

model.compile(optimizer=optimizer, loss=loss_fn, metrics=tracked_metrics)

model.summary()

In [None]:
# Callbacks
# Stop once validation AUC has not improved for 5 epochs and roll back to the
# best weights seen.
early_stopping = EarlyStopping(monitor='val_auc', patience=5, restore_best_weights=True, mode='max')

# Multiply the learning rate by 0.3 after 3 epochs without a lower validation
# loss, never going below 1e-6.
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, min_lr=1e-6, verbose=1, mode='min')

# Keep on Drive only the checkpoint with the highest validation AUC so far.
best_checkpoint = ModelCheckpoint(
    "/content/drive/My Drive/Graduation Project/Graduation Project 2/Models/SavedModels2/ResNet50.keras",
    monitor='val_auc', save_best_only=True, mode='max', verbose=1,
)

callbacks = [early_stopping, lr_on_plateau, best_checkpoint]

In [None]:
# Fit on the training split; the validation split drives the callbacks.
# Both model inputs are passed as a list in the order [cnn_input, conn_input].
train_inputs = [X_train_cnn, X_train_conn]
val_inputs = [X_val_cnn, X_val_conn]

history = model.fit(
    x=train_inputs,
    y=y_train,
    batch_size=32,
    epochs=30,
    verbose=1,
    callbacks=callbacks,
    validation_data=(val_inputs, y_val),
)

Epoch 1/30
[1m1479/1479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.0000e+00 - auc: 0.6814 - loss: 0.4443 - precision: 0.3202 - recall: 0.1794
Epoch 1: val_auc improved from -inf to 0.66381, saving model to /content/drive/My Drive/Graduation Project/Graduation Project 2/Models/SavedModels2/ResNet50.keras
[1m1479/1479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3787s[0m 3s/step - accuracy: 0.0000e+00 - auc: 0.6815 - loss: 0.4443 - precision: 0.3203 - recall: 0.1794 - val_accuracy: 0.0000e+00 - val_auc: 0.6638 - val_loss: 0.3915 - val_precision: 0.3296 - val_recall: 0.1498 - learning_rate: 1.0000e-04
Epoch 2/30
[1m1479/1479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.0000e+00 - auc: 0.8626 - loss: 0.3334 - precision: 0.7025 - recall: 0.3410
Epoch 2: val_auc did not improve from 0.66381
[1m1479/1479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3720s[0m 3s/step - accuracy: 0.0000e+00 - auc: 0.8626 - loss: 0.3334 - p

In [None]:
# Persist the in-memory model to Drive twice: once as an .h5 file and once as
# a .keras file.
for export_path in (
    "/content/drive/My Drive/Graduation Project/Graduation Project 2/Models/SavedModels2/best_ResNet50.h5",
    "/content/drive/My Drive/Graduation Project/Graduation Project 2/Models/SavedModels2/best_ResNet50.keras",
):
    model.save(export_path)

In [None]:
# Score the test split: keep the raw sigmoid probabilities and derive hard
# 0/1 labels with a fixed 0.5 cutoff applied to every class.
y_pred = model.predict([X_test_cnn, X_test_conn])
y_pred_bin = (y_pred > 0.5).astype(int)

# Macro-averaged metrics. AUC is computed from the probabilities; the others
# use the thresholded labels.
print("AUC:", roc_auc_score(y_test, y_pred, average='macro'))
for metric_label, metric_fn in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred_bin, average='macro'))

In [None]:
# Label names, one per output column, used as row headers in the reports.
class_names = [
    "Cardiomegaly",
    "Emphysema",
    "Effusion",
    "Infiltration",
    "Mass",
    "Nodule",
    "Atelectasis",
    "Pneumothorax",
    "Pleural_Thickening",
    "Pneumonia",
    "Fibrosis",
    "Edema",
    "Consolidation",
]

# Per-class precision / recall / F1 for the 0.5-cutoff predictions.
print("\nClassification Report:\n")
default_cutoff_report = classification_report(y_test, y_pred_bin, target_names=class_names)
print(default_cutoff_report)

In [None]:
def find_best_thresholds(y_true, y_probs):
    """Choose, for every label column, the cutoff that maximises binary F1.

    Candidate cutoffs come from ``np.arange(0.1, 0.9, 0.01)``. For each column
    the probabilities are binarised with ``prob > cutoff`` and scored against
    the true labels with ``f1_score(..., zero_division=0)``.

    Args:
        y_true: 2-D array of 0/1 labels, indexed as ``[sample, class]``.
        y_probs: 2-D array of predicted probabilities, indexed the same way.

    Returns:
        1-D ``np.ndarray`` with one cutoff per column of ``y_true``. On ties
        the lowest cutoff that reaches the best F1 is kept; a column whose F1
        never rises above 0 keeps the default cutoff of 0.5.
    """
    candidate_cutoffs = np.arange(0.1, 0.9, 0.01)
    n_classes = y_true.shape[1]

    chosen = []
    for col in range(n_classes):
        truth_col = y_true[:, col]
        prob_col = y_probs[:, col]

        top_cutoff, top_f1 = 0.5, 0
        for cutoff in candidate_cutoffs:
            hard_labels = (prob_col > cutoff).astype(int)
            score = f1_score(truth_col, hard_labels, zero_division=0)
            # Strict comparison: only a genuine improvement replaces the
            # current best, so the earliest cutoff wins ties.
            if score > top_f1:
                top_f1, top_cutoff = score, cutoff
        chosen.append(top_cutoff)

    return np.array(chosen)

# Predict probabilities for the validation split. The threshold search below
# needs them, and no other cell in this notebook defines `val_preds`, so
# without this line the cell raises NameError on a fresh Restart & Run All.
# The validation split is used (not the test split) so the test metrics
# reported later are not tuned on the data they are measured on.
val_preds = model.predict([X_val_cnn, X_val_conn])

# Compute best thresholds (one cutoff per class, chosen to maximise F1).
best_thresholds = find_best_thresholds(y_val, val_preds)
print("Best thresholds per class:\n", best_thresholds)

In [None]:
# Binarise the test-set probabilities with the per-class cutoffs.
test_probs = model.predict([X_test_cnn, X_test_conn])

# zeros_like gives an array with the shape and dtype of test_probs; the 0/1
# flags written below are stored in that dtype.
test_preds = np.zeros_like(test_probs)

n_outputs = test_probs.shape[1]
for class_idx in range(n_outputs):
    above_cutoff = test_probs[:, class_idx] > best_thresholds[class_idx]
    test_preds[:, class_idx] = above_cutoff.astype(int)

In [None]:
# Macro-averaged test metrics for the per-class-threshold predictions.
# AUC is computed from the probabilities, so thresholding does not affect it.
print("AUC:", roc_auc_score(y_test, test_probs, average='macro'))
for metric_label, metric_fn in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_test, test_preds, average='macro'))

In [None]:
# Per-class precision / recall / F1 for the per-class-threshold predictions.
print("\nPer-Class Classification Report:\n")
tuned_cutoff_report = classification_report(y_test, test_preds, target_names=class_names)
print(tuned_cutoff_report)