In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Input, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

from gtda.homology import CubicalPersistence
from gtda.diagrams import PersistenceImage, PersistenceLandscape

X_img = np.load("/home/sajedhamdan/Desktop/skin_cancer/images_train_256x192.npy")
y = np.load("/home/sajedhamdan/Desktop/skin_cancer/train_labels.npy")
X_img = preprocess_input(X_img.astype(np.float32))
y_cat = to_categorical(y)

sample_size = 2000
X_img, _, y, _ = train_test_split(X_img, y, train_size=sample_size, stratify=y, random_state=42)

# TDA feature extraction
def extract_tda_features(X_rgb):
    X_gray = np.dot(X_rgb[...,:3], [0.2989, 0.5870, 0.1140])

    cp = CubicalPersistence(n_jobs=-1)
    diagrams = cp.fit_transform(X_gray)

    pi = PersistenceImage(sigma=1.0, n_bins=20, weight_function=lambda x: x[1] ** 2)
    pi_feat = pi.fit_transform(diagrams).reshape(len(diagrams), -1)

    pl = PersistenceLandscape(n_layers=5, n_bins=50)
    pl_feat = pl.fit_transform(diagrams).reshape(len(diagrams), -1)

    return np.hstack((pi_feat, pl_feat))

print("Extracting TDA features...")
X_tda_features = extract_tda_features(X_img)
print("TDA features shape:", X_tda_features.shape)

# CNN feature extraction
resnet_base = ResNet50(include_top=False, weights='imagenet', input_shape=(192, 256, 3))
for layer in resnet_base.layers[:-50]:
    layer.trainable = False
for layer in resnet_base.layers[-50:]:
    layer.trainable = True

cnn_output = GlobalAveragePooling2D()(resnet_base.output)
cnn_model = Model(resnet_base.input, cnn_output)

print("Extracting CNN features...")
X_img_features = cnn_model.predict(X_img, batch_size=32, verbose=1)
print("CNN features shape:", X_img_features.shape)

# combining CNN and TDA features
X_combined = np.hstack((X_img_features, X_tda_features))

print("Balancing with SMOTE...")
smote = SMOTE(random_state=42)
y_subset = y[:X_img_features.shape[0]]
X_balanced, y_bal = smote.fit_resample(X_combined, y_subset)

X_img_bal = X_balanced[:, :X_img_features.shape[1]]
X_tda_bal = X_balanced[:, X_img_features.shape[1]:]
y_bal_cat = to_categorical(y_bal)

scaler = StandardScaler()
X_tda_norm = scaler.fit_transform(X_tda_bal)

X_final = np.concatenate([X_img_bal, X_tda_norm], axis=1)
print("Final input shape:", X_final.shape)

# train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y_bal_cat, test_size=0.2, random_state=42, stratify=y_bal
)

input_layer = Input(shape=(X_final.shape[1],))
x = Dense(512, activation='relu')(input_layer)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
output_layer = Dense(7, activation='softmax')(x)
model = Model(inputs=input_layer, outputs=output_layer)

# Callbacks
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_test, y_test),
    callbacks=[early_stop, lr_scheduler],
    verbose=1
)

results = model.evaluate(X_test, y_test, verbose=1)
print(f"Test - Accuracy: {results[1]:.4f} | Precision: {results[2]:.4f} | Recall: {results[3]:.4f}")

model.save("tda_resnet_model_v1_lr.keras")


2025-06-08 13:26:03.896318: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-08 13:26:04.068280: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749378364.133675   30786 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749378364.152590   30786 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1749378364.284177   30786 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Extracting TDA features...
TDA features shape: (2000, 1300)


2025-06-08 13:29:34.244584: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Extracting CNN features...
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 2s/step
CNN features shape: (2000, 2048)
Balancing with SMOTE...
Final input shape: (9366, 3348)
Epoch 1/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - accuracy: 0.3122 - loss: 1.8456 - precision: 0.4081 - recall: 0.1227 - val_accuracy: 0.5672 - val_loss: 1.1661 - val_precision: 0.7475 - val_recall: 0.3207 - learning_rate: 1.0000e-04
Epoch 2/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.5036 - loss: 1.3149 - precision: 0.6414 - recall: 0.2840 - val_accuracy: 0.6318 - val_loss: 1.0133 - val_precision: 0.8187 - val_recall: 0.4216 - learning_rate: 1.0000e-04
Epoch 3/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.5815 - loss: 1.1522 - precision: 0.7186 - recall: 0.3760 - val_accuracy: 0.6601 - val_loss: 0.9111 - val_precision: 0.8224 - val_recall: 0.4968 - learning_rate: 