In [3]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import cv2

# Dataset location (adjust base_path to match the mounted Kaggle dataset)
base_path = "/kaggle/input/hsi-skincancer-main"
train_path = os.path.join(base_path, "train")

# Training configuration and label layout
IMG_SIZE = 128  # spatial size fed to the network (reduced from 256 to save memory)
BATCH_SIZE = 8  # kept small so the 3D volumes fit in GPU memory
EPOCHS = 30  # number of training epochs
TEST_SIZE = 0.3  # 30% held out for testing, 70% used for training
# class_3 -> 0, class_4 -> 1, class_5 -> 2
CLASS_MAPPING = {class_id: index for index, class_id in enumerate((3, 4, 5))}
NUM_CLASSES = len(CLASS_MAPPING)  # only class_3, class_4 and class_5 are used

def load_and_preprocess_npy(file_path, img_size=IMG_SIZE):
    """Load one hyperspectral cube from a .npy file and prepare it for the 3D CNN.

    Args:
        file_path: Path to a .npy file. The array is treated as
            (bands, height, width); the original author notes (31, 256, 256).
        img_size: Target height and width after resizing.

    Returns:
        Array of shape (bands, img_size, img_size, 1), divided by the cube's
        maximum value. An all-zero cube comes back as zeros instead of NaNs.
        The result only lies in [0, 1] when the cube has no negative values.
    """
    img = np.load(file_path)

    # cv2.resize operates on (height, width, channels), so move bands last.
    img = np.transpose(img, (1, 2, 0))
    img = cv2.resize(img, (img_size, img_size))

    # Back to bands-first, then append a singleton channel axis.
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=-1)

    # Scale by the peak value. A peak of exactly 0 would turn the whole cube
    # into NaNs (0 / 0), so divide by 1 in that case and keep the zeros.
    peak = np.max(img)
    if peak == 0:
        peak = 1
    return img / peak

def load_data_paths(data_path, model_type):
    """Collect .npy file paths and integer labels for class_3, class_4 and class_5.

    Args:
        data_path: Directory that contains one sub-directory per model type.
        model_type: Name of the sub-directory to scan.

    Returns:
        A ``(file_paths, labels)`` pair of equal-length lists. Each label is
        the CLASS_MAPPING index of the class directory the file was found in.
        A missing class directory is reported with a warning and skipped.
    """
    model_path = os.path.join(data_path, model_type)

    file_paths = []
    labels = []

    for class_id in (3, 4, 5):  # only class_3, class_4, class_5
        class_path = os.path.join(model_path, f"class_{class_id}")
        # isdir rather than exists: a stray file with this name must not
        # reach os.listdir, which would raise NotADirectoryError.
        if not os.path.isdir(class_path):
            print(f"Warning: {class_path} does not exist")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and therefore any seeded split of it) reproducible.
        npy_files = sorted(f for f in os.listdir(class_path) if f.endswith(".npy"))
        file_paths.extend(os.path.join(class_path, f) for f in npy_files)
        labels.extend([CLASS_MAPPING[class_id]] * len(npy_files))

    return file_paths, labels

def data_generator(file_paths, labels, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    """Yield shuffled (images, one-hot labels) batches forever.

    Every pass over the data uses a fresh random permutation, and the last
    batch of a pass may hold fewer than ``batch_size`` samples.

    Args:
        file_paths: Sequence of .npy paths.
        labels: Integer class indices, aligned with ``file_paths``.
        img_size: Spatial size passed to ``load_and_preprocess_npy``.
        batch_size: Maximum number of samples per batch.

    Yields:
        ``(X_batch, y_batch)`` where ``X_batch`` has shape
        (n, img_size, img_size, bands, 1) and ``y_batch`` is the one-hot
        encoding of the labels with NUM_CLASSES columns.

    Raises:
        ValueError: On the first ``next()`` call if ``file_paths`` is empty or
            ``batch_size`` is not positive. Without this check the endless
            loop below would spin forever without yielding anything.
    """
    num_samples = len(file_paths)
    if num_samples == 0:
        raise ValueError("data_generator requires at least one file path")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)  # new sample order for every pass

        for start_idx in range(0, num_samples, batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]

            # Each loaded sample has shape (bands, img_size, img_size, 1).
            X_batch = np.array(
                [load_and_preprocess_npy(file_paths[idx], img_size) for idx in batch_indices]
            )
            y_batch = [labels[idx] for idx in batch_indices]

            # Move the band axis behind the spatial axes for Conv3D:
            # (n, bands, H, W, 1) -> (n, H, W, bands, 1)
            X_batch = np.transpose(X_batch, (0, 2, 3, 1, 4))
            yield X_batch, tf.keras.utils.to_categorical(y_batch, NUM_CLASSES)

def build_3d_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 31, 1), num_classes=NUM_CLASSES):
    """Create and compile the 3D CNN classifier.

    Three Conv3D + MaxPooling3D stages (32, 64, 128 filters) are followed by a
    512-unit dense layer with dropout and a softmax output.

    Args:
        input_shape: (height, width, depth, channels) of one sample.
        num_classes: Number of softmax outputs.

    Returns:
        A compiled Sequential model (Adam with learning rate 1e-4,
        categorical cross-entropy loss, accuracy metric).
    """
    # NOTE(review): with the default input, Flatten produces
    # 16 * 16 * 31 * 128 = 1,015,808 features, so the Dense(512) layer alone
    # holds roughly 520M weights -- worth revisiting if memory is tight.
    stack = [layers.Input(shape=input_shape)]

    for n_filters in (32, 64, 128):
        stack.append(layers.Conv3D(n_filters, (3, 3, 3), activation='relu', padding='same'))
        # Pool size 1 along depth: only height and width are halved, so
        # every spectral band is preserved.
        stack.append(layers.MaxPooling3D((2, 2, 1)))

    stack.extend([
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    network = models.Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
    network.compile(optimizer=adam,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
    return network

def evaluate_model(model, test_file_paths, test_labels):
    """Predict every test sample once and compute classification metrics.

    Args:
        model: Trained model exposing ``predict``.
        test_file_paths: Paths of the held-out .npy files.
        test_labels: Integer class indices aligned with ``test_file_paths``.

    Returns:
        ``(accuracy, precision, recall, f1, report)``. Precision, recall and
        F1 are macro averages over labels 0, 1 and 2; ``report`` is the text
        produced by ``classification_report``.
    """
    class_labels = [0, 1, 2]
    test_gen = data_generator(test_file_paths, test_labels, batch_size=1)

    y_true = []
    y_pred = []

    # The generator is endless; drawing exactly len(test_file_paths) batches
    # of size 1 covers one full shuffled pass, i.e. every sample once.
    for _ in range(len(test_file_paths)):
        X, y = next(test_gen)
        pred = model.predict(X, verbose=0)
        y_true.append(np.argmax(y, axis=1)[0])
        y_pred.append(np.argmax(pred, axis=1)[0])

    # Use the same fixed label set as the report below, so the macro averages
    # agree with it even when a class is absent from both y_true and y_pred.
    # zero_division=0 keeps the default value but silences the warning.
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=class_labels,
                                average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, labels=class_labels,
                          average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, labels=class_labels,
                  average='macro', zero_division=0)

    # Generate classification report
    report = classification_report(
        y_true, y_pred, labels=class_labels,
        target_names=[f'class_{i+3}' for i in range(3)],
        digits=4, zero_division=0,
    )

    return accuracy, precision, recall, f1, report

# Main execution for awan dataset
model_type = "awan"

print(f"\nTraining on {model_type} dataset...")

# Gather every .npy path and its label for this dataset
file_paths, labels = load_data_paths(train_path, model_type)

if not file_paths:
    print(f"No data found for {model_type}. Skipping...")
else:
    # Split into 70% train and 30% test, stratified to maintain class balance
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=TEST_SIZE, stratify=labels, random_state=42
    )

    # Train the model
    train_gen = data_generator(train_paths, train_labels, batch_size=BATCH_SIZE)
    # The generator yields ceil(n / BATCH_SIZE) batches per shuffled pass (the
    # last one may be partial). Rounding up keeps one Keras epoch equal to one
    # pass and avoids steps_per_epoch == 0 on very small training splits.
    steps_per_epoch = (len(train_paths) + BATCH_SIZE - 1) // BATCH_SIZE
    cnn_model = build_3d_cnn()
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)

    # Evaluate on the held-out 30% split
    print(f"Evaluating on {model_type} test split...")
    accuracy, precision, recall, f1, report = evaluate_model(cnn_model, test_paths, test_labels)

    # Print epochs and evaluation metrics
    print(f"Epochs: {EPOCHS}")
    print("\nEvaluation Metrics:")
    print(f"Accuracy      : {accuracy:.4f}")
    print(f"Precision (macro): {precision:.4f}")
    print(f"Recall (macro)   : {recall:.4f}")
    print(f"F1 Score (macro) : {f1:.4f}")
    print("\nFull Classification Report:")
    print(report)


Training on awan dataset...
Epoch 1/30


I0000 00:00:1745930084.794529      89 service.cc:148] XLA service 0x7cd148007860 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745930084.795566      89 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1745930085.099109      89 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-04-29 12:34:51.324329: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[64,32,3,3,3]{4,3,2,1,0}, u8[0]{0}) custom-call(f32[8,32,64,64,31]{4,3,2,1,0}, f32[8,64,64,64,31]{4,3,2,1,0}), window={size=3x3x3 pad=1_1x1_1x1_1}, dim_labels=bf012_oi012->bf012, custom_call_target="__cudnn$convBackwardFilter", backend_config={"cudnn_conv_backend_config":{"activation_mode":"kNone","conv_result_scale":1,"leakyrelu_alpha":0,"side_input_scale":0},"force_earliest_schedule":false,"operation_queue_id":"0","wait_on_operation_queues":[]} is taking a while...
2025-04-29 12:34:53.528

[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 510ms/step - accuracy: 0.4289 - loss: 1.8261
Epoch 2/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 228ms/step - accuracy: 0.6250 - loss: 0.8537
Epoch 3/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.6927 - loss: 0.7302
Epoch 4/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.7212 - loss: 0.6663
Epoch 5/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.7654 - loss: 0.5630
Epoch 6/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 227ms/step - accuracy: 0.8357 - loss: 0.4369
Epoch 7/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8484 - loss: 0.3859
Epoch 8/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8767 - loss: 0.3514
Epoch 9/30
[1m121/121[0m 

In [1]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import cv2

# Dataset location (adjust base_path to match the mounted Kaggle dataset)
base_path = "/kaggle/input/hsi-skincancer-main"
train_path = os.path.join(base_path, "train")

# Training configuration and label layout
IMG_SIZE = 128  # spatial size fed to the network (reduced from 256 to save memory)
BATCH_SIZE = 8  # kept small so the 3D volumes fit in GPU memory
EPOCHS = 50  # number of training epochs
TEST_SIZE = 0.3  # 30% held out for testing, 70% used for training
# class_3 -> 0, class_4 -> 1, class_5 -> 2
CLASS_MAPPING = {class_id: index for index, class_id in enumerate((3, 4, 5))}
NUM_CLASSES = len(CLASS_MAPPING)  # only class_3, class_4 and class_5 are used

def load_and_preprocess_npy(file_path, img_size=IMG_SIZE):
    """Load one hyperspectral cube from a .npy file and prepare it for the 3D CNN.

    Args:
        file_path: Path to a .npy file. The array is treated as
            (bands, height, width); the original author notes (31, 256, 256).
        img_size: Target height and width after resizing.

    Returns:
        Array of shape (bands, img_size, img_size, 1), divided by the cube's
        maximum value. An all-zero cube comes back as zeros instead of NaNs.
        The result only lies in [0, 1] when the cube has no negative values.
    """
    img = np.load(file_path)

    # cv2.resize operates on (height, width, channels), so move bands last.
    img = np.transpose(img, (1, 2, 0))
    img = cv2.resize(img, (img_size, img_size))

    # Back to bands-first, then append a singleton channel axis.
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=-1)

    # Scale by the peak value. A peak of exactly 0 would turn the whole cube
    # into NaNs (0 / 0), so divide by 1 in that case and keep the zeros.
    peak = np.max(img)
    if peak == 0:
        peak = 1
    return img / peak

def load_data_paths(data_path, model_type):
    """Collect .npy file paths and integer labels for class_3, class_4 and class_5.

    Args:
        data_path: Directory that contains one sub-directory per model type.
        model_type: Name of the sub-directory to scan.

    Returns:
        A ``(file_paths, labels)`` pair of equal-length lists. Each label is
        the CLASS_MAPPING index of the class directory the file was found in.
        A missing class directory is reported with a warning and skipped.
    """
    model_path = os.path.join(data_path, model_type)

    file_paths = []
    labels = []

    for class_id in (3, 4, 5):  # only class_3, class_4, class_5
        class_path = os.path.join(model_path, f"class_{class_id}")
        # isdir rather than exists: a stray file with this name must not
        # reach os.listdir, which would raise NotADirectoryError.
        if not os.path.isdir(class_path):
            print(f"Warning: {class_path} does not exist")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and therefore any seeded split of it) reproducible.
        npy_files = sorted(f for f in os.listdir(class_path) if f.endswith(".npy"))
        file_paths.extend(os.path.join(class_path, f) for f in npy_files)
        labels.extend([CLASS_MAPPING[class_id]] * len(npy_files))

    return file_paths, labels

def data_generator(file_paths, labels, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    """Yield shuffled (images, one-hot labels) batches forever.

    Every pass over the data uses a fresh random permutation, and the last
    batch of a pass may hold fewer than ``batch_size`` samples.

    Args:
        file_paths: Sequence of .npy paths.
        labels: Integer class indices, aligned with ``file_paths``.
        img_size: Spatial size passed to ``load_and_preprocess_npy``.
        batch_size: Maximum number of samples per batch.

    Yields:
        ``(X_batch, y_batch)`` where ``X_batch`` has shape
        (n, img_size, img_size, bands, 1) and ``y_batch`` is the one-hot
        encoding of the labels with NUM_CLASSES columns.

    Raises:
        ValueError: On the first ``next()`` call if ``file_paths`` is empty or
            ``batch_size`` is not positive. Without this check the endless
            loop below would spin forever without yielding anything.
    """
    num_samples = len(file_paths)
    if num_samples == 0:
        raise ValueError("data_generator requires at least one file path")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)  # new sample order for every pass

        for start_idx in range(0, num_samples, batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]

            # Each loaded sample has shape (bands, img_size, img_size, 1).
            X_batch = np.array(
                [load_and_preprocess_npy(file_paths[idx], img_size) for idx in batch_indices]
            )
            y_batch = [labels[idx] for idx in batch_indices]

            # Move the band axis behind the spatial axes for Conv3D:
            # (n, bands, H, W, 1) -> (n, H, W, bands, 1)
            X_batch = np.transpose(X_batch, (0, 2, 3, 1, 4))
            yield X_batch, tf.keras.utils.to_categorical(y_batch, NUM_CLASSES)

def build_3d_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 31, 1), num_classes=NUM_CLASSES):
    """Create and compile the 3D CNN classifier.

    Three Conv3D + MaxPooling3D stages (32, 64, 128 filters) are followed by a
    512-unit dense layer with dropout and a softmax output.

    Args:
        input_shape: (height, width, depth, channels) of one sample.
        num_classes: Number of softmax outputs.

    Returns:
        A compiled Sequential model (Adam with learning rate 1e-4,
        categorical cross-entropy loss, accuracy metric).
    """
    # NOTE(review): with the default input, Flatten produces
    # 16 * 16 * 31 * 128 = 1,015,808 features, so the Dense(512) layer alone
    # holds roughly 520M weights -- worth revisiting if memory is tight.
    stack = [layers.Input(shape=input_shape)]

    for n_filters in (32, 64, 128):
        stack.append(layers.Conv3D(n_filters, (3, 3, 3), activation='relu', padding='same'))
        # Pool size 1 along depth: only height and width are halved, so
        # every spectral band is preserved.
        stack.append(layers.MaxPooling3D((2, 2, 1)))

    stack.extend([
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    network = models.Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
    network.compile(optimizer=adam,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
    return network

def evaluate_model(model, test_file_paths, test_labels):
    """Predict every test sample once and compute classification metrics.

    Args:
        model: Trained model exposing ``predict``.
        test_file_paths: Paths of the held-out .npy files.
        test_labels: Integer class indices aligned with ``test_file_paths``.

    Returns:
        ``(accuracy, precision, recall, f1, report)``. Precision, recall and
        F1 are macro averages over labels 0, 1 and 2; ``report`` is the text
        produced by ``classification_report``.
    """
    class_labels = [0, 1, 2]
    test_gen = data_generator(test_file_paths, test_labels, batch_size=1)

    y_true = []
    y_pred = []

    # The generator is endless; drawing exactly len(test_file_paths) batches
    # of size 1 covers one full shuffled pass, i.e. every sample once.
    for _ in range(len(test_file_paths)):
        X, y = next(test_gen)
        pred = model.predict(X, verbose=0)
        y_true.append(np.argmax(y, axis=1)[0])
        y_pred.append(np.argmax(pred, axis=1)[0])

    # Use the same fixed label set as the report below, so the macro averages
    # agree with it even when a class is absent from both y_true and y_pred.
    # zero_division=0 keeps the default value but silences the warning.
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=class_labels,
                                average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, labels=class_labels,
                          average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, labels=class_labels,
                  average='macro', zero_division=0)

    # Generate classification report
    report = classification_report(
        y_true, y_pred, labels=class_labels,
        target_names=[f'class_{i+3}' for i in range(3)],
        digits=4, zero_division=0,
    )

    return accuracy, precision, recall, f1, report

# Main execution for hrnet dataset
model_type = "hrnet"

print(f"\nTraining on {model_type} dataset...")

# Gather every .npy path and its label for this dataset
file_paths, labels = load_data_paths(train_path, model_type)

if not file_paths:
    print(f"No data found for {model_type}. Skipping...")
else:
    # Split into 70% train and 30% test, stratified to maintain class balance
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=TEST_SIZE, stratify=labels, random_state=42
    )

    # Train the model
    train_gen = data_generator(train_paths, train_labels, batch_size=BATCH_SIZE)
    # The generator yields ceil(n / BATCH_SIZE) batches per shuffled pass (the
    # last one may be partial). Rounding up keeps one Keras epoch equal to one
    # pass and avoids steps_per_epoch == 0 on very small training splits.
    steps_per_epoch = (len(train_paths) + BATCH_SIZE - 1) // BATCH_SIZE
    cnn_model = build_3d_cnn()
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)

    # Evaluate on the held-out 30% split
    print(f"Evaluating on {model_type} test split...")
    accuracy, precision, recall, f1, report = evaluate_model(cnn_model, test_paths, test_labels)

    # Print epochs and evaluation metrics
    print(f"Epochs: {EPOCHS}")
    print("\nEvaluation Metrics:")
    print(f"Accuracy      : {accuracy:.4f}")
    print(f"Precision (macro): {precision:.4f}")
    print(f"Recall (macro)   : {recall:.4f}")
    print(f"F1 Score (macro) : {f1:.4f}")
    print("\nFull Classification Report:")
    print(report)

2025-04-29 19:23:38.914673: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745954619.151174      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745954619.210869      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Training on hrnet dataset...


I0000 00:00:1745954632.817575      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/50


I0000 00:00:1745954639.828626      91 service.cc:148] XLA service 0x7a2b98005eb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745954639.829488      91 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1745954640.135136      91 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-04-29 19:24:06.437148: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[64,32,3,3,3]{4,3,2,1,0}, u8[0]{0}) custom-call(f32[8,32,64,64,31]{4,3,2,1,0}, f32[8,64,64,64,31]{4,3,2,1,0}), window={size=3x3x3 pad=1_1x1_1x1_1}, dim_labels=bf012_oi012->bf012, custom_call_target="__cudnn$convBackwardFilter", backend_config={"cudnn_conv_backend_config":{"activation_mode":"kNone","conv_result_scale":1,"leakyrelu_alpha":0,"side_input_scale":0},"force_earliest_schedule":false,"operation_queue_id":"0","wait_on_operation_queues":[]} is taking a while...
2025-04-29 19:24:08.640

[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 471ms/step - accuracy: 0.4791 - loss: 1.0724
Epoch 2/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 228ms/step - accuracy: 0.6943 - loss: 0.7365
Epoch 3/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 227ms/step - accuracy: 0.7134 - loss: 0.6829
Epoch 4/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8225 - loss: 0.4991
Epoch 5/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8381 - loss: 0.4541
Epoch 6/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 227ms/step - accuracy: 0.8222 - loss: 0.4709
Epoch 7/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 227ms/step - accuracy: 0.8794 - loss: 0.3373
Epoch 8/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 227ms/step - accuracy: 0.9134 - loss: 0.2245
Epoch 9/50
[1m121/121[0m 

In [1]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import cv2

# Dataset location (adjust base_path to match the mounted Kaggle dataset)
base_path = "/kaggle/input/hsi-skincancer-main"
train_path = os.path.join(base_path, "train")

# Training configuration and label layout
IMG_SIZE = 128  # spatial size fed to the network (reduced from 256 to save memory)
BATCH_SIZE = 8  # kept small so the 3D volumes fit in GPU memory
EPOCHS = 30  # number of training epochs
TEST_SIZE = 0.3  # 30% held out for testing, 70% used for training
# class_3 -> 0, class_4 -> 1, class_5 -> 2
CLASS_MAPPING = {class_id: index for index, class_id in enumerate((3, 4, 5))}
NUM_CLASSES = len(CLASS_MAPPING)  # only class_3, class_4 and class_5 are used

def load_and_preprocess_npy(file_path, img_size=IMG_SIZE):
    """Load one hyperspectral cube from a .npy file and prepare it for the 3D CNN.

    Args:
        file_path: Path to a .npy file. The array is treated as
            (bands, height, width); the original author notes (31, 256, 256).
        img_size: Target height and width after resizing.

    Returns:
        Array of shape (bands, img_size, img_size, 1), divided by the cube's
        maximum value. An all-zero cube comes back as zeros instead of NaNs.
        The result only lies in [0, 1] when the cube has no negative values.
    """
    img = np.load(file_path)

    # cv2.resize operates on (height, width, channels), so move bands last.
    img = np.transpose(img, (1, 2, 0))
    img = cv2.resize(img, (img_size, img_size))

    # Back to bands-first, then append a singleton channel axis.
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=-1)

    # Scale by the peak value. A peak of exactly 0 would turn the whole cube
    # into NaNs (0 / 0), so divide by 1 in that case and keep the zeros.
    peak = np.max(img)
    if peak == 0:
        peak = 1
    return img / peak

def load_data_paths(data_path, model_type):
    """Collect .npy file paths and integer labels for class_3, class_4 and class_5.

    Args:
        data_path: Directory that contains one sub-directory per model type.
        model_type: Name of the sub-directory to scan.

    Returns:
        A ``(file_paths, labels)`` pair of equal-length lists. Each label is
        the CLASS_MAPPING index of the class directory the file was found in.
        A missing class directory is reported with a warning and skipped.
    """
    model_path = os.path.join(data_path, model_type)

    file_paths = []
    labels = []

    for class_id in (3, 4, 5):  # only class_3, class_4, class_5
        class_path = os.path.join(model_path, f"class_{class_id}")
        # isdir rather than exists: a stray file with this name must not
        # reach os.listdir, which would raise NotADirectoryError.
        if not os.path.isdir(class_path):
            print(f"Warning: {class_path} does not exist")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and therefore any seeded split of it) reproducible.
        npy_files = sorted(f for f in os.listdir(class_path) if f.endswith(".npy"))
        file_paths.extend(os.path.join(class_path, f) for f in npy_files)
        labels.extend([CLASS_MAPPING[class_id]] * len(npy_files))

    return file_paths, labels

def data_generator(file_paths, labels, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    """Yield shuffled (images, one-hot labels) batches forever.

    Every pass over the data uses a fresh random permutation, and the last
    batch of a pass may hold fewer than ``batch_size`` samples.

    Args:
        file_paths: Sequence of .npy paths.
        labels: Integer class indices, aligned with ``file_paths``.
        img_size: Spatial size passed to ``load_and_preprocess_npy``.
        batch_size: Maximum number of samples per batch.

    Yields:
        ``(X_batch, y_batch)`` where ``X_batch`` has shape
        (n, img_size, img_size, bands, 1) and ``y_batch`` is the one-hot
        encoding of the labels with NUM_CLASSES columns.

    Raises:
        ValueError: On the first ``next()`` call if ``file_paths`` is empty or
            ``batch_size`` is not positive. Without this check the endless
            loop below would spin forever without yielding anything.
    """
    num_samples = len(file_paths)
    if num_samples == 0:
        raise ValueError("data_generator requires at least one file path")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)  # new sample order for every pass

        for start_idx in range(0, num_samples, batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]

            # Each loaded sample has shape (bands, img_size, img_size, 1).
            X_batch = np.array(
                [load_and_preprocess_npy(file_paths[idx], img_size) for idx in batch_indices]
            )
            y_batch = [labels[idx] for idx in batch_indices]

            # Move the band axis behind the spatial axes for Conv3D:
            # (n, bands, H, W, 1) -> (n, H, W, bands, 1)
            X_batch = np.transpose(X_batch, (0, 2, 3, 1, 4))
            yield X_batch, tf.keras.utils.to_categorical(y_batch, NUM_CLASSES)

def build_3d_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 31, 1), num_classes=NUM_CLASSES):
    """Create and compile the 3D CNN classifier.

    Three Conv3D + MaxPooling3D stages (32, 64, 128 filters) are followed by a
    512-unit dense layer with dropout and a softmax output.

    Args:
        input_shape: (height, width, depth, channels) of one sample.
        num_classes: Number of softmax outputs.

    Returns:
        A compiled Sequential model (Adam with learning rate 1e-4,
        categorical cross-entropy loss, accuracy metric).
    """
    # NOTE(review): with the default input, Flatten produces
    # 16 * 16 * 31 * 128 = 1,015,808 features, so the Dense(512) layer alone
    # holds roughly 520M weights -- worth revisiting if memory is tight.
    stack = [layers.Input(shape=input_shape)]

    for n_filters in (32, 64, 128):
        stack.append(layers.Conv3D(n_filters, (3, 3, 3), activation='relu', padding='same'))
        # Pool size 1 along depth: only height and width are halved, so
        # every spectral band is preserved.
        stack.append(layers.MaxPooling3D((2, 2, 1)))

    stack.extend([
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    network = models.Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
    network.compile(optimizer=adam,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
    return network

def evaluate_model(model, test_file_paths, test_labels):
    """Predict every test sample once and compute classification metrics.

    Args:
        model: Trained model exposing ``predict``.
        test_file_paths: Paths of the held-out .npy files.
        test_labels: Integer class indices aligned with ``test_file_paths``.

    Returns:
        ``(accuracy, precision, recall, f1, report)``. Precision, recall and
        F1 are macro averages over labels 0, 1 and 2; ``report`` is the text
        produced by ``classification_report``.
    """
    class_labels = [0, 1, 2]
    test_gen = data_generator(test_file_paths, test_labels, batch_size=1)

    y_true = []
    y_pred = []

    # The generator is endless; drawing exactly len(test_file_paths) batches
    # of size 1 covers one full shuffled pass, i.e. every sample once.
    for _ in range(len(test_file_paths)):
        X, y = next(test_gen)
        pred = model.predict(X, verbose=0)
        y_true.append(np.argmax(y, axis=1)[0])
        y_pred.append(np.argmax(pred, axis=1)[0])

    # Use the same fixed label set as the report below, so the macro averages
    # agree with it even when a class is absent from both y_true and y_pred.
    # zero_division=0 keeps the default value but silences the warning.
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=class_labels,
                                average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, labels=class_labels,
                          average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, labels=class_labels,
                  average='macro', zero_division=0)

    # Generate classification report
    report = classification_report(
        y_true, y_pred, labels=class_labels,
        target_names=[f'class_{i+3}' for i in range(3)],
        digits=4, zero_division=0,
    )

    return accuracy, precision, recall, f1, report

# Main execution for hscnn_plus dataset
model_type = "hscnn_plus"

print(f"\nTraining on {model_type} dataset...")

# Gather every .npy path and its label for this dataset
file_paths, labels = load_data_paths(train_path, model_type)

if not file_paths:
    print(f"No data found for {model_type}. Skipping...")
else:
    # Split into 70% train and 30% test, stratified to maintain class balance
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=TEST_SIZE, stratify=labels, random_state=42
    )

    # Train the model
    train_gen = data_generator(train_paths, train_labels, batch_size=BATCH_SIZE)
    # The generator yields ceil(n / BATCH_SIZE) batches per shuffled pass (the
    # last one may be partial). Rounding up keeps one Keras epoch equal to one
    # pass and avoids steps_per_epoch == 0 on very small training splits.
    steps_per_epoch = (len(train_paths) + BATCH_SIZE - 1) // BATCH_SIZE
    cnn_model = build_3d_cnn()
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)

    # Evaluate on the held-out 30% split
    print(f"Evaluating on {model_type} test split...")
    accuracy, precision, recall, f1, report = evaluate_model(cnn_model, test_paths, test_labels)

    # Print epochs and evaluation metrics
    print(f"Epochs: {EPOCHS}")
    print("\nEvaluation Metrics:")
    print(f"Accuracy      : {accuracy:.4f}")
    print(f"Precision (macro): {precision:.4f}")
    print(f"Recall (macro)   : {recall:.4f}")
    print(f"F1 Score (macro) : {f1:.4f}")
    print("\nFull Classification Report:")
    print(report)

2025-04-29 13:22:29.485431: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745932949.680983      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745932949.737790      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Training on hscnn_plus dataset...


I0000 00:00:1745932963.128023      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/30


I0000 00:00:1745932970.190550      91 service.cc:148] XLA service 0x7b3e3c0053f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745932970.191456      91 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1745932970.486825      91 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-04-29 13:22:56.712925: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[64,32,3,3,3]{4,3,2,1,0}, u8[0]{0}) custom-call(f32[8,32,64,64,31]{4,3,2,1,0}, f32[8,64,64,64,31]{4,3,2,1,0}), window={size=3x3x3 pad=1_1x1_1x1_1}, dim_labels=bf012_oi012->bf012, custom_call_target="__cudnn$convBackwardFilter", backend_config={"cudnn_conv_backend_config":{"activation_mode":"kNone","conv_result_scale":1,"leakyrelu_alpha":0,"side_input_scale":0},"force_earliest_schedule":false,"operation_queue_id":"0","wait_on_operation_queues":[]} is taking a while...
2025-04-29 13:22:58.922

[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 541ms/step - accuracy: 0.5211 - loss: 1.1338
Epoch 2/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 228ms/step - accuracy: 0.7045 - loss: 0.6957
Epoch 3/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.7582 - loss: 0.5815
Epoch 4/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8264 - loss: 0.4590
Epoch 5/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8590 - loss: 0.3650
Epoch 6/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 227ms/step - accuracy: 0.9212 - loss: 0.2289
Epoch 7/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.9526 - loss: 0.1808
Epoch 8/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.9527 - loss: 0.1259
Epoch 9/30
[1m121/121[0m 

In [1]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import cv2

# Dataset location (adjust base_path to match the mounted Kaggle dataset)
base_path = "/kaggle/input/hsi-skincancer-main"
train_path = os.path.join(base_path, "train")

# Training configuration and label layout
IMG_SIZE = 128  # spatial size fed to the network (reduced from 256 to save memory)
BATCH_SIZE = 8  # kept small so the 3D volumes fit in GPU memory
EPOCHS = 50  # number of training epochs
TEST_SIZE = 0.3  # 30% held out for testing, 70% used for training
# class_3 -> 0, class_4 -> 1, class_5 -> 2
CLASS_MAPPING = {class_id: index for index, class_id in enumerate((3, 4, 5))}
NUM_CLASSES = len(CLASS_MAPPING)  # only class_3, class_4 and class_5 are used

def load_and_preprocess_npy(file_path, img_size=IMG_SIZE):
    """Load one hyperspectral cube from a .npy file and prepare it for the 3D CNN.

    Args:
        file_path: Path to a .npy file. The array is treated as
            (bands, height, width); the original author notes (31, 256, 256).
        img_size: Target height and width after resizing.

    Returns:
        Array of shape (bands, img_size, img_size, 1), divided by the cube's
        maximum value. An all-zero cube comes back as zeros instead of NaNs.
        The result only lies in [0, 1] when the cube has no negative values.
    """
    img = np.load(file_path)

    # cv2.resize operates on (height, width, channels), so move bands last.
    img = np.transpose(img, (1, 2, 0))
    img = cv2.resize(img, (img_size, img_size))

    # Back to bands-first, then append a singleton channel axis.
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=-1)

    # Scale by the peak value. A peak of exactly 0 would turn the whole cube
    # into NaNs (0 / 0), so divide by 1 in that case and keep the zeros.
    peak = np.max(img)
    if peak == 0:
        peak = 1
    return img / peak

def load_data_paths(data_path, model_type):
    """Collect .npy file paths and integer labels for class_3, class_4 and class_5.

    Args:
        data_path: Directory that contains one sub-directory per model type.
        model_type: Name of the sub-directory to scan.

    Returns:
        A ``(file_paths, labels)`` pair of equal-length lists. Each label is
        the CLASS_MAPPING index of the class directory the file was found in.
        A missing class directory is reported with a warning and skipped.
    """
    model_path = os.path.join(data_path, model_type)

    file_paths = []
    labels = []

    for class_id in (3, 4, 5):  # only class_3, class_4, class_5
        class_path = os.path.join(model_path, f"class_{class_id}")
        # isdir rather than exists: a stray file with this name must not
        # reach os.listdir, which would raise NotADirectoryError.
        if not os.path.isdir(class_path):
            print(f"Warning: {class_path} does not exist")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and therefore any seeded split of it) reproducible.
        npy_files = sorted(f for f in os.listdir(class_path) if f.endswith(".npy"))
        file_paths.extend(os.path.join(class_path, f) for f in npy_files)
        labels.extend([CLASS_MAPPING[class_id]] * len(npy_files))

    return file_paths, labels

def data_generator(file_paths, labels, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    """Yield shuffled (images, one-hot labels) batches forever.

    Every pass over the data uses a fresh random permutation, and the last
    batch of a pass may hold fewer than ``batch_size`` samples.

    Args:
        file_paths: Sequence of .npy paths.
        labels: Integer class indices, aligned with ``file_paths``.
        img_size: Spatial size passed to ``load_and_preprocess_npy``.
        batch_size: Maximum number of samples per batch.

    Yields:
        ``(X_batch, y_batch)`` where ``X_batch`` has shape
        (n, img_size, img_size, bands, 1) and ``y_batch`` is the one-hot
        encoding of the labels with NUM_CLASSES columns.

    Raises:
        ValueError: On the first ``next()`` call if ``file_paths`` is empty or
            ``batch_size`` is not positive. Without this check the endless
            loop below would spin forever without yielding anything.
    """
    num_samples = len(file_paths)
    if num_samples == 0:
        raise ValueError("data_generator requires at least one file path")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)  # new sample order for every pass

        for start_idx in range(0, num_samples, batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]

            # Each loaded sample has shape (bands, img_size, img_size, 1).
            X_batch = np.array(
                [load_and_preprocess_npy(file_paths[idx], img_size) for idx in batch_indices]
            )
            y_batch = [labels[idx] for idx in batch_indices]

            # Move the band axis behind the spatial axes for Conv3D:
            # (n, bands, H, W, 1) -> (n, H, W, bands, 1)
            X_batch = np.transpose(X_batch, (0, 2, 3, 1, 4))
            yield X_batch, tf.keras.utils.to_categorical(y_batch, NUM_CLASSES)

def build_3d_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 31, 1), num_classes=NUM_CLASSES):
    """Create and compile the 3D CNN classifier.

    Three Conv3D + MaxPooling3D stages (32, 64, 128 filters) are followed by a
    512-unit dense layer with dropout and a softmax output.

    Args:
        input_shape: (height, width, depth, channels) of one sample.
        num_classes: Number of softmax outputs.

    Returns:
        A compiled Sequential model (Adam with learning rate 1e-4,
        categorical cross-entropy loss, accuracy metric).
    """
    # NOTE(review): with the default input, Flatten produces
    # 16 * 16 * 31 * 128 = 1,015,808 features, so the Dense(512) layer alone
    # holds roughly 520M weights -- worth revisiting if memory is tight.
    stack = [layers.Input(shape=input_shape)]

    for n_filters in (32, 64, 128):
        stack.append(layers.Conv3D(n_filters, (3, 3, 3), activation='relu', padding='same'))
        # Pool size 1 along depth: only height and width are halved, so
        # every spectral band is preserved.
        stack.append(layers.MaxPooling3D((2, 2, 1)))

    stack.extend([
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    network = models.Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
    network.compile(optimizer=adam,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
    return network

# Function to evaluate the model and compute metrics
def evaluate_model(model, test_file_paths, test_labels):
    """Predict every test sample once and compute classification metrics.

    Samples are fed through ``data_generator`` in batches of ``BATCH_SIZE``
    instead of one ``model.predict`` call per sample, which avoids paying
    predict's per-call overhead for every file.

    Args:
        model: Trained model exposing ``predict``.
        test_file_paths: Sequence of ``.npy`` paths to evaluate.
        test_labels: Integer class indices aligned with ``test_file_paths``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, report)`` where precision,
        recall and F1 are macro-averaged and ``report`` is the text produced
        by ``classification_report``.
    """
    num_samples = len(test_file_paths)
    test_gen = data_generator(test_file_paths, test_labels, batch_size=BATCH_SIZE)

    # The generator yields each sample exactly once per pass (with a shorter
    # final batch), so ceil(n / batch) batches cover the test set exactly once.
    num_batches = -(-num_samples // BATCH_SIZE)

    y_true = []
    y_pred = []

    for _ in range(num_batches):
        X, y = next(test_gen)
        pred = model.predict(X, verbose=0)
        # Labels travel with their samples, so the generator's shuffling does
        # not affect the pairing of truth and prediction.
        y_true.extend(np.argmax(y, axis=1).tolist())
        y_pred.extend(np.argmax(pred, axis=1).tolist())

    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')

    # Generate classification report; indices 0..2 are displayed as
    # class_3..class_5.
    report = classification_report(y_true, y_pred, labels=[0, 1, 2], target_names=[f'class_{i+3}' for i in range(3)], digits=4)

    return accuracy, precision, recall, f1, report

# Main execution for mst_plus_plus dataset
# Train a 3D CNN on one reconstruction variant of the dataset, then report
# metrics on a held-out split of the same files.
model_type = "mst_plus_plus"

print(f"\nTraining on {model_type} dataset...")

# Load all file paths and labels
file_paths, labels = load_data_paths(train_path, model_type)

if not file_paths:
    print(f"No data found for {model_type}. Skipping...")
else:
    # Split into train and test (fraction set by TEST_SIZE), stratified to
    # maintain class balance; fixed seed keeps the split reproducible.
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=TEST_SIZE, stratify=labels, random_state=42
    )

    # Train the model
    train_gen = data_generator(train_paths, train_labels, batch_size=BATCH_SIZE)
    # Ceil division: one epoch is exactly one pass of the generator over the
    # training files (including the final short batch), and the step count
    # can never be 0 when there are fewer samples than BATCH_SIZE.
    steps_per_epoch = int(np.ceil(len(train_paths) / BATCH_SIZE))
    cnn_model = build_3d_cnn()
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)

    # Evaluate on test set
    print(f"Evaluating on {model_type} test split...")
    accuracy, precision, recall, f1, report = evaluate_model(cnn_model, test_paths, test_labels)

    # Print epochs and evaluation metrics
    print(f"Epochs: {EPOCHS}")
    print("\nEvaluation Metrics:")
    print(f"Accuracy      : {accuracy:.4f}")
    print(f"Precision (macro): {precision:.4f}")
    print(f"Recall (macro)   : {recall:.4f}")
    print(f"F1 Score (macro) : {f1:.4f}")
    print("\nFull Classification Report:")
    print(report)

2025-04-29 18:55:39.189423: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745952939.381478      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745952939.442929      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Training on mst_plus_plus dataset...


I0000 00:00:1745952952.382858      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/50


I0000 00:00:1745952958.712882      90 service.cc:148] XLA service 0x7c1eb4006190 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745952958.713386      90 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1745952959.006537      90 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-04-29 18:56:05.204770: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[64,32,3,3,3]{4,3,2,1,0}, u8[0]{0}) custom-call(f32[8,32,64,64,31]{4,3,2,1,0}, f32[8,64,64,64,31]{4,3,2,1,0}), window={size=3x3x3 pad=1_1x1_1x1_1}, dim_labels=bf012_oi012->bf012, custom_call_target="__cudnn$convBackwardFilter", backend_config={"cudnn_conv_backend_config":{"activation_mode":"kNone","conv_result_scale":1,"leakyrelu_alpha":0,"side_input_scale":0},"force_earliest_schedule":false,"operation_queue_id":"0","wait_on_operation_queues":[]} is taking a while...
2025-04-29 18:56:07.409

[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 537ms/step - accuracy: 0.4958 - loss: 1.1329
Epoch 2/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 228ms/step - accuracy: 0.7248 - loss: 0.6812
Epoch 3/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.7872 - loss: 0.5771
Epoch 4/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8201 - loss: 0.4783
Epoch 5/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8449 - loss: 0.4267
Epoch 6/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8825 - loss: 0.3117
Epoch 7/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.8870 - loss: 0.2658
Epoch 8/50
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 227ms/step - accuracy: 0.9109 - loss: 0.2514
Epoch 9/50
[1m121/121[0m 