In [2]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
import cv2

# Define paths to dataset
base_path = "/kaggle/input/hsi-skincancer-main"  # Adjust based on Kaggle dataset path
train_path = os.path.join(base_path, "train")

# Parameters
IMG_SIZE = 128  # Reduced from 256 to 128 to save memory
BATCH_SIZE = 16  # Small batch size to fit in GPU memory
NUM_CLASSES = 3  # Only class_3, class_4, class_5
EPOCHS = 20  # Increased to allow more training
CLASS_MAPPING = {3: 0, 4: 1, 5: 2}  # Map class_3 -> 0, class_4 -> 1, class_5 -> 2
TEST_SIZE = 0.3  # 30% for test, 70% for train

# Function to load and preprocess .npy files
def load_and_preprocess_npy(file_path, img_size=IMG_SIZE):
    # Load hyperspectral image (shape: 31, 256, 256)
    img = np.load(file_path)
    
    # Use all 31 bands instead of averaging (resize spatial dims only)
    img = np.transpose(img, (1, 2, 0))  # Shape: (256, 256, 31)
    img = cv2.resize(img, (img_size, img_size))  # Shape: (128, 128, 31)
    
    # Normalize to [0, 1]
    img = img / np.max(img)
    return img

# Function to load all file paths and labels
def load_data_paths(data_path, model_type):
    model_path = os.path.join(data_path, model_type)
    classes = [f"class_{i}" for i in [3, 4, 5]]  # Only class_3, class_4, class_5
    
    file_paths = []
    labels = []
    
    for class_name in classes:
        class_idx = CLASS_MAPPING[int(class_name.split('_')[1])]
        class_path = os.path.join(model_path, class_name)
        if not os.path.exists(class_path):
            print(f"Warning: {class_path} does not exist")
            continue
            
        npy_files = [f for f in os.listdir(class_path) if f.endswith(".npy")]
        for npy_file in npy_files:
            file_path = os.path.join(class_path, npy_file)
            file_paths.append(file_path)
            labels.append(class_idx)
    
    print(f"Loaded {len(file_paths)} samples for {model_type}: {np.unique(labels, return_counts=True)}")
    return file_paths, labels

# Data generator to load data in batches
def data_generator(file_paths, labels, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    while True:
        indices = np.arange(len(file_paths))
        np.random.shuffle(indices)  # Shuffle for randomness
        
        for start_idx in range(0, len(file_paths), batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]
            X_batch = []
            y_batch = []
            
            for idx in batch_indices:
                img = load_and_preprocess_npy(file_paths[idx], img_size)
                X_batch.append(img)
                y_batch.append(labels[idx])
            
            if X_batch:  # Ensure batch is not empty
                yield (np.array(X_batch),  # Shape: (batch_size, 128, 128, 31)
                       tf.keras.utils.to_categorical(y_batch, NUM_CLASSES))
                print(f"Generated batch with shape: {np.array(X_batch).shape}, Sample label: {y_batch[0]}")

# Build an improved 2D CNN model (adjusted for 31 bands)
def build_2d_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 31), num_classes=NUM_CLASSES):
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),  # Increased neurons
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

# Function to evaluate the model and compute metrics
def evaluate_model(model, test_file_paths, test_labels):
    test_gen = data_generator(test_file_paths, test_labels, batch_size=1)
    
    y_true = []
    y_pred = []
    
    for _ in range(len(test_file_paths)):
        X, y = next(test_gen)
        pred = model.predict(X, verbose=0)
        y_true.append(np.argmax(y, axis=1)[0])
        y_pred.append(np.argmax(pred, axis=1)[0])
    
    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    
    return accuracy, f1, precision, recall

# Main execution for awan dataset
model_type = "awan"

print(f"\nProcessing {model_type} dataset...")

# Load all file paths and labels
file_paths, labels = load_data_paths(train_path, model_type)

if not file_paths:
    print(f"No data found for {model_type}. Skipping...")
else:
    # Split into 70% train and 30% test, stratified to maintain class balance
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=TEST_SIZE, stratify=labels, random_state=42
    )
    
    # Train the model
    print(f"Training on {model_type} dataset...")
    train_gen = data_generator(train_paths, train_labels, batch_size=BATCH_SIZE)
    steps_per_epoch = len(train_paths) // BATCH_SIZE
    cnn_model = build_2d_cnn()
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)
    
    # Evaluate on test set
    print(f"Evaluating on {model_type} test split...")
    accuracy, f1, precision, recall = evaluate_model(cnn_model, test_paths, test_labels)
    
    print(f"Results for {model_type}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score (Weighted): {f1:.4f}")
    print(f"Precision (Weighted): {precision:.4f}")
    print(f"Recall (Weighted): {recall:.4f}")


Processing awan dataset...
Loaded 1386 samples for awan: (array([0, 1, 2]), array([462, 462, 462]))
Training on awan dataset...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Generated batch with shape: (16, 128, 128, 31), Sample label: 1
Epoch 1/20
[1m 1/60[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:54[0m 4s/step - accuracy: 0.3125 - loss: 1.2916Generated batch with shape: (16, 128, 128, 31), Sample label: 2
Generated batch with shape: (16, 128, 128, 31), Sample label: 2
[1m 3/60[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m6s[0m 114ms/step - accuracy: 0.3299 - loss: 1.2588Generated batch with shape: (16, 128, 128, 31), Sample label: 2
[1m 4/60[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m7s[0m 137ms/step - accuracy: 0.3255 - loss: 1.2526Generated batch with shape: (16, 128, 128, 31), Sample label: 0
[1m 5/60[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m7s[0m 145ms/step - accuracy: 0.3254 - loss: 1.2419Generated batch with shape: (16, 128, 128, 31), Sample label: 2
[1m 6/60[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m8s[0m 155ms/step - accuracy: 0.3267 - loss: 1.2316Generated batch with shape: (16, 128, 128, 31), Sample label: 1
[1m 7/60[

In [1]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import cv2

# Define paths to dataset
base_path = "/kaggle/input/hsi-skincancer-main"  # Adjust based on Kaggle dataset path
train_path = os.path.join(base_path, "train")

# Parameters
IMG_SIZE = 128  # Reduced from 256 to 128 to save memory
BATCH_SIZE = 16  # Small batch size to fit in GPU memory
NUM_CLASSES = 3  # Only class_3, class_4, class_5
EPOCHS = 50  # Increased to 30 for more training
CLASS_MAPPING = {3: 0, 4: 1, 5: 2}  # Map class_3 -> 0, class_4 -> 1, class_5 -> 2
TEST_SIZE = 0.3  # 30% for test, 70% for train

# Function to load and preprocess .npy files
def load_and_preprocess_npy(file_path, img_size=IMG_SIZE):
    # Load hyperspectral image (shape: 31, 256, 256)
    img = np.load(file_path)
    
    # Use a subset of 10 bands to reduce memory (e.g., first 10 bands)
    img = img[:10, :, :]  # Shape: (10, 256, 256)
    img = np.transpose(img, (1, 2, 0))  # Shape: (256, 256, 10)
    img = cv2.resize(img, (img_size, img_size))  # Shape: (128, 128, 10)
    
    # Normalize to [0, 1]
    img = img / np.max(img)
    return img

# Function to load all file paths and labels
def load_data_paths(data_path, model_type):
    model_path = os.path.join(data_path, model_type)
    classes = [f"class_{i}" for i in [3, 4, 5]]  # Only class_3, class_4, class_5
    
    file_paths = []
    labels = []
    
    for class_name in classes:
        class_idx = CLASS_MAPPING[int(class_name.split('_')[1])]
        class_path = os.path.join(model_path, class_name)
        if not os.path.exists(class_path):
            print(f"Warning: {class_path} does not exist")
            continue
            
        npy_files = [f for f in os.listdir(class_path) if f.endswith(".npy")]
        for npy_file in npy_files:
            file_path = os.path.join(class_path, npy_file)
            file_paths.append(file_path)
            labels.append(class_idx)
    
    return file_paths, labels

# Data generator to load data in batches
def data_generator(file_paths, labels, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    while True:
        indices = np.arange(len(file_paths))
        np.random.shuffle(indices)  # Shuffle for randomness
        
        for start_idx in range(0, len(file_paths), batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]
            X_batch = []
            y_batch = []
            
            for idx in batch_indices:
                img = load_and_preprocess_npy(file_paths[idx], img_size)
                X_batch.append(img)
                y_batch.append(labels[idx])
            
            if X_batch:  # Ensure batch is not empty
                yield (np.array(X_batch),  # Shape: (batch_size, 128, 128, 10)
                       tf.keras.utils.to_categorical(y_batch, NUM_CLASSES))

# Build an optimized 2D CNN model
def build_2d_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 10), num_classes=NUM_CLASSES):
    # Data augmentation
    data_augmentation = tf.keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
    ])
    
    model = models.Sequential([
        layers.Input(shape=input_shape),  # Explicitly define input shape
        data_augmentation,
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

# Function to evaluate the model and compute metrics
def evaluate_model(model, test_file_paths, test_labels):
    test_gen = data_generator(test_file_paths, test_labels, batch_size=1)
    
    y_true = []
    y_pred = []
    
    for _ in range(len(test_file_paths)):
        X, y = next(test_gen)
        pred = model.predict(X, verbose=0)
        y_true.append(np.argmax(y, axis=1)[0])
        y_pred.append(np.argmax(pred, axis=1)[0])
    
    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    
    # Generate classification report
    report = classification_report(y_true, y_pred, labels=[0, 1, 2], target_names=[f'class_{i+3}' for i in range(3)], digits=4)
    
    return accuracy, precision, recall, f1, report

# Main execution for hrnet dataset
model_type = "hrnet"

print(f"\nTraining on {model_type} dataset...")

# Load all file paths and labels
file_paths, labels = load_data_paths(train_path, model_type)

if not file_paths:
    print(f"No data found for {model_type}. Skipping...")
else:
    # Split into 70% train and 30% test, stratified to maintain class balance
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=TEST_SIZE, stratify=labels, random_state=42
    )
    
    # Train the model
    train_gen = data_generator(train_paths, train_labels, batch_size=BATCH_SIZE)
    steps_per_epoch = len(train_paths) // BATCH_SIZE
    cnn_model = build_2d_cnn()
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)
    
    # Evaluate on test set
    print(f"Evaluating on {model_type} test split...")
    accuracy, precision, recall, f1, report = evaluate_model(cnn_model, test_paths, test_labels)
    
    # Print epochs and evaluation metrics
    print(f"Epochs: {EPOCHS}")
    print("\nEvaluation Metrics:")
    print(f"Accuracy      : {accuracy:.4f}")
    print(f"Precision (macro): {precision:.4f}")
    print(f"Recall (macro)   : {recall:.4f}")
    print(f"F1 Score (macro) : {f1:.4f}")
    print("\nFull Classification Report:")
    print(report)

2025-04-29 18:38:13.305093: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745951893.549994      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745951893.614085      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Training on hrnet dataset...


I0000 00:00:1745951908.418719      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/50


I0000 00:00:1745951920.159684      88 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 2s/step - accuracy: 0.4562 - loss: 1.0364
Epoch 2/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 94ms/step - accuracy: 0.6433 - loss: 0.7784
Epoch 3/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 93ms/step - accuracy: 0.6488 - loss: 0.7773
Epoch 4/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 107ms/step - accuracy: 0.7051 - loss: 0.6894
Epoch 5/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 105ms/step - accuracy: 0.6959 - loss: 0.6894
Epoch 6/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 104ms/step - accuracy: 0.7236 - loss: 0.6623
Epoch 7/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 78ms/step - accuracy: 0.6737 - loss: 0.7321
Epoch 8/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 87ms/step - accuracy: 0.6893 - loss: 0.6986
Epoch 9/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [9]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import cv2

# Define paths to dataset
base_path = "/kaggle/input/hsi-skincancer-main"  # Adjust based on Kaggle dataset path
train_path = os.path.join(base_path, "train")

# Parameters
IMG_SIZE = 128  # Reduced from 256 to 128 to save memory
BATCH_SIZE = 16  # Small batch size to fit in GPU memory
NUM_CLASSES = 3  # Only class_3, class_4, class_5
EPOCHS = 20  # Increased to 30 for more training
CLASS_MAPPING = {3: 0, 4: 1, 5: 2}  # Map class_3 -> 0, class_4 -> 1, class_5 -> 2
TEST_SIZE = 0.3  # 30% for test, 70% for train

# Function to load and preprocess .npy files
def load_and_preprocess_npy(file_path, img_size=IMG_SIZE):
    # Load hyperspectral image (shape: 31, 256, 256)
    img = np.load(file_path)
    
    # Use a subset of 10 bands to reduce memory (e.g., first 10 bands)
    img = img[:10, :, :]  # Shape: (10, 256, 256)
    img = np.transpose(img, (1, 2, 0))  # Shape: (256, 256, 10)
    img = cv2.resize(img, (img_size, img_size))  # Shape: (128, 128, 10)
    
    # Normalize to [0, 1]
    img = img / np.max(img)
    return img

# Function to load all file paths and labels
def load_data_paths(data_path, model_type):
    model_path = os.path.join(data_path, model_type)
    classes = [f"class_{i}" for i in [3, 4, 5]]  # Only class_3, class_4, class_5
    
    file_paths = []
    labels = []
    
    for class_name in classes:
        class_idx = CLASS_MAPPING[int(class_name.split('_')[1])]
        class_path = os.path.join(model_path, class_name)
        if not os.path.exists(class_path):
            print(f"Warning: {class_path} does not exist")
            continue
            
        npy_files = [f for f in os.listdir(class_path) if f.endswith(".npy")]
        for npy_file in npy_files:
            file_path = os.path.join(class_path, npy_file)
            file_paths.append(file_path)
            labels.append(class_idx)
    
    return file_paths, labels

# Data generator to load data in batches
def data_generator(file_paths, labels, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    while True:
        indices = np.arange(len(file_paths))
        np.random.shuffle(indices)  # Shuffle for randomness
        
        for start_idx in range(0, len(file_paths), batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]
            X_batch = []
            y_batch = []
            
            for idx in batch_indices:
                img = load_and_preprocess_npy(file_paths[idx], img_size)
                X_batch.append(img)
                y_batch.append(labels[idx])
            
            if X_batch:  # Ensure batch is not empty
                yield (np.array(X_batch),  # Shape: (batch_size, 128, 128, 10)
                       tf.keras.utils.to_categorical(y_batch, NUM_CLASSES))

# Build an optimized 2D CNN model
def build_2d_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 10), num_classes=NUM_CLASSES):
    # Data augmentation
    data_augmentation = tf.keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
    ])
    
    model = models.Sequential([
        layers.Input(shape=input_shape),  # Explicitly define input shape
        data_augmentation,
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

# Function to evaluate the model and compute metrics
def evaluate_model(model, test_file_paths, test_labels):
    test_gen = data_generator(test_file_paths, test_labels, batch_size=1)
    
    y_true = []
    y_pred = []
    
    for _ in range(len(test_file_paths)):
        X, y = next(test_gen)
        pred = model.predict(X, verbose=0)
        y_true.append(np.argmax(y, axis=1)[0])
        y_pred.append(np.argmax(pred, axis=1)[0])
    
    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    
    # Generate classification report
    report = classification_report(y_true, y_pred, labels=[0, 1, 2], target_names=[f'class_{i+3}' for i in range(3)], digits=4)
    
    return accuracy, precision, recall, f1, report

# Main execution for hscnn_plus dataset
model_type = "hscnn_plus"

print(f"\nTraining on {model_type} dataset...")

# Load all file paths and labels
file_paths, labels = load_data_paths(train_path, model_type)

if not file_paths:
    print(f"No data found for {model_type}. Skipping...")
else:
    # Split into 70% train and 30% test, stratified to maintain class balance
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=TEST_SIZE, stratify=labels, random_state=42
    )
    
    # Train the model
    train_gen = data_generator(train_paths, train_labels, batch_size=BATCH_SIZE)
    steps_per_epoch = len(train_paths) // BATCH_SIZE
    cnn_model = build_2d_cnn()
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)
    
    # Evaluate on test set
    print(f"Evaluating on {model_type} test split...")
    accuracy, precision, recall, f1, report = evaluate_model(cnn_model, test_paths, test_labels)
    
    # Print epochs and evaluation metrics
    print(f"Epochs: {EPOCHS}")
    print("\nEvaluation Metrics:")
    print(f"Accuracy      : {accuracy:.4f}")
    print(f"Precision (macro): {precision:.4f}")
    print(f"Recall (macro)   : {recall:.4f}")
    print(f"F1 Score (macro) : {f1:.4f}")
    print("\nFull Classification Report:")
    print(report)


Training on hscnn_plus dataset...
Epoch 1/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 87ms/step - accuracy: 0.4257 - loss: 1.0738
Epoch 2/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 80ms/step - accuracy: 0.6151 - loss: 0.8499
Epoch 3/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 75ms/step - accuracy: 0.6912 - loss: 0.7377
Epoch 4/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - accuracy: 0.6817 - loss: 0.7379
Epoch 5/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 81ms/step - accuracy: 0.6827 - loss: 0.7151
Epoch 6/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - accuracy: 0.7276 - loss: 0.6791
Epoch 7/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 78ms/step - accuracy: 0.7206 - loss: 0.7027
Epoch 8/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - accuracy: 0.6908 - loss: 0.6926
Epoch 9/20
[

In [2]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import cv2

# Define paths to dataset
base_path = "/kaggle/input/hsi-skincancer-main"  # Adjust based on Kaggle dataset path
train_path = os.path.join(base_path, "train")

# Parameters
IMG_SIZE = 128  # Reduced from 256 to 128 to save memory
BATCH_SIZE = 16  # Small batch size to fit in GPU memory
NUM_CLASSES = 3  # Only class_3, class_4, class_5
EPOCHS = 50  # Increased to 30 for more training
CLASS_MAPPING = {3: 0, 4: 1, 5: 2}  # Map class_3 -> 0, class_4 -> 1, class_5 -> 2
TEST_SIZE = 0.3  # 30% for test, 70% for train

# Function to load and preprocess .npy files
def load_and_preprocess_npy(file_path, img_size=IMG_SIZE):
    # Load hyperspectral image (shape: 31, 256, 256)
    img = np.load(file_path)
    
    # Use a subset of 10 bands to reduce memory (e.g., first 10 bands)
    img = img[:10, :, :]  # Shape: (10, 256, 256)
    img = np.transpose(img, (1, 2, 0))  # Shape: (256, 256, 10)
    img = cv2.resize(img, (img_size, img_size))  # Shape: (128, 128, 10)
    
    # Normalize to [0, 1]
    img = img / np.max(img)
    return img

# Function to load all file paths and labels
def load_data_paths(data_path, model_type):
    model_path = os.path.join(data_path, model_type)
    classes = [f"class_{i}" for i in [3, 4, 5]]  # Only class_3, class_4, class_5
    
    file_paths = []
    labels = []
    
    for class_name in classes:
        class_idx = CLASS_MAPPING[int(class_name.split('_')[1])]
        class_path = os.path.join(model_path, class_name)
        if not os.path.exists(class_path):
            print(f"Warning: {class_path} does not exist")
            continue
            
        npy_files = [f for f in os.listdir(class_path) if f.endswith(".npy")]
        for npy_file in npy_files:
            file_path = os.path.join(class_path, npy_file)
            file_paths.append(file_path)
            labels.append(class_idx)
    
    return file_paths, labels

# Data generator to load data in batches
def data_generator(file_paths, labels, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    while True:
        indices = np.arange(len(file_paths))
        np.random.shuffle(indices)  # Shuffle for randomness
        
        for start_idx in range(0, len(file_paths), batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]
            X_batch = []
            y_batch = []
            
            for idx in batch_indices:
                img = load_and_preprocess_npy(file_paths[idx], img_size)
                X_batch.append(img)
                y_batch.append(labels[idx])
            
            if X_batch:  # Ensure batch is not empty
                yield (np.array(X_batch),  # Shape: (batch_size, 128, 128, 10)
                       tf.keras.utils.to_categorical(y_batch, NUM_CLASSES))

# Build an optimized 2D CNN model
def build_2d_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 10), num_classes=NUM_CLASSES):
    # Data augmentation
    data_augmentation = tf.keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
    ])
    
    model = models.Sequential([
        layers.Input(shape=input_shape),  # Explicitly define input shape
        data_augmentation,
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

# Function to evaluate the model and compute metrics
def evaluate_model(model, test_file_paths, test_labels):
    test_gen = data_generator(test_file_paths, test_labels, batch_size=1)
    
    y_true = []
    y_pred = []
    
    for _ in range(len(test_file_paths)):
        X, y = next(test_gen)
        pred = model.predict(X, verbose=0)
        y_true.append(np.argmax(y, axis=1)[0])
        y_pred.append(np.argmax(pred, axis=1)[0])
    
    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    
    # Generate classification report
    report = classification_report(y_true, y_pred, labels=[0, 1, 2], target_names=[f'class_{i+3}' for i in range(3)], digits=4)
    
    return accuracy, precision, recall, f1, report

# Main execution for mst_plus_plus dataset
model_type = "mst_plus_plus"

print(f"\nTraining on {model_type} dataset...")

# Load all file paths and labels
file_paths, labels = load_data_paths(train_path, model_type)

if not file_paths:
    print(f"No data found for {model_type}. Skipping...")
else:
    # Split into 70% train and 30% test, stratified to maintain class balance
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=TEST_SIZE, stratify=labels, random_state=42
    )
    
    # Train the model
    train_gen = data_generator(train_paths, train_labels, batch_size=BATCH_SIZE)
    steps_per_epoch = len(train_paths) // BATCH_SIZE
    cnn_model = build_2d_cnn()
    cnn_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)
    
    # Evaluate on test set
    print(f"Evaluating on {model_type} test split...")
    accuracy, precision, recall, f1, report = evaluate_model(cnn_model, test_paths, test_labels)
    
    # Print epochs and evaluation metrics
    print(f"Epochs: {EPOCHS}")
    print("\nEvaluation Metrics:")
    print(f"Accuracy      : {accuracy:.4f}")
    print(f"Precision (macro): {precision:.4f}")
    print(f"Recall (macro)   : {recall:.4f}")
    print(f"F1 Score (macro) : {f1:.4f}")
    print("\nFull Classification Report:")
    print(report)


Training on mst_plus_plus dataset...
Epoch 1/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 936ms/step - accuracy: 0.4459 - loss: 1.0362
Epoch 2/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 90ms/step - accuracy: 0.6398 - loss: 0.8056
Epoch 3/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - accuracy: 0.6829 - loss: 0.7271
Epoch 4/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - accuracy: 0.7258 - loss: 0.7216
Epoch 5/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 73ms/step - accuracy: 0.6558 - loss: 0.7875
Epoch 6/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 71ms/step - accuracy: 0.7214 - loss: 0.6728
Epoch 7/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 73ms/step - accuracy: 0.7431 - loss: 0.6013
Epoch 8/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 78ms/step - accuracy: 0.7094 - loss: 0.6771
Epoch 9/