In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that paths under
# that mount point can be read and written by later cells.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
!pip install nibabel numpy pandas tensorflow scikit-learn scipy openpyxl


In [None]:
import os
import nibabel as nib
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from scipy.ndimage import zoom

# 1) Define the autoencoder
def create_autoencoder(input_shape):
    """Build a 3D convolutional autoencoder and the encoder sharing its layers.

    Args:
        input_shape: Shape of a single input volume including the channel
            axis, e.g. ``(128, 128, 64, 1)``.

    Returns:
        A tuple ``(autoencoder, encoder)``. ``autoencoder`` maps the input
        volume to a single-channel sigmoid reconstruction; ``encoder`` maps
        the same input to the 256-dimensional bottleneck vector. Both models
        are built from the same layer instances, and neither is compiled or
        trained by this function.

    Note:
        The encoder halves every spatial axis three times (rounding up, as
        the pooling uses ``padding='same'``) and the decoder doubles it three
        times, so the reconstruction has the same shape as the input only
        when every spatial dimension is divisible by 8.
    """
    input_img = layers.Input(shape=input_shape)

    # --- Encoder: three Conv -> BatchNorm -> MaxPool stages ---
    x = input_img
    for filters in (32, 64, 128):
        x = layers.Conv3D(filters, (3, 3, 3), activation='relu', padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling3D((2, 2, 2), padding='same')(x)

    # --- Bottleneck ---
    # Convert the backend shape object into a plain tuple of Python ints so
    # that Reshape and Dense below receive built-in types.
    shape_before_flattening = tuple(int(dim) for dim in x.shape[1:])
    x = layers.Flatten()(x)
    x = layers.Dropout(0.3)(x)
    bottleneck = layers.Dense(512, activation='tanh')(x)
    bottleneck = layers.Dense(256, activation='tanh')(bottleneck)

    # --- Decoder: mirror of the encoder ---
    # np.prod returns a NumPy scalar; cast it so `units` is a built-in int.
    flat_units = int(np.prod(shape_before_flattening))
    x = layers.Dense(flat_units, activation='relu')(bottleneck)
    x = layers.Reshape(shape_before_flattening)(x)
    for filters in (128, 64, 32):
        x = layers.Conv3D(filters, (3, 3, 3), activation='relu', padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.UpSampling3D((2, 2, 2))(x)
    decoded = layers.Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same')(x)

    autoencoder = models.Model(input_img, decoded)
    encoder = models.Model(input_img, bottleneck)

    return autoencoder, encoder

# 2) Load and resize NIfTI files (with a mask option to avoid label corruption)
def load_and_resize_nifti(file_path, target_shape, is_mask=False):
    """Load a NIfTI volume and resample it to ``target_shape``.

    Args:
        file_path: Path to the NIfTI file to load.
        target_shape: Desired output shape, one entry per array axis.
        is_mask: When true, nearest-neighbour interpolation (order 0) is
            used so label values are never blended; otherwise linear
            interpolation (order 1) is used.

    Returns:
        The resampled volume as returned by ``scipy.ndimage.zoom``.

    Raises:
        ValueError: If the volume's rank still differs from
            ``len(target_shape)`` after trailing singleton axes are dropped.
    """
    img = nib.load(file_path)
    data = img.get_fdata()

    # Some files carry extra trailing singleton axes (e.g. X x Y x Z x 1).
    # Drop them so the per-axis zoom factors line up with the array rank;
    # zip() below would otherwise silently truncate the factor list.
    while data.ndim > len(target_shape) and data.shape[-1] == 1:
        data = data[..., 0]

    if data.ndim != len(target_shape):
        raise ValueError(
            f"{file_path}: volume has shape {data.shape}, which cannot be "
            f"resized to a target with {len(target_shape)} dimensions"
        )

    zoom_factors = [float(t) / float(s) for t, s in zip(target_shape, data.shape)]

    interpolation_order = 0 if is_mask else 1

    return zoom(data, zoom_factors, order=interpolation_order)

# 3) Process dataset
def _extract_label_volume(image_data, label_mask, target_shape):
    """Crop one labelled region, resize it to ``target_shape`` and z-score it.

    ``label_mask`` is a boolean array with the same shape as ``image_data``
    and at least one true voxel. Voxels outside the mask are set to zero
    inside the bounding box before resizing.
    """
    coords = np.nonzero(label_mask)
    bbox = tuple(slice(axis.min(), axis.max() + 1) for axis in coords)
    cropped = image_data[bbox] * label_mask[bbox]

    zoom_factors = [float(t) / float(s) for t, s in zip(target_shape, cropped.shape)]
    resized = zoom(cropped, zoom_factors, order=1)

    # Always standardise. Gating this on `max > 0` would leave regions whose
    # intensities are all non-positive unnormalised, on a different scale
    # from every other region. A constant volume maps to all zeros thanks to
    # the epsilon in the denominator.
    return (resized - resized.mean()) / (resized.std() + 1e-8)


def process_dataset(image_dir, mask_dir, encoder, target_shape,
                    label_range=(1, 6), id_prefix_len=6):
    """Extract encoder features for every labelled region of every image.

    For each ``.nii.gz`` file in ``image_dir`` the mask in ``mask_dir`` whose
    name starts with the same ``id_prefix_len`` characters is located. Image
    and mask are resized to ``target_shape``; then, for every mask label in
    ``label_range``, the image is cropped to that label's bounding box,
    resized back to ``target_shape``, z-score normalised and passed through
    ``encoder.predict``.

    Args:
        image_dir: Directory containing the image volumes.
        mask_dir: Directory containing the label masks.
        encoder: Model exposing ``predict``; it receives a float32 batch of
            shape ``(1, *target_shape, 1)``.
        target_shape: Spatial shape every volume is resized to.
        label_range: Inclusive ``(low, high)`` range of mask labels to
            process.
        id_prefix_len: Number of leading file-name characters used to pair
            an image with its mask.

    Returns:
        A list of ``(file_name, [(label, feature_vector), ...])`` tuples,
        one per image for which at least one feature vector was extracted.
        Images are visited in sorted file-name order.
    """
    results = []
    low, high = label_range

    # Sort both listings so runs are reproducible, and list the mask
    # directory once instead of once per image.
    image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.nii.gz'))
    mask_files = sorted(m for m in os.listdir(mask_dir) if m.endswith('.nii.gz'))

    for file_name in image_files:
        print(f"Processing image: {file_name}")

        # Look the mask up first so no volume is loaded for an image that
        # is going to be skipped anyway.
        case_prefix = file_name[:id_prefix_len]
        mask_file_name = next((m for m in mask_files if m.startswith(case_prefix)), None)
        if not mask_file_name:
            print(f"Mask not found for image {file_name}, skipping.")
            continue

        image_path = os.path.join(image_dir, file_name)
        image_data = load_and_resize_nifti(image_path, target_shape, is_mask=False)

        mask_path = os.path.join(mask_dir, mask_file_name)
        print(f"Processing mask: {mask_file_name}")
        mask_data = load_and_resize_nifti(mask_path, target_shape, is_mask=True)
        mask_data = np.round(mask_data).astype(int)

        if image_data.shape != mask_data.shape:
            print(f"Shape mismatch for {file_name}, skipping.")
            continue

        unique_labels = np.unique(mask_data)
        unique_labels = unique_labels[(unique_labels >= low) & (unique_labels <= high)]

        print(f"Valid labels in mask {file_name}: {unique_labels}")

        if unique_labels.size == 0:
            print(f"No valid labels found in mask {file_name}, skipping.")
            continue

        patient_features = []
        for label in unique_labels:
            # `label` comes from np.unique(mask_data), so the region is
            # guaranteed to contain at least one voxel.
            roi = _extract_label_volume(image_data, mask_data == label, target_shape)

            # Add the batch and channel axes the encoder expects.
            batch = roi[np.newaxis, ..., np.newaxis].astype(np.float32)

            # Best effort per label: a failure on one region must not
            # discard the features already extracted for this image.
            try:
                # verbose=0 suppresses the per-call progress bar.
                deep_features = encoder.predict(batch, verbose=0)
                feature_vec = deep_features.flatten()
            except Exception as e:
                print(f"Error extracting features for {file_name}, label {label}: {e}")
            else:
                patient_features.append((int(label), feature_vec))
                print(f"Extracted features for {file_name}, label {label}.")

        if patient_features:
            results.append((file_name, patient_features))

    return results

# 4) Save results to Excel (one row per label)
def save_to_excel(results, output_file):
    """Write the extracted feature vectors to an Excel sheet, one row per label.

    Args:
        results: List of ``(file_name, [(label, feature_vector), ...])``
            tuples, where each feature vector exposes ``tolist()``.
        output_file: Destination path of the Excel file. Missing parent
            directories are created.

    Returns:
        None. Nothing is written when ``results`` is empty or contains no
        feature vectors.
    """
    if not results:
        print("No results to save. The results list is empty.")
        return

    rows = [
        [file_name, label] + feature_vec.tolist()
        for file_name, patient_features in results
        for label, feature_vec in patient_features
    ]
    if not rows:
        # Entries exist but none of them carries a feature vector; indexing
        # into the first entry would raise IndexError.
        print("No results to save. None of the entries contain feature vectors.")
        return

    # The first two cells of every row are the file name and the label.
    num_features = len(rows[0]) - 2
    columns = ['File Name', 'Label'] + [f'Feature_{i+1}' for i in range(num_features)]

    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    df = pd.DataFrame(rows, columns=columns)
    df.to_excel(output_file, index=False)
    print(f"Results successfully saved to {output_file}")

# 5) Main execution
if __name__ == "__main__":
    # Input volumes, their label masks, and the Excel file to write.
    image_dir = "/content/drive/MyDrive/PANAROMA/pancreatic classification/image & label/image_batch3"
    mask_dir = "/content/drive/MyDrive/PANAROMA/pancreatic classification/image & label/label_batch3"
    output_file = "/content/drive/MyDrive/1/deep_features01.xlsx"

    target_shape = (128, 128, 64)
    input_shape = target_shape + (1,)

    # The encoder is used below without any training, so its weights are
    # whatever the initialisers produce. Fix the seed so that repeated runs
    # yield the same features.
    tf.keras.utils.set_random_seed(42)

    autoencoder, encoder = create_autoencoder(input_shape)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

    # NOTE(review): the autoencoder is compiled but never fitted. If training
    # is added, binary cross-entropy with a sigmoid output expects targets in
    # [0, 1], whereas the volumes fed to the encoder are z-score normalised.
    print("WARNING: the encoder has not been trained; features come from randomly initialised weights.")

    results = process_dataset(image_dir, mask_dir, encoder, target_shape)

    save_to_excel(results, output_file)

    # Only report success when something was actually written.
    if results:
        print(f"Deep features saved to {output_file}")
