<a href="https://colab.research.google.com/github/MahiKhan5360/Segmentation-using-Capsule-layers-and-CNN/blob/main/Dataset_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Check if running in Colab
def is_colab():
    """Return True if the ``google.colab`` package can be imported, else False.

    Only ``ImportError`` (which includes ``ModuleNotFoundError``) is treated as
    "not in Colab". Any other exception raised during the import propagates
    instead of being silently reported as False.
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    return True

# Report the runtime environment and, when on Colab, mount Google Drive
if not is_colab():
    print("Not running in Google Colab environment.")
else:
    print("Running in Google Colab environment.")

    # The drive helper is imported lazily because it only exists on Colab
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")

# Fix for keras.utils.generic_utils error
# NOTE(review): segmentation_models appears to select its backend from the
# SM_FRAMEWORK environment variable rather than TF_KERAS -- confirm that
# TF_KERAS actually has an effect for the libraries used in this notebook.
import os
os.environ['TF_KERAS'] = '1'  # Force using tf.keras instead of standalone keras

# Install required packages
# sys and subprocess are used further down to invoke pip for missing packages
import sys
import subprocess

# Fix for TensorFlow Addons compatibility
# The two lines below are IPython shell escapes (notebook-only syntax): remove
# whatever tensorflow-addons version is present, then install the pinned 0.17.1.
!pip uninstall -y tensorflow-addons
!pip install tensorflow-addons==0.17.1

# Monkey patch for missing generic_utils
# When this keras build exposes no keras.utils.generic_utils attribute, alias
# it to tf.keras.utils so lookups on keras.utils.generic_utils resolve there.
import keras
if not hasattr(keras.utils, 'generic_utils'):
    import tensorflow as tf
    keras.utils.generic_utils = tf.keras.utils
# Check if packages are already installed to avoid reinstallation
# Maps a package label to the requirement string handed to `pip install`.
required_packages = {
    'tensorflow': 'tensorflow>=2.8.0',
    'tensorflow_addons': 'tensorflow-addons==0.17.1',  # with specific version
    'segmentation_models': 'git+https://github.com/qubvel/segmentation_models',  # Use GitHub version
    'albumentations': 'albumentations>=1.1.0',
    'opencv-python': 'opencv-python>=4.5.5',
    'scikit-image': 'scikit-image>=0.19.2',
    'matplotlib': 'matplotlib>=3.5.1',
    'pandas': 'pandas>=1.4.2',
    'tqdm': 'tqdm>=4.64.0',
    'einops': 'einops>=0.4.1',
    'timm': 'timm>=0.5.4',
    'seaborn': 'seaborn>=0.11.2'
}

# Labels above whose importable module name differs from the label itself.
# Probing 'opencv-python' or 'scikit-image' directly can never succeed (they
# are pip distribution names), which made those packages reinstall every run.
_IMPORT_NAMES = {
    'opencv-python': 'cv2',
    'scikit-image': 'skimage',
}

import importlib.util


def _is_installed(module_name):
    """Return True if `module_name` can be located, without importing it."""
    try:
        return importlib.util.find_spec(module_name) is not None
    except ValueError:
        # find_spec raises ValueError when the module is already loaded but
        # its __spec__ is None; a loaded module is necessarily available.
        return module_name in sys.modules


# Only install packages that are not already installed
for package, install_name in required_packages.items():
    if package == 'tensorflow_addons':  # Skip as we already installed it
        continue
    if _is_installed(_IMPORT_NAMES.get(package, package)):
        print(f"{package} is already installed.")
        continue
    print(f"Installing {package}...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", install_name])
    # Let the import system notice modules that pip just wrote to disk
    importlib.invalidate_caches()
    print(f"{package} installed successfully.")

# Set up TensorFlow and GPU configurations
import tensorflow as tf

# Check for GPU availability
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Set memory growth to avoid allocating all memory at once
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Found {len(gpus)} GPU(s). Memory growth enabled.")

        # Print GPU information
        gpu_info = !nvidia-smi
        print("GPU Information:")
        for line in gpu_info:
            print(line)

    except RuntimeError as e:
        print(f"GPU configuration error: {e}")
else:
    print("No GPU found. Running on CPU.")

# Set up mixed precision for faster training
# This is best-effort: a failure is reported and the notebook continues with
# the default policy. Catching Exception (not a bare except) keeps
# KeyboardInterrupt/SystemExit working and lets the real cause be shown.
try:
    policy = tf.keras.mixed_precision.Policy('mixed_float16')
    tf.keras.mixed_precision.set_global_policy(policy)
except Exception as exc:
    print("Mixed precision not supported or failed to set up.")
    print(f"  Reason: {type(exc).__name__}: {exc}")
else:
    print(f"Mixed precision policy set to: {policy.name}")

# Print TensorFlow version
print(f"TensorFlow version: {tf.__version__}")

# Verify dataset paths
import os

# Locations of the raw ISIC2018 images and ground-truth masks under /content/drive
original_input_path = '/content/drive/MyDrive/ISIC2018_original/ISIC2018_Task1_2_Training_Input'
original_gt_path = '/content/drive/MyDrive/ISIC2018_original/ISIC2018_Task1_Training_GroundTruth'

# Root directory that receives the processed dataset
processed_base_path = '/content/drive/MyDrive/ISIC2018'

# Warn when either raw folder is missing; otherwise count the entries in each
if not os.path.exists(original_input_path) or not os.path.exists(original_gt_path):
    print("Warning: Original dataset not found at the specified path.")
    print(f"Expected paths: \n  {original_input_path}\n  {original_gt_path}")
else:
    input_files = len(os.listdir(original_input_path))
    gt_files = len(os.listdir(original_gt_path))
    print(
        "Original dataset found:",
        f"  - Input images: {input_files}",
        f"  - Ground truth masks: {gt_files}",
        sep="\n",
    )

# Make sure the processed dataset root exists, creating it on first use
if os.path.exists(processed_base_path):
    print(f"Processed dataset directory exists at: {processed_base_path}")
else:
    print(f"Creating processed dataset directory at: {processed_base_path}")
    os.makedirs(processed_base_path, exist_ok=True)
