In [None]:
!git clone https://github.com/ayamohamdd/IP-25.git


Cloning into 'IP-25'...
remote: Enumerating objects: 709, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 709 (delta 0), reused 3 (delta 0), pack-reused 700 (from 1)[K
Receiving objects: 100% (709/709), 403.03 MiB | 50.06 MiB/s, done.
Updating files: 100% (691/691), done.


In [None]:
import tensorflow as tf
import numpy as np
import os
from PIL import Image, ImageEnhance
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate
from tensorflow.keras.optimizers import Adam, SGD
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, save_img


In [None]:
# # Paths to the directories
# train_image_paths = r"/content/IP-25/train_data/Images"
# train_mask_paths = r"/content/IP-25/train_data/Labels"

# # Output paths
# output_image_path = train_image_paths
# output_mask_path = train_mask_paths

# # Image augmentation setup
# datagen = ImageDataGenerator(
#     rotation_range=40,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True,
#     fill_mode='nearest'
# )

# # Get lists of image and mask files
# image_files = sorted([file for file in os.listdir(train_image_paths) if file.lower().endswith(('jpg', 'jpeg', 'png', 'bmp', 'gif', 'tiff'))])
# mask_files = sorted([file for file in os.listdir(train_mask_paths) if file.lower().endswith(('jpg', 'jpeg', 'png', 'bmp', 'gif', 'tiff'))])

# # Ensure that image and mask filenames correspond (same base name, e.g., "image1.jpg" matches "image1.png")
# assert len(image_files) == len(mask_files), "Mismatch in the number of images and masks!"
# for img_file, mask_file in zip(image_files, mask_files):
#     assert os.path.splitext(img_file)[0] == os.path.splitext(mask_file)[0], "Image and mask filenames do not match!"

# # Augment each image-mask pair
# counter = 0
# for img_file, mask_file in zip(image_files, mask_files):
#     # Load the image and mask
#     img_path = os.path.join(train_image_paths, img_file)
#     mask_path = os.path.join(train_mask_paths, mask_file)
#     img = img_to_array(load_img(img_path))
#     mask = img_to_array(load_img(mask_path, color_mode="rgb"))  # Load the mask as RGB (class labels are colour-coded)

#     # Expand dimensions to match the generator input format
#     img = np.expand_dims(img, axis=0)
#     mask = np.expand_dims(mask, axis=0)

#     # Create a combined generator for both image and mask
#     seed = np.random.randint(0, 10000)  # Use the same seed for both
#     aug_img_iter = datagen.flow(img, batch_size=1, seed=seed)
#     aug_mask_iter = datagen.flow(mask, batch_size=1, seed=seed)

#     # Generate augmented images and masks
#     for i in range(3):  # Create 3 augmentations per image-mask pair
#         aug_img = next(aug_img_iter)[0].astype('uint8')  # Augmented image
#         aug_mask = next(aug_mask_iter)[0].astype('uint8')  # Augmented mask

#         # Save augmented image and mask with unique names
#         img_name = f"aug_{counter}_{img_file}"
#         mask_name = f"aug_{counter}_{mask_file}"
#         save_img(os.path.join(output_image_path, img_name), aug_img)
#         save_img(os.path.join(output_mask_path, mask_name), aug_mask)
#         counter += 1

# print(f"Data augmentation complete! Total augmented pairs: {counter}")


Data augmentation complete! Total augmented pairs: 600


In [None]:
def load_images(image_paths, target_size=(256, 256), brightness_factor=0.8, blur_kernel_size=(5, 5)):
    """Load, darken, blur and normalise a list of image files.

    Args:
        image_paths: Iterable of image file paths.
        target_size: Size passed to ``load_img`` for resizing.
        brightness_factor: Multiplier applied to pixel values via
            ``cv2.convertScaleAbs`` (0.8 darkens by 20%).
        blur_kernel_size: Kernel size passed to ``cv2.GaussianBlur``.

    Returns:
        A float32 array of the processed images scaled to [0, 1], stacked
        along a new first axis. With no paths, an empty float32 array is
        returned (shaped ``(0, *target_size, 3)`` when ``target_size`` is set).
    """
    images = []
    for img_path in image_paths:
        # img_to_array yields whole-number floats in [0, 255]; cast straight to
        # uint8 for OpenCV. The previous /255 then *255 round trip was redundant
        # and could truncate a value by one after floating-point rounding.
        pixels = img_to_array(load_img(img_path, target_size=target_size)).astype(np.uint8)

        # Decrease brightness
        pixels = cv2.convertScaleAbs(pixels, alpha=brightness_factor, beta=0)

        # Apply Gaussian blur
        pixels = cv2.GaussianBlur(pixels, blur_kernel_size, 0)

        # Normalize to [0, 1] after processing
        images.append(pixels.astype(np.float32) / 255.0)

    if not images:
        # Keep rank and dtype predictable for callers even without any input.
        empty_shape = (0,) if target_size is None else (0, *target_size, 3)
        return np.empty(empty_shape, dtype=np.float32)
    return np.array(images)



# Map RGB values to class indices
def rgb_to_class_index(mask_rgb):
    """Return the class index whose reference colour is nearest to ``mask_rgb``.

    Nearness is the Euclidean distance in RGB space. When several reference
    colours are equally near, the lowest class index wins.
    """
    # Position in this tuple is the class index.
    palette = (
        (0, 0, 0),         # 0: Background clutter
        (128, 0, 0),       # 1: Building
        (128, 64, 128),    # 2: Road
        (0, 128, 0),       # 3: Tree
        (128, 128, 0),     # 4: Low vegetation
        (64, 0, 128),      # 5: Moving car
        (192, 0, 192),     # 6: Static car
        (64, 64, 0),       # 7: Human
    )

    pixel = np.array(mask_rgb)
    best_index = None
    best_gap = float('inf')
    for class_index, reference in enumerate(palette):
        gap = np.linalg.norm(pixel - np.array(reference))
        # Strict comparison keeps the earliest class on ties.
        if gap < best_gap:
            best_gap = gap
            best_index = class_index

    return best_index

def load_masks(mask_paths, target_size=(256, 256)):
    """Load colour-coded masks and convert them to class-index maps.

    Every pixel is assigned the class whose reference colour is nearest in
    RGB space (Euclidean distance; the lowest class index wins on ties). This
    is the same palette and rule as ``rgb_to_class_index``, but evaluated with
    array broadcasting instead of one Python call per pixel.

    Args:
        mask_paths: Iterable of mask file paths.
        target_size: Size passed to ``load_img`` for resizing.

    Returns:
        An integer array of class indices in 0..7, one 2-D map per mask,
        stacked along a new first axis. With no paths, an empty integer array
        is returned (shaped ``(0, *target_size)`` when ``target_size`` is set).
    """
    # Row position is the class index.
    palette = np.array([
        (0, 0, 0),         # 0: Background clutter
        (128, 0, 0),       # 1: Building
        (128, 64, 128),    # 2: Road
        (0, 128, 0),       # 3: Tree
        (128, 128, 0),     # 4: Low vegetation
        (64, 0, 128),      # 5: Moving car
        (192, 0, 192),     # 6: Static car
        (64, 64, 0),       # 7: Human
    ], dtype=np.int64)

    masks = []
    for mask_path in mask_paths:
        rgb = img_to_array(load_img(mask_path, target_size=target_size)).astype(np.int64)
        # (H, W, 1, 3) - (8, 3) broadcasts to (H, W, 8, 3): per-pixel offset
        # to each reference colour.
        offsets = rgb[:, :, np.newaxis, :] - palette
        # Squared distances are exact integers and order the same as the
        # Euclidean norm; argmin returns the first minimum, i.e. lowest class.
        squared = np.sum(offsets * offsets, axis=-1)
        masks.append(np.argmin(squared, axis=-1))

    if not masks:
        # Keep rank and dtype predictable for callers even without any input.
        empty_shape = (0,) if target_size is None else (0, *target_size)
        return np.empty(empty_shape, dtype=np.int64)
    return np.array(masks)

In [None]:
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate

def unet_model(input_size=(256, 256, 3), num_classes=8):
    """Build a three-level U-Net that outputs per-pixel class probabilities.

    Args:
        input_size: Shape handed to the Keras ``Input`` layer.
        num_classes: Number of channels in the final softmax layer.

    Returns:
        An uncompiled Keras ``Model``.
    """

    def double_conv(tensor, filters):
        # Two stacked 3x3 same-padded ReLU convolutions.
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    def upsample_and_merge(tensor, skip):
        # Double the spatial size, then append the encoder features as channels.
        upsampled = UpSampling2D((2, 2))(tensor)
        return Concatenate()([upsampled, skip])

    inputs = Input(input_size)

    # Encoder: 64 -> 128 -> 256 filters, halving resolution between levels.
    enc1 = double_conv(inputs, 64)
    enc2 = double_conv(MaxPooling2D((2, 2))(enc1), 128)
    enc3 = double_conv(MaxPooling2D((2, 2))(enc2), 256)

    # Bottleneck
    bottleneck = double_conv(MaxPooling2D((2, 2))(enc3), 512)

    # Decoder: mirror of the encoder, each level fed by its skip connection.
    dec3 = double_conv(upsample_and_merge(bottleneck, enc3), 256)
    dec2 = double_conv(upsample_and_merge(dec3, enc2), 128)
    dec1 = double_conv(upsample_and_merge(dec2, enc1), 64)

    # 1x1 convolution producing one softmax channel per class.
    probabilities = Conv2D(num_classes, (1, 1), activation='softmax')(dec1)

    return Model(inputs, probabilities)


In [None]:
# Rand Index - compares predicted and true labels
def rand_index(y_true, y_pred):
    """Fraction of elements whose arg-max class equals the ground-truth label.

    NOTE(review): despite the name, this counts element-wise agreement
    (matching elements / total elements); it does not count agreeing pairs of
    elements.

    Args:
        y_true: Ground-truth class labels, one per prediction element.
        y_pred: Class scores with the class axis last.

    Returns:
        A scalar float32 tensor.
    """
    # Convert probabilities to class indices and flatten.
    y_pred_flat = tf.reshape(tf.argmax(y_pred, axis=-1), [-1])
    # tf.equal needs both operands in one dtype; y_true may arrive in a
    # different dtype than the arg-max output (e.g. float), so align it.
    y_true_flat = tf.cast(tf.reshape(y_true, [-1]), y_pred_flat.dtype)
    matches = tf.reduce_sum(tf.cast(tf.equal(y_true_flat, y_pred_flat), tf.float32))
    total = tf.cast(tf.size(y_true_flat, out_type=tf.int32), tf.float32)
    return matches / (total + tf.keras.backend.epsilon())  # Avoid divide by zero


# Jaccard Index (IoU) - Adjusted for probabilities
def jaccard_index(y_true, y_pred):
    """Mean intersection-over-union between labels and arg-max predictions.

    Both inputs are one-hot encoded over the class axis of ``y_pred``; the
    intersection and union are summed over axes 1 and 2, and the resulting
    ratios are averaged. A class absent from both label and prediction yields
    0 / epsilon and therefore contributes 0 to the mean.

    Assumes ``y_true`` holds integer class labels shaped like ``y_pred``
    without its last axis — TODO confirm against the training data.
    """
    class_count = tf.shape(y_pred)[-1]
    truth = tf.one_hot(tf.cast(y_true, tf.int32), depth=class_count)
    guess = tf.one_hot(tf.argmax(y_pred, axis=-1), depth=class_count)

    overlap = tf.reduce_sum(truth * guess, axis=[1, 2])
    combined = tf.reduce_sum(truth + guess, axis=[1, 2]) - overlap

    eps = tf.keras.backend.epsilon()
    return tf.reduce_mean(overlap / (combined + eps))

# Precision
def precision(y_true, y_pred):
    """Mean of true positives over predicted positives.

    Labels and arg-max predictions are one-hot encoded over the class axis of
    ``y_pred``; counts are summed over axes 1 and 2 and the ratios averaged.
    A class that is never predicted yields 0 / epsilon and contributes 0.

    Assumes ``y_true`` holds integer class labels shaped like ``y_pred``
    without its last axis — TODO confirm against the training data.
    """
    class_count = tf.shape(y_pred)[-1]
    truth = tf.one_hot(tf.cast(y_true, tf.int32), depth=class_count)
    guess = tf.one_hot(tf.argmax(y_pred, axis=-1), depth=class_count)

    hits = tf.reduce_sum(truth * guess, axis=[1, 2])
    claimed = tf.reduce_sum(guess, axis=[1, 2])

    eps = tf.keras.backend.epsilon()
    return tf.reduce_mean(hits / (claimed + eps))

# Recall
def recall(y_true, y_pred):
    """Mean of true positives over actual positives.

    Labels and arg-max predictions are one-hot encoded over the class axis of
    ``y_pred``; counts are summed over axes 1 and 2 and the ratios averaged.
    A class with no ground-truth elements yields 0 / epsilon and contributes 0.

    Assumes ``y_true`` holds integer class labels shaped like ``y_pred``
    without its last axis — TODO confirm against the training data.
    """
    class_count = tf.shape(y_pred)[-1]
    truth = tf.one_hot(tf.cast(y_true, tf.int32), depth=class_count)
    guess = tf.one_hot(tf.argmax(y_pred, axis=-1), depth=class_count)

    hits = tf.reduce_sum(truth * guess, axis=[1, 2])
    present = tf.reduce_sum(truth, axis=[1, 2])

    eps = tf.keras.backend.epsilon()
    return tf.reduce_mean(hits / (present + eps))



# First network: U-Net for 256x256 RGB inputs and 8 output classes, optimised with Adam.
model = unet_model(input_size=(256, 256, 3), num_classes=8)
# Compile with updated metrics
# The sparse loss takes integer class labels as targets (no one-hot encoding).
model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', rand_index, jaccard_index, precision, recall]
)

# Second network: same architecture, loss and metrics; only the optimizer (SGD)
# differs — presumably to compare the two optimizers.
model2 = unet_model(input_size=(256, 256, 3), num_classes=8)
# Compile with updated metrics
model2.compile(
    optimizer=SGD(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', rand_index, jaccard_index, precision, recall]
)



In [None]:
def get_image_paths(directory, extensions=(".jpg", ".png")):
    """Return the sorted paths of files in ``directory`` with a matching suffix.

    The result is sorted because ``os.listdir`` returns entries in arbitrary
    order, while callers pair the image list and the mask list by position.

    Args:
        directory: Directory to list (not recursive).
        extensions: Iterable of filename suffixes to keep; matching is
            case-sensitive.

    Returns:
        A list of ``os.path.join(directory, filename)`` strings in ascending
        order.
    """
    suffixes = tuple(extensions)  # str.endswith accepts a tuple of suffixes
    return sorted(
        os.path.join(directory, filename)
        for filename in os.listdir(directory)
        if filename.endswith(suffixes)
    )

# Get file paths for images and masks
# NOTE(review): images and masks are listed independently and are later paired
# by position, so both listings must come back in the same order — confirm that
# get_image_paths guarantees this and that base names match across folders.
train_image_paths = get_image_paths(r"/kaggle/working/IP-25/train_data/Images")
train_mask_paths = get_image_paths(r"/kaggle/working/IP-25/train_data/Labels")
val_image_paths = get_image_paths(r"/kaggle/working/IP-25/val_data/Images")
val_mask_paths = get_image_paths(r"/kaggle/working/IP-25/val_data/Labels")

In [None]:
# Read every file into memory: images become preprocessed float arrays in
# [0, 1], masks become per-pixel class-index maps (0 to 7).
train_images = load_images(train_image_paths)
train_masks = load_masks(train_mask_paths)
val_images = load_images(val_image_paths)
val_masks = load_masks(val_mask_paths)

In [None]:
# Train the Adam-compiled network; `history` keeps the object returned by fit.
history = model.fit(
    train_images,
    train_masks,
    validation_data=(val_images, val_masks),
    epochs=120,
    batch_size=32
)

# Train the SGD-compiled network on the same data with the same schedule.
# Both runs execute back to back in this cell.
history2 = model2.fit(
    train_images,
    train_masks,
    validation_data=(val_images, val_masks),
    epochs=120,
    batch_size=32
)

In [None]:
def load_images_from_directory(directory_path, target_size=(256, 256), brightness_factor=0.5, blur_kernel_size=(5, 5)):
    """Load and preprocess every ``.png`` file found in ``directory_path``.

    Each image is resized, darkened with PIL's brightness enhancer, scaled to
    [0, 1] and Gaussian-blurred.

    NOTE(review): the default ``brightness_factor`` here is 0.5, while
    ``load_images`` scales training images by 0.8 — confirm the difference
    between training and inference preprocessing is intended.

    Args:
        directory_path: Directory to list (not recursive).
        target_size: Size passed to ``load_img`` for resizing.
        brightness_factor: Factor passed to ``ImageEnhance.Brightness.enhance``.
        blur_kernel_size: Kernel size passed to ``cv2.GaussianBlur``.

    Returns:
        A tuple ``(images, image_paths)``: the stacked processed images and the
        paths they were read from, in the same (sorted) order.
    """
    # Sort so the image order is reproducible; os.listdir order is arbitrary.
    image_paths = sorted(
        os.path.join(directory_path, fname)
        for fname in os.listdir(directory_path)
        if fname.endswith('.png')  # Assuming images are PNG files
    )
    images = []

    for img_path in image_paths:
        # load_img already returns a PIL image, so brightness can be adjusted
        # on it directly. The previous array -> /255 -> *255 -> uint8 -> PIL
        # round trip was redundant and could truncate a value by one.
        pil_img = load_img(img_path, target_size=target_size)
        pil_img = ImageEnhance.Brightness(pil_img).enhance(brightness_factor)

        # Convert to array and normalize to [0, 1]
        img = np.array(pil_img) / 255.0

        # Apply Gaussian blur using OpenCV
        img = cv2.GaussianBlur(img, blur_kernel_size, 0)

        images.append(img)

    return np.array(images), image_paths  # Return both images and their paths
# Load images with preprocessing
# def load_images_from_directory(directory_path, target_size=(256, 256)):
#     images = []
#     for img_path in image_paths:
#         # Load image
#         img = load_img(img_path, target_size=target_size)
#         img = img_to_array(img) / 255.0  # Normalize images to [0, 1]

#         # Convert to uint8 for OpenCV processing
#         img = (img * 255).astype(np.uint8)

#         # Decrease brightness
#         img = cv2.convertScaleAbs(img, alpha=0.8, beta=0)  # Reduce brightness by 20%

#         # Apply Gaussian blur
#         img = cv2.GaussianBlur(img, (5, 5), 0)

#         # Normalize back to [0, 1] after processing
#         img = img.astype(np.float32) / 255.0

#         images.append(img)
#     return np.array(images)



def visualize_prediction(image, predicted_mask):
    """Show the input image and its predicted mask side by side."""
    panels = (
        ('Input Image', image),
        ('Predicted Mask', predicted_mask),
    )

    plt.figure(figsize=(12, 8))
    # One row, two columns; subplot positions are 1-based.
    for position, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.title(caption)
        plt.imshow(picture)

    plt.show()

# Directory paths for test images
test_images_dir = r"/kaggle/working/IP-25/test_data/Images"
# Optionally, if masks are needed later, you can add the path for ground truth masks
# test_masks_dir = r"/kaggle/working/IP-25/test_data/Masks/"

# Load test images
# NOTE(review): the loader is called with its defaults (brightness_factor=0.5),
# whereas the training images were darkened with a factor of 0.8 in
# load_images — confirm the mismatch is intended.
test_images, image_paths = load_images_from_directory(test_images_dir)

# Predict masks for the test images
# Only `model` (compiled with Adam) is used here; `model2` is not evaluated.
predictions = model.predict(test_images)

# Visualize the predictions
for i in range(len(test_images)):
    # For visualization, only predicted masks are needed
    # argmax over the last axis picks the highest-scoring class per pixel.
    predicted_mask = np.argmax(predictions[i], axis=-1)  # Convert to class labels if needed

    # Visualize the input image and the predicted mask
    visualize_prediction(test_images[i], predicted_mask)

In [None]:
def decode_mask(mask):
    """Convert an array of class indices into an RGB colour image.

    Args:
        mask: Array of class labels in 0..7 (integers, or floats/bools with
            whole-number values). Typically 2-D ``(H, W)``; any shape is
            accepted.

    Returns:
        A ``uint8`` array of shape ``mask.shape + (3,)`` holding the colour of
        each label.

    Raises:
        KeyError: If ``mask`` contains a value that is not a class label.
    """
    # Row position is the class index.
    palette = np.array([
        [0, 0, 0],        # 0: Background clutter
        [128, 0, 0],      # 1: Building
        [128, 64, 128],   # 2: Road
        [0, 128, 0],      # 3: Tree
        [128, 128, 0],    # 4: Low vegetation
        [64, 0, 128],     # 5: Moving car
        [192, 0, 192],    # 6: Static car
        [64, 64, 0],      # 7: Human
    ], dtype=np.uint8)

    labels = np.asarray(mask)
    if not np.issubdtype(labels.dtype, np.integer):
        # Whole-number floats (e.g. 3.0) are accepted as labels; anything else
        # is not a class label.
        with np.errstate(invalid='ignore'):
            whole = labels.astype(np.int64)
        if not np.array_equal(whole, labels):
            raise KeyError("mask contains values that are not integer class labels")
        labels = whole

    # Validate explicitly: negative indices would otherwise wrap around silently.
    if labels.size and (labels.min() < 0 or labels.max() >= len(palette)):
        raise KeyError(
            f"mask contains class labels outside 0..{len(palette) - 1}"
        )

    # Integer-array indexing looks up every label's colour in one step.
    return palette[labels]
