In [None]:
import numpy as np 
import pandas as pd
import os
import tensorflow as tf
from glob import glob
import cv2
from keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from keras.models import Model
import tensorflow.keras.backend as K
from keras.layers import Cropping2D, Dropout
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, Reshape, Dense, multiply, Permute, Add, Activation, Lambda
from tqdm import tqdm
import time
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Training hyperparameters and the size every image/mask is resized to
batch_size = 5          # samples per batch fed to the datasets
lr = 1e-3               # learning rate given to the optimizer
epochs = 100            # number of epochs requested from fit()
height = width = 512    # target spatial size in pixels

In [None]:
# Input dataset root and the files produced during training
dataset_path = "/kaggle/input/uavid-v1"
files_dir = os.path.join("files", "modified_uavid_dataset")
model_file, log_file = (
    os.path.join(files_dir, artifact_name)
    for artifact_name in ("UnetModel.keras", "Log-Unet.csv")
)

# Function to create directory
def create_dir(path):
    """Create directory `path` (and any missing parents).

    Calling it again for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(path, exist_ok=True)
        
# Make sure the directory that will hold the checkpoint and CSV log exists
create_dir(files_dir)

In [None]:
def load_data(path):
    """Collect sorted image and label file paths for training and validation.

    Training files come from `uavid_train/seq1`, validation files from
    `uavid_val/seq16`; each has an `Images` and a `Labels` sub-folder.

    Args:
        path: Root directory of the dataset.

    Returns:
        ((train_x, train_y), (valid_x, valid_y)) where every element is a
        sorted list of file paths (empty when nothing matches).
    """
    def _sorted_files(split, sequence, kind):
        # Every entry directly inside <path>/<split>/<sequence>/<kind>
        return sorted(glob(os.path.join(path, split, sequence, kind, "*")))

    train_pair = (
        _sorted_files("uavid_train", "seq1", "Images"),
        _sorted_files("uavid_train", "seq1", "Labels"),
    )
    valid_pair = (
        _sorted_files("uavid_val", "seq16", "Images"),
        _sorted_files("uavid_val", "seq16", "Labels"),
    )
    return train_pair, valid_pair

In [None]:
def read_image(path):
    """Load an image from disk as a normalised float32 array.

    Args:
        path: File path as bytes; it is decoded to str before reading.

    Returns:
        np.float32 array resized to (height, width) with values divided by 255.
        The channel order is the one cv2.imread produces (BGR); no colour
        conversion is applied here.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    if x is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {path}")
    x = cv2.resize(x, (width, height))
    x = x / 255.0
    x = x.astype(np.float32)
    return x

In [None]:
# Label colours (RGB) listed in class-index order; position in the list is the class id
_CLASS_COLORS = [
    (0, 0, 0),        # 0: Background clutter
    (128, 0, 0),      # 1: Building
    (128, 64, 128),   # 2: Road
    (0, 128, 0),      # 3: Tree
    (128, 128, 0),    # 4: Low vegetation
    (64, 0, 128),     # 5: Moving car
    (192, 0, 192),    # 6: Static car
    (64, 64, 0),      # 7: Human
]
color_map = {rgb: idx for idx, rgb in enumerate(_CLASS_COLORS)}

def read_mask(path):
    """Load a colour-coded label image and convert it to class indices.

    Args:
        path: File path as bytes; it is decoded to str before reading.

    Returns:
        np.uint8 array of shape (height, width) holding the class index of
        each pixel according to `color_map`. Pixels whose colour is not in
        `color_map` keep the initial value 0.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    path = path.decode()
    mask = cv2.imread(path)  # Read as a color image (BGR format)
    if mask is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read mask: {path}")
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2RGB)  # color_map keys are RGB
    # Nearest-neighbour keeps label colours exact; other modes would blend them
    mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)

    # Start with every pixel assigned to class 0
    class_indices = np.zeros((height, width), dtype=np.uint8)

    # Assign each known colour its class index
    for rgb, idx in color_map.items():
        class_indices[(mask == rgb).all(axis=-1)] = idx

    return class_indices

In [None]:
def tf_parse(x, y):
    """Turn an (image path, mask path) pair into loaded tensors.

    The loading is done by read_image / read_mask through tf.numpy_function,
    so the static shapes are set explicitly afterwards.

    Args:
        x: Image path tensor.
        y: Mask path tensor.

    Returns:
        (image, mask): float32 tensor of shape (height, width, 3) and uint8
        tensor of shape (height, width).
    """
    def _load_pair(image_path, mask_path):
        # Called with the path values; read_image/read_mask decode them from bytes
        return read_image(image_path), read_mask(mask_path)

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.uint8])
    # numpy_function outputs carry no static shape information
    image.set_shape([height, width, 3])
    mask.set_shape([height, width])
    return image, mask

In [None]:
def tf_dataset(x, y, batch=10):
    """Build a batched, prefetched tf.data pipeline from path sequences.

    Args:
        x: Sequence of image paths.
        y: Sequence of mask paths, aligned with `x`.
        batch: Batch size.

    Returns:
        tf.data.Dataset yielding (image, mask) batches parsed by tf_parse.
        No shuffling is applied.
    """
    return (
        tf.data.Dataset.from_tensor_slices((x, y))
        .map(tf_parse, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch)
        .prefetch(tf.data.AUTOTUNE)
    )

In [None]:
from sklearn.model_selection import train_test_split

# Gather file paths for the predefined train / validation sequences
(train_x, train_y), (valid_x, valid_y) = load_data(dataset_path)

print(f"Train: {len(train_x)} - {len(train_y)}")
print(f"Valid: {len(valid_x)} - {len(valid_y)}")

# Hold out 100 image/label pairs from the training paths (fixed seed for repeatability)
train_x, additional_valid_x, train_y, additional_valid_y = train_test_split(
    train_x,
    train_y,
    test_size=100,
    random_state=42,
)

# Append the held-out pairs to the existing validation paths
valid_x, valid_y = (
    np.concatenate([existing, extra], axis=0)
    for existing, extra in (
        (valid_x, additional_valid_x),
        (valid_y, additional_valid_y),
    )
)


print(f"New Train: {len(train_x)} - {len(train_y)}")
print(f"New Valid: {len(valid_x)} - {len(valid_y)}")


# Build the input pipelines for both splits with the configured batch size
train_dataset, valid_dataset = (
    tf_dataset(paths_x, paths_y, batch=batch_size)
    for paths_x, paths_y in ((train_x, train_y), (valid_x, valid_y))
)

In [None]:
# Channel Attention Function
def channel_attention(x, channel, reduction=16):
    """Scale each channel of `x` by a learned gate in (0, 1).

    The gate is computed from the globally average-pooled features through
    two Dense layers followed by a sigmoid.

    Args:
        x: Input feature tensor with `channel` channels on the last axis.
        channel: Number of channels of `x`.
        reduction: Divisor for the width of the hidden Dense layer.

    Returns:
        `x` multiplied by the per-channel gate (same shape as `x`).
    """
    # Never let the bottleneck collapse to zero units when channel < reduction
    hidden_units = max(1, channel // reduction)

    # Global Average Pooling
    avg_pool = layers.GlobalAveragePooling2D()(x)
    fc1 = layers.Dense(hidden_units, activation='relu')(avg_pool)
    fc2 = layers.Dense(channel)(fc1)
    attention = Activation('sigmoid')(fc2)
    # Reshape to (1, 1, channel) so the gate broadcasts over the spatial axes
    attention = layers.Reshape((1, 1, channel))(attention)
    refined = x * attention
    return refined

# Spatial Attention Function
def spatial_attention(x, channel, reduction=16, dilation_conv_num=2):
    """Scale each spatial position of `x` by a learned gate in (0, 1).

    The gate is a single-channel map produced by a 1x1 convolution, a stack
    of 3x3 dilated convolutions (dilation rates 1..dilation_conv_num), a final
    1x1 convolution and a sigmoid.

    Args:
        x: Input feature tensor.
        channel: Channel count used to size the intermediate convolutions.
        reduction: Divisor for the number of intermediate filters.
        dilation_conv_num: Number of dilated 3x3 convolutions.

    Returns:
        `x` multiplied by the single-channel gate (same shape as `x`).
    """
    # Never let the bottleneck collapse to zero filters when channel < reduction
    hidden_filters = max(1, channel // reduction)

    spatial = layers.Conv2D(hidden_filters, kernel_size=1, activation='relu')(x)
    for dilation in range(1, dilation_conv_num + 1):
        spatial = layers.Conv2D(
            hidden_filters,
            kernel_size=3,
            padding='same',
            dilation_rate=dilation,
            activation='relu',
        )(spatial)
    spatial = layers.Conv2D(1, kernel_size=1)(spatial)
    attention = Activation('sigmoid')(spatial)
    refined = x * attention
    return refined

# BAM Function: Combine Channel and Spatial Attention
def BAM(x, reduction=16, dilation_conv_num=2):
    """Apply channel attention, then spatial attention, and merge with the input.

    Args:
        x: Input feature tensor; its last dimension is used as the channel count.
        reduction: Forwarded to channel_attention and spatial_attention.
        dilation_conv_num: Forwarded to spatial_attention.

    Returns:
        x * spatial_attention(channel_attention(x)) + x * channel_attention(x).

    NOTE(review): both helpers already return `x`-scaled features rather than
    bare attention maps, so each term below multiplies by `x` a second time.
    Confirm this is the intended formulation.
    """
    n_channels = x.shape[-1]

    # Channel attention first, spatial attention on its output
    after_channel = channel_attention(x, n_channels, reduction)
    after_spatial = spatial_attention(after_channel, n_channels, reduction, dilation_conv_num)

    # Weight the input by each refined tensor and sum the two branches
    spatial_term = x * after_spatial
    channel_term = x * after_channel
    return spatial_term + channel_term


In [None]:
def conv_block(inputs, num_filters):
    """Two consecutive Conv2D(3x3, same padding) -> BatchNormalization -> ReLU stages.

    Args:
        inputs: Input feature tensor.
        num_filters: Number of filters of both convolutions.

    Returns:
        Output tensor of the second stage.
    """
    features = inputs
    for _ in range(2):
        features = Conv2D(num_filters, (3, 3), padding="same")(features)
        features = BatchNormalization()(features)
        features = Activation("relu")(features)
    return features

In [None]:
def encoder_block(inputs, num_filters):
    """Encoder stage: conv block, BAM attention, then 2x2 max pooling.

    Args:
        inputs: Input feature tensor.
        num_filters: Number of filters of the conv block.

    Returns:
        (x, p): the attention-refined features (used as the skip connection)
        and their max-pooled version (input to the next stage).
    """
    x = conv_block(inputs, num_filters)
    # BAM's second positional parameter is `reduction`, not the channel count;
    # passing num_filters there shrank every attention bottleneck to width 1.
    # BAM reads the channel count from the tensor itself.
    x = BAM(x)  # Apply BAM
    p = MaxPool2D((2, 2))(x)
    return x, p

In [None]:
def decoder_block(inputs, skip, num_filters):
    """Decoder stage: upsample, merge with the skip connection, conv block, BAM.

    Args:
        inputs: Feature tensor from the previous (coarser) stage.
        skip: Skip-connection tensor from the matching encoder stage.
        num_filters: Number of filters for the transposed conv and conv block.

    Returns:
        Attention-refined feature tensor at twice the spatial size of `inputs`.
    """
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(inputs)

    # Pooling floors odd sizes, so the upsampled tensor can only be equal to or
    # smaller than the skip tensor. Trim the surplus rows/columns from the skip
    # (the previous check compared the sizes the other way round and never fired).
    x_h, x_w = x.shape[1], x.shape[2]
    skip_h, skip_w = skip.shape[1], skip.shape[2]
    if None not in (x_h, x_w, skip_h, skip_w):
        extra_h = max(0, skip_h - x_h)
        extra_w = max(0, skip_w - x_w)
        if extra_h or extra_w:
            # Split the surplus between both sides; the odd remainder goes last
            skip = Cropping2D((
                (extra_h // 2, extra_h - extra_h // 2),
                (extra_w // 2, extra_w - extra_w // 2),
            ))(skip)

    x = Concatenate()([x, skip])
    x = conv_block(x, num_filters)
    # BAM's second positional parameter is `reduction`, not the channel count;
    # passing num_filters there shrank every attention bottleneck to width 1.
    x = BAM(x)  # Apply BAM
    return x

In [None]:
def build_unet(input_shape, num_classes=8):
    """Assemble the U-Net with BAM attention in every encoder/decoder stage.

    The network has four encoder stages (64-512 filters), a bottleneck of two
    1024-filter conv blocks, four decoder stages (512-64 filters) and a
    softmax output convolution.

    Args:
        input_shape: Shape passed to `Input`, e.g. (height, width, 3).
        num_classes: Number of output channels of the softmax layer. The
            default of 8 equals the number of entries in `color_map`.

    Returns:
        A Model named 'UNET_BAM' mapping images to per-pixel class
        probabilities.
    """
    inputs = Input(input_shape)

    # Encoder: keep each stage's features (s*) for the skip connections
    s1, p1 = encoder_block(inputs, 64)
    s2, p2 = encoder_block(p1, 128)
    s3, p3 = encoder_block(p2, 256)
    s4, p4 = encoder_block(p3, 512)

    # Bottleneck
    b1 = conv_block(p4, 1024)
    b1 = conv_block(b1, 1024)

    # Decoder: upsample and merge with the matching encoder features
    d1 = decoder_block(b1, s4, 512)
    d2 = decoder_block(d1, s3, 256)
    d3 = decoder_block(d2, s2, 128)
    d4 = decoder_block(d3, s1, 64)

    outputs = Conv2D(num_classes, (2, 2), padding="same", activation="softmax")(d4)
    model = Model(inputs, outputs, name='UNET_BAM')
    return model

In [None]:
# Instantiate the network for the configured image size (3 colour channels)
input_shape = (height, width, 3)
model = build_unet(input_shape=input_shape)

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Adam with the configured learning rate; the sparse loss/metric match masks
# that store one integer class index per pixel
opt = tf.keras.optimizers.Adam(learning_rate=lr)
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=["sparse_categorical_accuracy"],
)

In [None]:
# Training callbacks, applied in this order
checkpoint_cb = ModelCheckpoint(model_file, verbose=1, save_best_only=True)  # keep only the best model file
reduce_lr_cb = ReduceLROnPlateau(monitor="val_loss", mode='auto', factor=0.1, patience=4)  # LR x0.1 after 4 stagnant epochs
csv_logger_cb = CSVLogger(log_file)  # per-epoch metrics written to the CSV log
early_stop_cb = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)  # stop after 10 stagnant epochs

callbacks = [checkpoint_cb, reduce_lr_cb, csv_logger_cb, early_stop_cb]

In [None]:
# fit() returns a History object, not the model. Store it under its own name
# so `model` keeps referring to the trained network.
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    callbacks=callbacks,
    verbose=1
)

In [None]:
datasetpath = "/kaggle/input/uavid-v1/uavid_test"
save_path = os.path.join("Prediction", "modified_uavid_dataset")
# Point at the same location the checkpoint was written to (files_dir), so
# loading does not depend on the working directory being /kaggle/working
model_file = os.path.join(files_dir, "UnetModel.keras")
# Directory that receives the predicted masks
create_dir(save_path)

In [None]:

# Reload the saved model from disk; BAM is offered to the loader under the
# name "custom_BAM"
custom_objects = dict(custom_BAM=BAM)
model = tf.keras.models.load_model(
    model_file,
    custom_objects=custom_objects,
)

In [None]:
import matplotlib.pyplot as plt
import os

# Read the per-epoch metrics written by CSVLogger
log_file = os.path.join(files_dir, "Log-Unet.csv")
log_data = pd.read_csv(log_file)

# Show which metric columns are available
print(log_data.columns)

# Output files for the two figures
loss_plot_file_path = os.path.join(files_dir, 'training_validation_loss.png')
accuracy_plot_file_path = os.path.join(files_dir, 'training_validation_accuracy.png')

# (training column, validation column, metric name, output file) per figure
curve_specs = [
    ('loss', 'val_loss', 'Loss', loss_plot_file_path),
    ('sparse_categorical_accuracy', 'val_sparse_categorical_accuracy', 'Accuracy', accuracy_plot_file_path),
]

for train_col, val_col, metric_name, out_path in curve_specs:
    plt.figure(figsize=(6, 6))
    plt.plot(log_data[train_col], label=f'Training {metric_name}')
    plt.plot(log_data[val_col], label=f'Validation {metric_name}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.title(f'Training and Validation {metric_name}')
    plt.legend()
    # Write the figure to disk before displaying it
    plt.savefig(out_path)
    plt.show()

In [None]:
# Every file in the Images folder of each test sequence, in sorted order
test_pattern = os.path.join(dataset_path, "uavid_test", "*", "Images", "*")
test_x = sorted(glob(test_pattern))
print(f"Test: {len(test_x)}")

In [None]:
import matplotlib.colors as mcolors

# Image size used for display
width, height = 512, 512

# Label colour (RGB) -> class index
rgb_to_class = {
    (0, 0, 0): 0,         # Background clutter
    (128, 0, 0): 1,       # Building
    (128, 64, 128): 2,    # Road
    (0, 128, 0): 3,       # Tree
    (128, 128, 0): 4,     # Low vegetation
    (64, 0, 128): 5,      # Moving car
    (192, 0, 192): 6,     # Static car
    (64, 64, 0): 7        # Human
}

# Inverse lookup: class index -> RGB colour
class_to_rgb = dict(zip(rgb_to_class.values(), rgb_to_class.keys()))

# Colours rescaled to [0, 1] and ordered by class index for matplotlib
class_colors = {
    class_index: tuple(component / 255.0 for component in rgb)
    for class_index, rgb in class_to_rgb.items()
}
colors = np.array([class_colors[class_index] for class_index in sorted(class_colors)])
cmap = mcolors.ListedColormap(colors)
# One bin per class, with boundaries halfway between consecutive indices
norm = mcolors.BoundaryNorm(
    boundaries=np.arange(len(class_colors) + 1) - 0.5,
    ncolors=len(class_colors),
)

# Function to map class indices back to RGB colors
def map_class_to_rgb(class_mask):
    """Convert a 2-D array of class indices into an RGB image.

    Args:
        class_mask: Array whose first two dimensions are (rows, cols) and
            whose values are class indices.

    Returns:
        np.uint8 array of shape (rows, cols, 3). Indices that are not keys of
        `class_to_rgb` stay black (0, 0, 0).
    """
    n_rows, n_cols = class_mask.shape[0], class_mask.shape[1]
    rgb_mask = np.zeros((n_rows, n_cols, 3), dtype=np.uint8)
    for class_index in class_to_rgb:
        rgb_mask[class_mask == class_index] = class_to_rgb[class_index]
    return rgb_mask

# Prediction code
plt.figure(figsize=(15, 10))

# Take the first batch from the validation dataset. The dataset is not
# shuffled, so sample i of this batch corresponds to valid_y[i].
batch = next(iter(valid_dataset))
batch_x, batch_y = batch

# Number of samples in the batch = number of rows in the figure
num_images = batch_x.shape[0]

# One forward pass for the whole batch instead of one predict() call per image
batch_predictions = model.predict(batch_x)

for i in range(num_images):
    image = batch_x[i].numpy()
    # read_image keeps OpenCV's BGR channel order; matplotlib expects RGB,
    # so reverse the channel axis for display only
    image_rgb = image[..., ::-1]

    # Convert softmax output to class indices, then to label colours
    predicted_class_indices = np.argmax(batch_predictions[i], axis=-1)
    predicted_mask_rgb = map_class_to_rgb(predicted_class_indices)

    # Load the original label image for comparison
    original_label_path = valid_y[i]
    original_label = cv2.imread(original_label_path, cv2.IMREAD_COLOR)
    original_label = cv2.cvtColor(original_label, cv2.COLOR_BGR2RGB)
    # Nearest-neighbour keeps class colours exact instead of blending them at borders
    original_label = cv2.resize(
        original_label, (width, height), interpolation=cv2.INTER_NEAREST
    ) / 255.0

    # Row i: input image | original label | predicted mask
    plt.subplot(num_images, 3, 3*i + 1)
    plt.imshow(image_rgb)
    plt.title(f"Input Image {i+1}")
    plt.axis("off")

    plt.subplot(num_images, 3, 3*i + 2)
    plt.imshow(original_label)
    plt.title(f"Original Label {i+1}")
    plt.axis("off")

    plt.subplot(num_images, 3, 3*i + 3)
    plt.imshow(predicted_mask_rgb)
    plt.title(f"Predicted Mask {i+1}")
    plt.axis("off")

plt.tight_layout()
plt.show()

In [None]:
# Predict a mask for every test image, save it, and record the inference time
time_taken = []
for image_path in tqdm(test_x):
    # Path layout is .../<sequence>/Images/<file>; take the sequence folder and file name
    seq_folder = os.path.basename(os.path.dirname(os.path.dirname(image_path)))
    image_name = os.path.basename(image_path)

    # Read and preprocess the image the same way as read_image (BGR, scaled by 1/255)
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (width, height))
    image = image / 255.0
    image = np.expand_dims(image, axis=0)

    # Predict and measure time; verbose=0 keeps predict's own progress bar
    # from interleaving with tqdm's
    start_time = time.time()
    p = model.predict(image, verbose=0)[0]  # Remove batch dimension
    total_time = time.time() - start_time
    time_taken.append(total_time)

    # Convert softmax output to a single-channel mask of class indices
    p_class_indices = np.argmax(p, axis=-1)

    # Map class indices back to RGB colors using the color map
    p_rgb = np.zeros((p_class_indices.shape[0], p_class_indices.shape[1], 3), dtype=np.uint8)
    for rgb, idx in color_map.items():
        p_rgb[p_class_indices == idx] = rgb

    # cv2.imwrite expects BGR channel order, so convert before saving
    p_rgb = cv2.cvtColor(p_rgb, cv2.COLOR_RGB2BGR)

    # Save the mask, prefixing the file name with its sequence folder
    save_path_with_name = os.path.join(save_path, f"{seq_folder}_{image_name}")
    cv2.imwrite(save_path_with_name, p_rgb)

In [None]:
# Average per-image prediction time and the corresponding frames per second
mean_time = np.asarray(time_taken).mean()
print(f"Mean time taken: {mean_time}")
mean_fps = 1.0 / mean_time
print(f"Mean FPS: {mean_fps}")

In [None]:
# Archive /kaggle/working recursively into file.zip (shell command via IPython's `!`)
!zip -r file.zip /kaggle/working
from IPython.display import FileLink
# Last expression of the cell: displays a link to the archive
FileLink(r'file.zip')