In [None]:
import os
from glob import glob
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, Add, GlobalAveragePooling2D, Dense, MaxPooling2D, Input, UpSampling2D
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import matplotlib.colors as mcolors
import pandas as pd
from keras.models import Model
from keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPooling2D, UpSampling2D, Concatenate, Reshape

In [None]:
# Training hyperparameters.
batch_size, epochs = 5, 100
lr = 1e-3

# Spatial size (in pixels) that every image and mask is resized to.
width = height = 512

In [None]:
# Root folder of the UAVid dataset.
dataset_path = "/kaggle/input/uavid-v1"

# The checkpoint and the CSV training log share one output folder.
files_dir = os.path.join("files", "modified_uavid_dataset")
model_file, log_file = (
    os.path.join(files_dir, artifact)
    for artifact in ("UnetModel.keras", "Log-Unet.csv")
)

def create_dir(path):
    """Create directory ``path`` (and any missing parents) if it does not exist.

    The call is idempotent: an already existing directory is left untouched.
    ``exist_ok=True`` replaces the previous check-then-create sequence, which
    could raise if the directory appeared between the check and the creation.

    Args:
        path: Directory path to create.
    """
    os.makedirs(path, exist_ok=True)
        
# Make sure the folder used for the model checkpoint and the CSV log exists.
create_dir(files_dir)

In [None]:
def load_data(path):
    """Collect sorted image and label file paths for training and validation.

    Only ``uavid_train/seq1`` and ``uavid_val/seq16`` are scanned. Images and
    labels are paired purely by their position in the sorted listings.

    Args:
        path: Dataset root containing ``uavid_train`` and ``uavid_val``.

    Returns:
        ``((train_x, train_y), (valid_x, valid_y))`` where every element is a
        sorted list of file paths (empty when the folder does not exist).
    """
    def _listing(split, sequence, kind):
        # Every file directly inside <path>/<split>/<sequence>/<kind>/
        return sorted(glob(os.path.join(path, split, sequence, kind, "*")))

    kinds = ("Images", "Labels")
    train_pair = tuple(_listing("uavid_train", "seq1", kind) for kind in kinds)
    valid_pair = tuple(_listing("uavid_val", "seq16", kind) for kind in kinds)
    return train_pair, valid_pair

In [None]:
# File lists for the fixed training / validation sequences.
(train_x, train_y), (valid_x, valid_y) = load_data(dataset_path)

# Hold out 100 training samples (fixed seed for reproducibility) ...
train_x, additional_valid_x, train_y, additional_valid_y = train_test_split(
    train_x,
    train_y,
    random_state=42,
    test_size=100,
)

# ... and append them to the validation set.
valid_x = np.concatenate((valid_x, additional_valid_x))
valid_y = np.concatenate((valid_y, additional_valid_y))

print(f"New Train: {len(train_x)} - {len(train_y)}")
print(f"New Valid: {len(valid_x)} - {len(valid_y)}")

In [None]:
def read_image(path):
    """Load one image from disk as a normalised ``float32`` array.

    Args:
        path: File path, either ``bytes`` (as handed over by
            ``tf.numpy_function``) or ``str``.

    Returns:
        ``np.float32`` array of shape ``(height, width, 3)`` with values in
        ``[0, 1]``. The channel order is the one produced by ``cv2.imread``
        (BGR); no colour conversion is applied here.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    if isinstance(path, bytes):
        path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    if x is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an obscure error in cv2.resize.
        raise FileNotFoundError(f"Could not read image: {path}")
    x = cv2.resize(x, (width, height))
    x = x / 255.0
    return x.astype(np.float32)

In [None]:
# UAVid label palette: maps the RGB colour used in the label images to a
# class index. The position in the list below is the class index.
color_map = {
    rgb: class_index
    for class_index, rgb in enumerate([
        (0, 0, 0),        # 0: Background clutter
        (128, 0, 0),      # 1: Building
        (128, 64, 128),   # 2: Road
        (0, 128, 0),      # 3: Tree
        (128, 128, 0),    # 4: Low vegetation
        (64, 0, 128),     # 5: Moving car
        (192, 0, 192),    # 6: Static car
        (64, 64, 0),      # 7: Human
    ])
}

def read_mask(path):
    """Load a colour-coded label image and convert it to class indices.

    Args:
        path: File path, either ``bytes`` (as handed over by
            ``tf.numpy_function``) or ``str``.

    Returns:
        ``np.uint8`` array of shape ``(height, width)`` holding the class
        index of every pixel. Pixels whose colour is not a key of
        ``color_map`` keep the initial value 0.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    if isinstance(path, bytes):
        path = path.decode()
    mask = cv2.imread(path)
    if mask is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read mask: {path}")
    # color_map keys are RGB tuples, OpenCV delivers BGR.
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2RGB)
    # Nearest-neighbour keeps label colours exact (no blended colours).
    mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)

    class_indices = np.zeros((height, width), dtype=np.uint8)
    for rgb, idx in color_map.items():
        # A pixel belongs to the class when all three channels match.
        class_indices[(mask == rgb).all(axis=-1)] = idx

    return class_indices

In [None]:
def tf_parse(x, y):
    """Turn an (image path, mask path) pair into decoded tensors.

    The NumPy/OpenCV loaders are wrapped in ``tf.numpy_function``; the static
    shapes are then set explicitly on the resulting tensors.

    Args:
        x: Image file path tensor.
        y: Mask file path tensor.

    Returns:
        ``(image, mask)``: ``float32`` tensor of shape ``(height, width, 3)``
        and ``uint8`` tensor of shape ``(height, width)``.
    """
    def _load_pair(image_path, mask_path):
        return read_image(image_path), read_mask(mask_path)

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.uint8])
    image.set_shape([height, width, 3])
    mask.set_shape([height, width])
    return image, mask

In [None]:
def tf_dataset(x, y, batch=6):
    """Build a batched, prefetched ``tf.data`` pipeline from path lists.

    Args:
        x: Sequence of image file paths.
        y: Sequence of mask file paths, aligned with ``x``.
        batch: Number of samples per batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, masks)`` batches in the
        order of the given paths (no shuffling is applied).
    """
    return (
        tf.data.Dataset.from_tensor_slices((x, y))
        .map(tf_parse, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch)
        .prefetch(tf.data.AUTOTUNE)
    )

In [None]:
# Same pipeline settings for both splits.
train_dataset, valid_dataset = (
    tf_dataset(paths_x, paths_y, batch=batch_size)
    for paths_x, paths_y in ((train_x, train_y), (valid_x, valid_y))
)

In [None]:
class SelfAttention(layers.Layer):
    """Residual self-attention over all spatial positions of a feature map.

    The ``(batch, height, width, channels)`` input is projected to query, key
    and value maps with 1x1 convolutions, flattened to ``height * width``
    tokens, combined by scaled dot-product attention and added back to the
    input.

    The attention matrix has ``(height * width) ** 2`` entries per sample, so
    memory grows quickly with the spatial size of the input.

    Args:
        embed_dim: Number of channels of the query/key/value projections.
            Must equal the channel count of the input, because the attention
            output is added to the input.
        **kwargs: Standard ``Layer`` arguments (``name``, ``trainable``,
            ``dtype``, ...). Accepting them is required for Keras to rebuild
            the layer from a saved configuration.
    """

    def __init__(self, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.query_dense = layers.Conv2D(embed_dim, kernel_size=1)
        self.key_dense = layers.Conv2D(embed_dim, kernel_size=1)
        self.value_dense = layers.Conv2D(embed_dim, kernel_size=1)
        self.softmax = layers.Softmax(axis=-1)

    def call(self, inputs):
        """Apply attention to ``inputs`` and return a tensor of the same shape."""
        channels = inputs.shape[-1]
        if channels is not None and channels != self.embed_dim:
            # The residual sum below needs matching channel counts; fail with
            # a clear message instead of a reshape error at run time.
            raise ValueError(
                f"SelfAttention expects inputs with {self.embed_dim} channels "
                f"(embed_dim), but received {channels}."
            )

        dims = tf.shape(inputs)
        batch, rows, cols = dims[0], dims[1], dims[2]
        tokens = rows * cols

        # Compute Q, K, V and flatten the spatial grid into a token axis.
        query = tf.reshape(self.query_dense(inputs), (batch, tokens, self.embed_dim))
        key = tf.reshape(self.key_dense(inputs), (batch, tokens, self.embed_dim))
        value = tf.reshape(self.value_dense(inputs), (batch, tokens, self.embed_dim))

        # Scaled dot-product scores: Q * K^T / sqrt(d_k)
        score = tf.matmul(query, key, transpose_b=True)
        score = score / tf.math.sqrt(tf.cast(self.embed_dim, score.dtype))

        # Softmax over the key axis, then weight the values.
        attention_weights = self.softmax(score)
        attention_output = tf.matmul(attention_weights, value)

        # Restore the spatial layout and add the residual connection.
        attention_output = tf.reshape(
            attention_output, (batch, rows, cols, self.embed_dim)
        )
        return inputs + attention_output

    def get_config(self):
        """Return the layer configuration, including ``embed_dim``."""
        config = super().get_config()
        config.update({"embed_dim": self.embed_dim})
        return config

In [None]:
def _conv_bn_relu(x, filters, kernel):
    """Apply Conv2D (``kernel`` x ``kernel``, same padding) -> BatchNormalization -> ReLU."""
    x = Conv2D(filters, (kernel, kernel), padding="same")(x)
    x = BatchNormalization()(x)
    return Activation("relu")(x)


def SegUNet(input_shape, n_labels, kernel=3, pool_size=(2, 2), output_mode="softmax"):
    """Build a U-Net style segmentation model with self-attention blocks.

    The encoder has four pooling stages (32, 64, 128, 256 filters) followed
    by a 512-filter bottleneck; the decoder mirrors it with upsampling and
    skip connections. ``SelfAttention`` is applied after the 256-filter
    encoder stage, after the bottleneck and after the 256-filter decoder
    stage. It is not applied at the higher-resolution stages to limit memory
    use.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
            With the default ``pool_size`` the height and width must be
            divisible by 16 so that the skip connections line up.
        n_labels: Number of output classes (channels of the final 1x1 conv).
        kernel: Side length of the square convolution kernels.
        pool_size: Factor used by every pooling and upsampling layer.
        output_mode: Activation of the output layer.

    Returns:
        An uncompiled Keras ``Model`` mapping images to per-pixel class scores.
    """
    inputs = Input(shape=input_shape)

    # Encoder
    conv1 = _conv_bn_relu(inputs, 32, kernel)
    conv2 = _conv_bn_relu(conv1, 32, kernel)
    pool1 = MaxPooling2D(pool_size)(conv2)

    conv3 = _conv_bn_relu(pool1, 64, kernel)
    conv4 = _conv_bn_relu(conv3, 64, kernel)
    pool2 = MaxPooling2D(pool_size)(conv4)

    conv5 = _conv_bn_relu(pool2, 128, kernel)
    conv6 = _conv_bn_relu(conv5, 128, kernel)
    pool3 = MaxPooling2D(pool_size)(conv6)

    conv7 = _conv_bn_relu(pool3, 256, kernel)
    # Attention on the deepest encoder stage; the result also feeds skip 1.
    conv7 = SelfAttention(embed_dim=256)(conv7)
    pool4 = MaxPooling2D(pool_size)(conv7)

    # Bottleneck
    conv8 = _conv_bn_relu(pool4, 512, kernel)
    conv9 = _conv_bn_relu(conv8, 512, kernel)
    conv9 = SelfAttention(embed_dim=512)(conv9)

    # Decoder
    up1 = UpSampling2D(pool_size)(conv9)
    concat1 = Concatenate()([up1, conv7])
    conv10 = _conv_bn_relu(concat1, 256, kernel)
    conv11 = _conv_bn_relu(conv10, 256, kernel)
    conv11 = SelfAttention(embed_dim=256)(conv11)

    up2 = UpSampling2D(pool_size)(conv11)
    concat2 = Concatenate()([up2, conv6])
    conv12 = _conv_bn_relu(concat2, 128, kernel)
    conv13 = _conv_bn_relu(conv12, 128, kernel)

    up3 = UpSampling2D(pool_size)(conv13)
    concat3 = Concatenate()([up3, conv4])
    conv14 = _conv_bn_relu(concat3, 64, kernel)
    conv15 = _conv_bn_relu(conv14, 64, kernel)

    up4 = UpSampling2D(pool_size)(conv15)
    concat4 = Concatenate()([up4, conv2])
    conv16 = _conv_bn_relu(concat4, 32, kernel)
    conv17 = _conv_bn_relu(conv16, 32, kernel)

    # Per-pixel class scores.
    outputs = Conv2D(n_labels, (1, 1), activation=output_mode)(conv17)

    return Model(inputs, outputs)

In [None]:
# Derive the model geometry from the shared configuration instead of
# repeating the literals, so the network always matches what the data
# pipeline produces (height x width images, one class per color_map entry).
input_shape = (height, width, 3)
model = SegUNet(input_shape, len(color_map))

In [None]:
# Print the layer-by-layer overview of the freshly built model.
model.summary()

In [None]:
# Adam with the configured learning rate; labels are integer class indices,
# hence the sparse loss and metric.
opt = tf.keras.optimizers.Adam(learning_rate=lr)
model.compile(
    optimizer=opt,
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

In [None]:
callbacks = [
    # Keep only the best checkpoint seen so far.
    ModelCheckpoint(filepath=model_file, save_best_only=True, verbose=1),
    # Divide the learning rate by 10 after 4 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=4, mode="auto"),
    # Per-epoch metrics go to the CSV log.
    CSVLogger(filename=log_file),
    # Stop after 10 stagnating epochs and roll back to the best weights.
    EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
]

In [None]:
# Keras' fit() returns a History object, not the model. Store it under its
# own name so that `model` keeps referring to the trained network.
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Folder that receives the colour-coded prediction images.
save_path = os.path.join("Prediction", "modified_uavid_dataset")
create_dir(save_path)

# Root of the test split.
# NOTE(review): `datasetpath` is not referenced anywhere else in the visible
# notebook (the test listing uses `dataset_path`) — confirm it is still needed.
datasetpath = "/kaggle/input/uavid-v1/uavid_test"

# Absolute location of the checkpoint to reload for inference.
model_file = "/kaggle/working/files/modified_uavid_dataset/UnetModel.keras"

In [None]:
@tf.keras.utils.register_keras_serializable(package='Custom', name='SelfAttention')
class SelfAttention(layers.Layer):
    """Serializable residual self-attention over a 4-D feature map.

    Registered with Keras as a serializable custom object (package
    ``'Custom'``, name ``'SelfAttention'``). The ``(batch, height, width,
    channels)`` input is projected to query, key and value maps with 1x1
    convolutions, flattened to ``height * width`` tokens, combined by scaled
    dot-product attention and added back to the input.

    Args:
        embed_dim: Number of channels of the query/key/value projections.
            Must equal the channel count of the input, because the attention
            output is added to the input.
        **kwargs: Standard ``Layer`` arguments (``name``, ``trainable``,
            ``dtype``, ...), needed when Keras rebuilds the layer from a
            saved configuration.
    """

    def __init__(self, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.query_dense = layers.Conv2D(embed_dim, kernel_size=1)
        self.key_dense = layers.Conv2D(embed_dim, kernel_size=1)
        self.value_dense = layers.Conv2D(embed_dim, kernel_size=1)
        self.softmax = layers.Softmax(axis=-1)

    def call(self, inputs):
        """Apply attention to ``inputs`` and return a tensor of the same shape."""
        channels = inputs.shape[-1]
        if channels is not None and channels != self.embed_dim:
            # The residual sum below needs matching channel counts; fail with
            # a clear message instead of a reshape error at run time.
            raise ValueError(
                f"SelfAttention expects inputs with {self.embed_dim} channels "
                f"(embed_dim), but received {channels}."
            )

        dims = tf.shape(inputs)
        batch, rows, cols = dims[0], dims[1], dims[2]
        tokens = rows * cols

        # Compute Q, K, V and flatten the spatial grid into a token axis.
        query = tf.reshape(self.query_dense(inputs), (batch, tokens, self.embed_dim))
        key = tf.reshape(self.key_dense(inputs), (batch, tokens, self.embed_dim))
        value = tf.reshape(self.value_dense(inputs), (batch, tokens, self.embed_dim))

        # Scaled dot-product scores: Q * K^T / sqrt(d_k)
        score = tf.matmul(query, key, transpose_b=True)
        score = score / tf.math.sqrt(tf.cast(self.embed_dim, score.dtype))

        # Softmax over the key axis, then weight the values.
        attention_weights = self.softmax(score)
        attention_output = tf.matmul(attention_weights, value)

        # Restore the spatial layout and add the residual connection.
        attention_output = tf.reshape(
            attention_output, (batch, rows, cols, self.embed_dim)
        )
        return inputs + attention_output

    def get_config(self):
        """Return the layer configuration, including ``embed_dim``."""
        config = super().get_config()
        config.update({"embed_dim": self.embed_dim})
        return config

In [None]:
# NOTE(review): unsafe deserialization lets a model file execute arbitrary
# Python (e.g. Lambda layers). Only load checkpoints produced by this
# notebook; confirm whether this switch is needed at all for this model.
tf.keras.config.enable_unsafe_deserialization()

# Map the class name stored in the checkpoint to the Python implementation.
custom_objects = dict(SelfAttention=SelfAttention)
model = tf.keras.models.load_model(
    filepath=model_file,
    custom_objects=custom_objects,
)

In [None]:
def _plot_training_curve(log_data, metric, label, out_path):
    """Plot the training and validation curve of one metric, save and show it.

    Args:
        log_data: DataFrame with the columns ``metric`` and ``val_<metric>``.
        metric: Column name of the training metric (e.g. ``'loss'``).
        label: Human-readable metric name used for legend, axis and title.
        out_path: File the figure is written to.
    """
    fig = plt.figure(figsize=(6, 6))
    plt.plot(log_data[metric], label=f'Training {label}')
    plt.plot(log_data[f'val_{metric}'], label=f'Validation {label}')
    plt.xlabel('Epoch')
    plt.ylabel(label)
    plt.title(f'Training and Validation {label}')
    plt.legend()
    plt.savefig(out_path)
    plt.show()
    # savefig/show do not release the figure; close it explicitly.
    plt.close(fig)


# Load the CSV log file written by the CSVLogger callback
# (pandas is imported as `pd` in the notebook's import cell).
log_file = os.path.join(files_dir, "Log-Unet.csv")
log_data = pd.read_csv(log_file)
# Check available columns in the CSV
print(log_data.columns)

# Training and validation loss
loss_plot_file_path = os.path.join(files_dir, 'training_validation_loss.png')
_plot_training_curve(log_data, 'loss', 'Loss', loss_plot_file_path)

# Training and validation accuracy
accuracy_plot_file_path = os.path.join(files_dir, 'training_validation_accuracy.png')
_plot_training_curve(
    log_data, 'sparse_categorical_accuracy', 'Accuracy', accuracy_plot_file_path
)

In [None]:
# Every image of every test sequence: <dataset>/uavid_test/<seq>/Images/<file>
test_x = glob(os.path.join(dataset_path, "uavid_test", "*", "Images", "*"))
test_x.sort()
print(f"Test: {len(test_x)}")

In [None]:
# Run the model on every test image, record the per-image prediction time and
# write the colour-coded prediction to `save_path`.
time_taken = []
for image_path in tqdm(test_x):
    # Layout is .../<seq>/Images/<file>; os.path keeps this portable.
    images_dir, image_name = os.path.split(image_path)
    seq_folder = os.path.basename(os.path.dirname(images_dir))

    frame = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if frame is None:
        # cv2.imread returns None instead of raising on unreadable files.
        raise FileNotFoundError(f"Could not read test image: {image_path}")
    frame = cv2.resize(frame, (width, height))
    # Same preprocessing as read_image: scale to [0, 1], float32, BGR order.
    frame = (frame / 255.0).astype(np.float32)
    frame = np.expand_dims(frame, axis=0)

    start_time = time.time()
    # verbose=0: no Keras progress bar per image (tqdm already reports progress).
    p = model.predict(frame, verbose=0)[0]
    total_time = time.time() - start_time
    time_taken.append(total_time)

    p_class_indices = np.argmax(p, axis=-1)

    # Map class indices back to the RGB label colours.
    p_rgb = np.zeros((p_class_indices.shape[0], p_class_indices.shape[1], 3), dtype=np.uint8)
    for rgb, idx in color_map.items():
        p_rgb[p_class_indices == idx] = rgb

    # cv2.imwrite expects BGR.
    p_rgb = cv2.cvtColor(p_rgb, cv2.COLOR_RGB2BGR)

    save_path_with_name = os.path.join(save_path, f"{seq_folder}_{image_name}")
    cv2.imwrite(save_path_with_name, p_rgb)

In [None]:
# Inverse palette: class index -> RGB colour.
class_to_rgb = {class_index: rgb for rgb, class_index in color_map.items()}

# Same palette scaled to [0, 1], wrapped as a discrete matplotlib colormap
# with one bin per class index.
class_colors = {
    class_index: tuple(channel / 255.0 for channel in rgb)
    for class_index, rgb in class_to_rgb.items()
}
colors = np.array([class_colors[class_index] for class_index in sorted(class_colors)])
cmap = mcolors.ListedColormap(colors)
norm = mcolors.BoundaryNorm(
    boundaries=np.arange(len(class_colors) + 1) - 0.5,
    ncolors=len(class_colors),
)

def map_class_to_rgb(class_mask, palette=None):
    """Convert a 2-D array of class indices into an RGB image.

    Args:
        class_mask: Array of shape ``(rows, cols)`` holding class indices.
        palette: Optional mapping ``class index -> (R, G, B)``. Defaults to
            the module-level ``class_to_rgb``.

    Returns:
        ``np.uint8`` array of shape ``(rows, cols, 3)``. Pixels whose class
        index is not in the palette stay black.
    """
    if palette is None:
        palette = class_to_rgb
    rgb_mask = np.zeros((class_mask.shape[0], class_mask.shape[1], 3), dtype=np.uint8)
    for class_index, rgb in palette.items():
        rgb_mask[class_mask == class_index] = rgb
    return rgb_mask


# Side-by-side comparison (input / ground truth / prediction) for the first
# validation batch.
batch_x, batch_y = next(iter(valid_dataset))
num_images = batch_x.shape[0]

# One forward pass for the whole batch instead of one call per image.
predictions = model.predict(batch_x, verbose=0)

fig, axes = plt.subplots(num_images, 3, figsize=(15, 10), squeeze=False)

for i in range(num_images):
    image = batch_x[i].numpy()
    mask = batch_y[i].numpy()
    predicted_class_indices = np.argmax(predictions[i], axis=-1)

    panels = (
        # read_image keeps OpenCV's BGR order; flip to RGB for matplotlib.
        (image[..., ::-1], f"Input Image {i+1}"),
        # Colourise the mask that belongs to this exact sample rather than
        # re-reading a label file by index, which only matches while the
        # dataset order is untouched.
        (map_class_to_rgb(mask), f"Original Label {i+1}"),
        (map_class_to_rgb(predicted_class_indices), f"Predicted Mask {i+1}"),
    )
    for ax, (picture, title) in zip(axes[i], panels):
        ax.imshow(picture)
        ax.set_title(title)
        ax.axis("off")

plt.tight_layout()
plt.show()

In [None]:
# Archive everything under /kaggle/working into SAMSEGU.zip (IPython shell
# escape), then display a link to the archive as the cell output.
!zip -r SAMSEGU.zip /kaggle/working
from IPython.display import FileLink
FileLink(r'SAMSEGU.zip')