In [17]:
import tensorflow as tf
from tensorflow.keras import layers, Model

def build_backbone(input_shape):
    """Return a ResNet50 feature extractor truncated at ``conv4_block6_out``.

    Args:
        input_shape: ``(height, width, channels)`` of the images fed to the
            network; forwarded to ``tf.keras.applications.ResNet50``.

    Returns:
        A ``Model`` mapping the ResNet50 input to the activations of the
        ``conv4_block6_out`` layer.
    """
    # The classification head is dropped; only the early/mid convolutional
    # stages are used as the encoder.
    resnet = tf.keras.applications.ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape)
    stage4_features = resnet.get_layer('conv4_block6_out').output
    return Model(inputs=resnet.input, outputs=stage4_features)

# Pyramid Pooling Module (PPM)
from keras import backend as K

# def pyramid_pooling_module(input_tensor, pool_sizes):
#     input_shape = K.int_shape(input_tensor)[1:3]  # (H, W)
#     pooled_outputs = [input_tensor]

#     for pool_size in pool_sizes:
#         pooled = layers.AveragePooling2D(pool_size)(input_tensor)
#         pooled = layers.Conv2D(512, (1, 1), use_bias=False)(pooled)
#         pooled = layers.BatchNormalization()(pooled)
#         pooled = layers.ReLU()(pooled)
#         pooled = layers.Lambda(lambda x: tf.image.resize(x, input_shape))(pooled)
#         pooled_outputs.append(pooled)

#     return layers.Concatenate()(pooled_outputs)

class CusResize(layers.Layer):
    """Keras layer wrapping ``tf.image.resize`` with a fixed target size.

    Args:
        input_size: Target spatial size passed as the ``size`` argument of
            ``tf.image.resize``. Despite the name, this is the size the
            layer resizes *to*.
        method: Interpolation method name forwarded to ``tf.image.resize``.
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``,
            ``trainable``, ...). Accepting them lets Keras rebuild the layer
            from its config.
    """

    def __init__(self, input_size, method="bilinear", **kwargs):
        super().__init__(**kwargs)
        self.input_size = input_size
        self.method = method

    def call(self, x):
        return tf.image.resize(x, self.input_size, method=self.method)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({"input_size": self.input_size, "method": self.method})
        return config


class PyramidPoolingLayer(layers.Layer):
    """Pyramid pooling: pool at several scales, project, and re-expand.

    For every entry of ``pool_sizes`` the input is average-pooled, passed
    through a 1x1 convolution with 512 filters, bilinearly upsampled by the
    same factor and finally resized to the input's spatial size. The branch
    outputs are concatenated along the channel axis (the input itself is not
    part of the concatenation).

    Sub-layers are created once in ``__init__`` rather than inside ``call``:
    layers instantiated inside ``call`` are re-created on every invocation,
    so their convolution weights are never owned (or trained) by this layer.

    Args:
        pool_sizes: Iterable of pooling window sizes, one per pyramid branch.
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, pool_sizes, **kwargs):
        super().__init__(**kwargs)
        self.pool_sizes = pool_sizes
        # One (pool, conv, upsample) triple per pyramid level.
        self.pool_layers = [
            layers.AveragePooling2D(pool_size) for pool_size in pool_sizes
        ]
        self.conv_layers = [
            layers.Conv2D(512, (1, 1), padding='same') for _ in pool_sizes
        ]
        self.upsample_layers = [
            layers.UpSampling2D(size=pool_size, interpolation='bilinear')
            for pool_size in pool_sizes
        ]
        self.concat = layers.Concatenate()

    def call(self, x):
        input_height, input_width = tf.shape(x)[1], tf.shape(x)[2]
        pool_outputs = []
        for pool, conv, upsample in zip(
                self.pool_layers, self.conv_layers, self.upsample_layers):
            pooled = conv(pool(x))
            upsam = upsample(pooled)
            # Pooling truncates when the size is not divisible by the window,
            # so the upsampled map can be slightly smaller than the input;
            # the final resize snaps every branch back to the input size.
            resized = tf.image.resize(
                upsam, [input_height, input_width], method='bilinear')
            pool_outputs.append(resized)
        return self.concat(pool_outputs)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({"pool_sizes": list(self.pool_sizes)})
        return config


# PSPNet model
def build_pspnet(input_shape=(1080, 1920, 3), num_classes=40):
    """Assemble the PSPNet-style segmentation model.

    Pipeline: ResNet backbone -> pyramid pooling -> 3x3 conv/BN/ReLU ->
    concatenation with the backbone features -> 3x3 conv -> x16 bilinear
    upsampling -> resize to the input resolution -> 1x1 classifier ->
    per-pixel softmax.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
        num_classes: Number of output channels of the final 1x1 convolution.

    Returns:
        A ``Model`` mapping an image batch to per-pixel class probabilities.
    """
    image_in = layers.Input(shape=input_shape)

    # Encoder: truncated ResNet feature extractor.
    encoder = build_backbone(input_shape)
    feature_map = encoder(image_in)

    # Multi-scale context from the pyramid pooling branches.
    pyramid = PyramidPoolingLayer(
        pool_sizes=[(1, 1), (2, 2), (4, 4), (8, 8)])(feature_map)

    fused = layers.Conv2D(512, (3, 3), padding="same", use_bias=False)(pyramid)
    fused = layers.BatchNormalization()(fused)
    fused = layers.ReLU()(fused)

    # Skip connection: merge pyramid context with the raw backbone features.
    decoded = layers.Concatenate()([fused, feature_map])
    decoded = layers.Conv2D(512, (3, 3), padding="same")(decoded)

    # x16 upsampling may overshoot the input height/width (the feature map
    # size is rounded up), so an explicit resize brings it back exactly.
    decoded = layers.UpSampling2D(size=(16, 16), interpolation='bilinear')(decoded)
    decoded = CusResize(input_shape[:2])(decoded)

    class_scores = layers.Conv2D(num_classes, (1, 1))(decoded)
    probabilities = layers.Softmax()(class_scores)

    return Model(image_in, probabilities)

# Full-HD RGB frames in, one probability map per class out.
input_shape = (1080, 1920, 3)
num_classes = 40
pspnet_model = build_pspnet(input_shape=input_shape, num_classes=num_classes)

# Print the layer table to sanity-check the architecture.
pspnet_model.summary()


<KerasTensor shape=(None, 68, 120, 1024), dtype=float32, sparse=False, name=keras_tensor_2915> features
<KerasTensor shape=(None, 68, 120, 2048), dtype=float32, sparse=False, name=keras_tensor_2916> ppm
<KerasTensor shape=(None, 68, 120, 512), dtype=float32, sparse=False, name=keras_tensor_2919> after conv
<KerasTensor shape=(None, 1088, 1920, 512), dtype=float32, sparse=False, name=keras_tensor_2920> cus upsam
<KerasTensor shape=(None, 1080, 1920, 40), dtype=float32, sparse=False, name=keras_tensor_2922> logits


In [29]:
import tensorflow as tf
import numpy as np
import cv2

# Colour palette used to translate between colour-coded masks and class
# indices: entry ``i`` is the colour of class ``i`` (40 entries).
# NOTE(review): several colours are repeated -- [0, 0, 142] (indices 12 and
# 39), [153, 153, 153] (indices 26 and 27) and [0, 0, 0] (indices 35-38).
# A pixel with one of these colours is marked for every matching index by
# rgb_to_class_tensor, so its label vector is not strictly one-hot.
CLASS_COLORS = [
    [128, 64, 128], [250, 170, 160], [81, 0, 81], [244, 35, 232], [230, 150, 140],
    [152, 251, 152], [220, 20, 60], [246, 198, 145], [255, 0, 0], [0, 0, 230],
    [119, 11, 32], [255, 204, 54], [0, 0, 142], [0, 0, 70], [0, 60, 100],
    [0, 0, 90], [0, 0, 110], [0, 80, 100], [136, 143, 153], [220, 190, 40],
    [102, 102, 156], [190, 153, 153], [180, 165, 180], [174, 64, 67], [220, 220, 0],
    [250, 170, 30], [153, 153, 153], [153, 153, 153], [169, 187, 214], [70, 70, 70],
    [150, 100, 100], [150, 120, 90], [107, 142, 35], [70, 130, 180], [169, 187, 21],
    [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 142]
]

def rgb_to_class_tensor(rgb_mask):
    """Convert a colour-coded mask into a per-class indicator tensor.

    Args:
        rgb_mask: ``(H, W, 3)`` array of colours, or ``(H, W, 4)`` in which
            case the trailing alpha channel is ignored.

    Returns:
        ``float32`` array of shape ``(H, W, len(CLASS_COLORS))`` whose channel
        ``i`` is 1 where the pixel equals ``CLASS_COLORS[i]`` and 0 elsewhere.
        Pixels matching no palette colour are all-zero.
    """
    has_alpha = rgb_mask.shape[-1] == 4
    if has_alpha:
        rgb_mask = rgb_mask[..., :3]

    rows, cols, _ = rgb_mask.shape
    indicators = np.zeros((rows, cols, len(CLASS_COLORS)), dtype=np.float32)

    # Fill one channel per palette entry with its exact-colour match map.
    for class_idx, class_color in enumerate(CLASS_COLORS):
        indicators[..., class_idx] = np.all(rgb_mask == class_color, axis=-1)

    return indicators

def class_tensor_to_rgb(class_tensor):
    """Render per-class scores as a colour-coded mask.

    Args:
        class_tensor: ``(H, W, num_classes)`` array of per-pixel class scores.

    Returns:
        ``uint8`` array of shape ``(H, W, 3)`` where each pixel carries the
        ``CLASS_COLORS`` entry of its highest-scoring class. Classes without
        a palette entry are rendered as ``[0, 0, 0]``.
    """
    height, width, num_classes = class_tensor.shape

    # Lookup table indexed by class id; rows beyond the palette stay black.
    palette = np.zeros((max(num_classes, len(CLASS_COLORS)), 3), dtype=np.uint8)
    palette[:len(CLASS_COLORS)] = CLASS_COLORS

    winning_class = np.argmax(class_tensor, axis=-1)
    return palette[winning_class]

def _read_resized(path, interpolation):
    """Read ``path`` with OpenCV and resize it to 1920x1080 (width x height).

    Raises:
        FileNotFoundError: if ``cv2.imread`` cannot load the file (it returns
            ``None`` instead of raising for missing/undecodable files).
    """
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError(
            f"Could not read image (missing or not decodable): {path}")
    return cv2.resize(image, (1920, 1080), interpolation=interpolation)


def load_dataset(file_path):
    """Load images (and masks, when listed) from a whitespace-separated list file.

    Each line of ``file_path`` holds either ``<image_path> <mask_path>`` or a
    single ``<image_path>``. Lines with any other number of fields are
    skipped. Paths containing whitespace are therefore not supported.

    Args:
        file_path: Path of the text file listing the samples.

    Returns:
        ``(images, masks)`` as NumPy arrays. Images are the resized frames
        exactly as decoded by OpenCV (BGR channel order). Masks are the
        per-class tensors produced by ``rgb_to_class_tensor``, or ``None``
        for lines that list no mask.

    Raises:
        FileNotFoundError: if a listed image or mask cannot be read.
    """
    print("""Load dataset from the provided text file.""")
    input_images = []
    masks = []

    with open(file_path, 'r') as f:
        for line in f:
            print(line, end=" ")
            parts = line.strip().split()
            if len(parts) == 2:
                image_path, mask_path = parts
                # INTER_LINEAR is cv2.resize's default, so images are
                # processed exactly as before.
                input_images.append(_read_resized(image_path, cv2.INTER_LINEAR))

                # Nearest-neighbour keeps label colours intact; bilinear
                # resizing would blend neighbouring classes into colours
                # that match no palette entry.
                mask = _read_resized(mask_path, cv2.INTER_NEAREST)
                # OpenCV decodes to BGR, while the palette lookup expects RGB.
                mask = cv2.cvtColor(mask, cv2.COLOR_BGR2RGB)
                masks.append(rgb_to_class_tensor(mask))

            elif len(parts) == 1:
                input_images.append(_read_resized(parts[0], cv2.INTER_LINEAR))
                masks.append(None)

    return np.array(input_images), np.array(masks)


# Read the three splits; the testing list carries no masks, so its second
# return value is discarded.
train_images, train_masks = load_dataset(file_path='data/training.txt')
val_images, val_masks = load_dataset(file_path='data/validation.txt')
test_images, _ = load_dataset(file_path='data/testing.txt')


def create_dataset(images, masks, batch_size=32):
    """Wrap in-memory images and masks in a shuffled, batched ``tf.data`` pipeline.

    Args:
        images: Array of input images, sliced along the first axis.
        masks: Array of targets aligned with ``images``.
        batch_size: Number of samples per batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image_batch, mask_batch)`` pairs,
        shuffled over the full dataset and prefetched.
    """
    return (
        tf.data.Dataset.from_tensor_slices((images, masks))
        .shuffle(buffer_size=len(images))
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

def augment_image(img, mask):
    """Randomly mirror an image and its mask together.

    A single uniform draw decides the flip, so image and mask always stay
    aligned: either both are flipped left-right or neither is.

    Returns:
        The ``(img, mask)`` pair, possibly flipped.
    """
    should_flip = tf.random.uniform(()) > 0.5
    if should_flip:
        img, mask = tf.image.flip_left_right(img), tf.image.flip_left_right(mask)
    return img, mask

# NOTE(review): each mask is a 1080x1920x40 float32 tensor (~330 MB), so a
# batch of 32 needs roughly 10 GB for the targets alone -- consider a much
# smaller batch size if training runs out of memory.
batch_size = 32
train_dataset = create_dataset(train_images, train_masks, batch_size)
# Dataset.map returns a *new* dataset; the result must be re-assigned or the
# augmentation is silently dropped. The map runs on already-batched tensors,
# so each random flip applies to a whole batch at once.
train_dataset = train_dataset.map(augment_image)
val_dataset = create_dataset(val_images, val_masks, batch_size)


Load dataset from the provided text file.
C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/train\201\frame0029_leftImg8bit.jpg C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/labels\201\frame0029_gtFine_labelColors.png
 C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/train\201\frame0299_leftImg8bit.jpg C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/labels\201\frame0299_gtFine_labelColors.png
 C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/train\201\frame0779_leftImg8bit.jpg C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/labels\201\frame0779_gtFine_labelColors.png
 C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/train\201\frame2519_leftImg8bit.jpg C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/labels\201\frame2519_gtFine_labelColors.png
 C:/Users/hp/Documents/code/Py/actual/Ai/inter_boot_2024/own/dataset/train\201\frame2819_le

In [30]:
# Training configuration: Adam with a small learning rate, categorical
# cross-entropy against the per-pixel class tensors, per-pixel accuracy.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
loss = tf.keras.losses.CategoricalCrossentropy()
metrics = [tf.keras.metrics.CategoricalAccuracy(name="accuracy")]

pspnet_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

epochs = 10

history = pspnet_model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=epochs
    )
# Keras 3 only accepts the ".keras" (native) or ".h5" extensions in
# Model.save; the previous "psp.tf" target raises a ValueError after the
# whole training run has finished.
pspnet_model.save("psp.keras")

Epoch 1/10


: 

In [None]:
def predict_and_save(model, test_images, output_folder="model_output", batch_size=4):
    """Run the model on ``test_images`` and write one colour mask PNG per image.

    Images are predicted in chunks of ``batch_size`` so that only one chunk
    of per-class score maps is held in memory at a time.

    Args:
        model: Model whose ``predict`` returns per-pixel class scores.
        test_images: Array of input images, indexed along the first axis.
        output_folder: Directory for the PNGs; created if missing.
        batch_size: Number of images predicted per chunk.

    Side effects:
        Writes ``predicted_mask_<i>.png`` into ``output_folder`` for every
        image index ``i``.
    """
    import os
    os.makedirs(output_folder, exist_ok=True)

    for start in range(0, len(test_images), batch_size):
        chunk = test_images[start:start + batch_size]
        predictions = model.predict(chunk, batch_size=batch_size, verbose=0)

        for offset, pred in enumerate(predictions):
            rgb_mask = class_tensor_to_rgb(pred)

            output_path = os.path.join(
                output_folder, f"predicted_mask_{start + offset}.png")
            # cv2.imwrite interprets arrays as BGR; convert so the saved PNG
            # shows the palette colours rather than red/blue-swapped ones.
            cv2.imwrite(output_path, cv2.cvtColor(rgb_mask, cv2.COLOR_RGB2BGR))

# Write colour masks for the whole test split into the default output folder.
predict_and_save(model=pspnet_model, test_images=test_images)
