In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

# Dataset locations. Raw strings keep the Windows backslashes literal: in a
# normal string literal `\C`, `\l` and `\g` are invalid escape sequences, which
# newer Python versions warn about.
img_dir = r'D:\Cityscapes\leftImg8bit_trainvaltest/train'
mask_dir = r'D:\Cityscapes\gtFine_trainvaltest/train'

def remap_labels(mask):
    """Return a copy of ``mask`` in which every label equal to 35 becomes 0.

    All other values are passed through unchanged and the input array is not
    modified. Label 0 is treated here as the background class.
    """
    is_label_35 = mask == 35
    remapped = np.where(is_label_35, 0, mask)
    return remapped

def load_images_and_masks(img_dir, mask_dir, size=(256, 256)):
    """Load image/label-mask pairs into two index-aligned NumPy arrays.

    Every sub-directory of ``img_dir`` is treated as one city. Each
    ``*_leftImg8bit.png`` found there is paired with the file of the same stem
    ending in ``_gtFine_labelIds.png`` under ``mask_dir/<city>``. Both are
    resized to ``size`` and the mask is passed through ``remap_labels``.

    Args:
        img_dir: Directory holding one sub-directory of images per city.
        mask_dir: Directory holding the matching per-city mask sub-directories.
        size: ``(width, height)`` handed to ``Image.resize``. Defaults to
            ``(256, 256)``.

    Returns:
        ``(images, masks)``: ``images`` stacks the RGB images and ``masks`` the
        remapped label arrays; entry ``i`` of each belongs to the same sample.

    Raises:
        FileNotFoundError: If a directory or an expected mask file is missing.
    """
    images = []
    masks = []

    # os.listdir() returns entries in arbitrary order; sorting makes the sample
    # order, and therefore any seeded split made afterwards, reproducible.
    for city in sorted(os.listdir(img_dir)):
        img_path = os.path.join(img_dir, city)
        if not os.path.isdir(img_path):
            # Stray files next to the city folders would crash os.listdir().
            continue
        mask_path = os.path.join(mask_dir, city)

        for file in sorted(os.listdir(img_path)):
            if not file.endswith('_leftImg8bit.png'):
                continue
            mask_file = file.replace('_leftImg8bit.png', '_gtFine_labelIds.png')

            # Context managers release the file handles as soon as the resized
            # copies exist, instead of leaving that to garbage collection.
            with Image.open(os.path.join(img_path, file)) as img:
                # Force three channels so every image stacks to the same shape.
                img_resized = img.convert('RGB').resize(size)
            with Image.open(os.path.join(mask_path, mask_file)) as mask:
                # Use NEAREST to avoid introducing new labels
                mask_resized = mask.resize(size, resample=Image.NEAREST)

            images.append(np.array(img_resized))
            masks.append(remap_labels(np.array(mask_resized)))

    return np.array(images), np.array(masks)

# Read every resized image/mask pair into memory.
images, masks = load_images_and_masks(img_dir, mask_dir)

# Hold out 20% of the pairs for validation, shuffling with a fixed seed.
train_imgs, val_imgs, train_masks, val_masks = train_test_split(
    images,
    masks,
    test_size=0.2,
    random_state=42,
)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Concatenate, UpSampling2D
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Model parameters: height, width, channels of the network input.
input_size = (256, 256, 3)

# Encoder: MobileNetV2 with ImageNet weights and without its classification head.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_size)

# Intermediate MobileNetV2 activations used as encoder outputs / skip
# connections. The sizes below are the spatial resolutions for the 256x256
# input configured above (each tap halves the previous one).
layer_names = [
    'block_1_expand_relu',   # 128x128
    'block_3_expand_relu',   # 64x64
    'block_6_expand_relu',   # 32x32
    'block_13_expand_relu',  # 16x16
    'block_16_project',      # 8x8
]
layers = [base_model.get_layer(name).output for name in layer_names]

# Create the feature extraction model: one input, one output per tapped layer.
down_stack = Model(inputs=base_model.input, outputs=layers)
down_stack.trainable = False  # Freeze the encoder

# Decoder/upsampling
def upsample(filters, size):
    """Build one decoder stage as a two-layer ``Sequential`` model.

    The stage first enlarges its input with ``UpSampling2D(size)`` and then
    applies a 3x3 'same'-padded convolution with ``filters`` output channels
    and a ReLU activation.
    """
    enlarge = UpSampling2D(size)
    refine = Conv2D(filters, 3, padding='same', activation='relu')
    return tf.keras.Sequential([enlarge, refine])

# Decoder stages, applied from the deepest feature map upwards. With the
# 256x256 input the encoder bottleneck is 8x8, and every stage doubles the
# resolution so it lines up with the next skip connection. The encoder exposes
# only four skips, so the zip() in the decoder loop can never reach a fifth
# stage; the stack therefore holds exactly four.
up_stack = [
    upsample(512, 2),  # 8x8 -> 16x16
    upsample(256, 2),  # 16x16 -> 32x32
    upsample(128, 2),  # 32x32 -> 64x64
    upsample(64, 2),   # 64x64 -> 128x128
]

inputs = Input(shape=input_size)

# The ImageNet-pretrained MobileNetV2 encoder expects pixels scaled to
# [-1, 1], but the image arrays are fed in as raw 0-255 values. Rescaling
# inside the model applies the same scaling at training and inference time.
x = tf.keras.layers.Rescaling(1.0 / 127.5, offset=-1.0)(inputs)

# Downsampling through the model: the last encoder output is the bottleneck,
# the remaining ones become skip connections ordered deepest-first.
skips = down_stack(x)
x = skips[-1]
skips = reversed(list(skips[:-1]))

# Upsampling and establishing the skip connections
for up, skip in zip(up_stack, skips):
    x = up(x)
    x = Concatenate()([x, skip])

# Upsample once more to match input dimensions
x = UpSampling2D((2, 2))(x)

# One output channel per label id; no activation is applied, so the model
# emits raw per-class logits.
# NOTE(review): Cityscapes labelIds are documented as 0-33 (34 ids), so 35
# channels would leave one unused -- confirm before shrinking this.
last = Conv2D(35, 3, padding='same')
x = last(x)

# Create the model
model = Model(inputs=inputs, outputs=x)

# Compile the model. The final Conv2D has no activation, so the network outputs
# raw logits. The string alias 'sparse_categorical_crossentropy' assumes
# probabilities (from_logits=False) and would compute the wrong loss, so the
# loss object is built explicitly with from_logits=True.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Callbacks: keep only the best checkpoint on disk and stop early after 10
# epochs without improvement (both use their default monitored quantity).
checkpoint = ModelCheckpoint('best_model.keras', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(patience=10, verbose=1)

# Start training
history = model.fit(
    train_imgs,
    train_masks,
    epochs=20,
    validation_data=(val_imgs, val_masks),
    callbacks=[checkpoint, early_stopping],
)
