# In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import cv2
import random
import matplotlib.pyplot as plt

# Colour that marks pixels without a usable class label.
IGNORE_COLOR = (255, 0, 255)

# Number of real classes; the ignore colour is mapped to this id (6) below.
NUM_CLASSES = 6

# Human-readable class names, indexed by class id.
CLASS_NAMES = ['Building', 'Clutter', 'Vegetation', 'Water', 'Background', 'Car']

# Subset of class ids (Building, Clutter, Water, Car).
IMPORTANT_CLASSES = [0, 1, 3, 5]

# Class id -> RGB colour for the six real classes.
CLASS_TO_COLOR = {
    0: (230, 25, 75),
    1: (145, 30, 180),
    2: (60, 180, 75),
    3: (245, 130, 48),
    4: (255, 255, 255),
    5: (0, 130, 200),
}

# RGB colour -> class id: the inverse of CLASS_TO_COLOR, plus the ignore
# colour appended last with id 6.
COLOR_TO_CLASS = {rgb: class_id for class_id, rgb in CLASS_TO_COLOR.items()}
COLOR_TO_CLASS[IGNORE_COLOR] = NUM_CLASSES


def compute_keep_prob_refined(row):
    """Score one tile's class composition and turn it into a keep probability.

    Args:
        row: Mapping (e.g. a DataFrame row) indexed by keys of the form
            "<class index>: <class name>", such as "0: Building", one per
            entry of CLASS_NAMES. The values are treated as fractions --
            presumably per-class pixel ratios; confirm against whatever
            produces the table.

    Returns:
        0.0 when the 'Background' value is at least 0.999; otherwise a
        logistic function of the combined diversity and rarity score.
    """
    per_class = {}
    for idx, name in enumerate(CLASS_NAMES):
        per_class[name] = row[f"{idx}: {name}"]

    # A tile that is (almost) entirely background is never kept.
    if per_class['Background'] >= 0.999:
        return 0.0

    foreground = [name for name in CLASS_NAMES if name != 'Background']
    foreground_ratios = np.array([per_class[name] for name in foreground])
    # A class counts as present once its value exceeds 0.01.
    present = np.array([per_class[name] > 0.01 for name in foreground], dtype=np.float32)

    # More classes present raises the score; uneven ratios lower it.
    diversity_score = (np.sum(present) / 5.0) - (0.5 * np.std(foreground_ratios))

    # Extra weight for Car, Water and Building content.
    rare_score = (
        5.0 * per_class['Car'] +
        3.5 * per_class['Water'] +
        1.5 * per_class['Building']
    )

    combined = diversity_score + rare_score
    # Logistic squashing centred at a score of 0.15 with steepness 8.
    return 1 / (1 + np.exp(-8 * (combined - 0.15)))

def load_image_paths(df, image_dir, elevation_dir, label_dir):
    """Build per-tile file paths and keep probabilities from a tile table.

    Args:
        df: DataFrame with a 'tile_id' column and, optionally, a
            'keep_prob' column.
        image_dir: Directory holding "<tile_id>-ortho.png" files.
        elevation_dir: Directory holding "<tile_id>-elev.npy" files.
        label_dir: Directory holding "<tile_id>-label.png" files.

    Returns:
        A 5-tuple of lists in row order:
        (image_paths, elevation_paths, label_paths, tile_ids, keep_probs).
        keep_probs holds np.float32 values and is 1.0 for every tile when
        df has no 'keep_prob' column.

    Raises:
        KeyError: If df has no 'tile_id' column.
    """
    # Read the columns directly instead of going through df.iterrows():
    # iterrows() packs every row into a single-dtype Series, so an integer
    # tile_id sitting next to a float column would come back as e.g. 12.0
    # and produce "12.0-ortho.png".
    tile_ids = df['tile_id'].tolist()

    image_paths = [os.path.join(image_dir, f"{tile_id}-ortho.png") for tile_id in tile_ids]
    elevation_paths = [os.path.join(elevation_dir, f"{tile_id}-elev.npy") for tile_id in tile_ids]
    label_paths = [os.path.join(label_dir, f"{tile_id}-label.png") for tile_id in tile_ids]

    if 'keep_prob' in df.columns:
        keep_probs = [np.float32(prob) for prob in df['keep_prob']]
    else:
        # Without explicit probabilities every tile is always kept.
        keep_probs = [np.float32(1.0) for _ in tile_ids]

    return image_paths, elevation_paths, label_paths, tile_ids, keep_probs

def augment_image(rgb, elev, label):
    """Randomly mirror an RGB / elevation / label triple in unison.

    A left-right flip and an up-down flip are decided independently, each
    applied when a fresh uniform sample exceeds 0.5, and always to all three
    tensors together so they stay aligned.

    Args:
        rgb: Image tensor accepted by the tf.image flip ops.
        elev: Auxiliary tensor flipped alongside rgb.
        label: Label tensor; a rank-2 label is given a temporary trailing
            axis for the flip ops.

    Returns:
        (rgb, elev, label), with the label's trailing axis squeezed away.
    """
    # The flip ops need a channel axis, so add one to a bare 2-D label.
    if tf.rank(label) == 2:
        label = tf.expand_dims(label, axis=-1)

    # Horizontal mirror.
    if tf.random.uniform([]) > 0.5:
        rgb, elev, label = (
            tf.image.flip_left_right(rgb),
            tf.image.flip_left_right(elev),
            tf.image.flip_left_right(label),
        )

    # Vertical mirror.
    if tf.random.uniform([]) > 0.5:
        rgb, elev, label = (
            tf.image.flip_up_down(rgb),
            tf.image.flip_up_down(elev),
            tf.image.flip_up_down(label),
        )

    return rgb, elev, tf.squeeze(label, axis=-1)

def decode_coloured_label(label_rgb):
    """Convert an RGB-coded label image into a 2-D map of integer class ids.

    Args:
        label_rgb: Tensor whose first two axes are height and width and
            whose pixels are 3-component colours; it is cast to uint8.

    Returns:
        int32 tensor of shape [height, width] holding the COLOR_TO_CLASS id
        of every pixel. Pixels whose colour is not a key of COLOR_TO_CLASS
        receive the id of IGNORE_COLOR rather than being silently labelled
        as class 0.
    """
    label_rgb = tf.cast(label_rgb, tf.uint8)
    pixels = tf.reshape(label_rgb, [-1, 3])
    palette = tf.constant(list(COLOR_TO_CLASS.keys()), dtype=tf.uint8)
    class_ids = tf.constant(list(COLOR_TO_CLASS.values()), dtype=tf.int32)

    # matches[p, k] is True where pixel p equals palette colour k.
    matches = tf.reduce_all(tf.equal(tf.expand_dims(pixels, 1), palette), axis=2)
    palette_index = tf.argmax(tf.cast(matches, tf.int32), axis=1)
    mapped = tf.gather(class_ids, palette_index)

    # argmax over an all-False row returns 0, which would label an unknown
    # colour as the first palette entry. Send such pixels to the ignore id.
    ignore_id = tf.constant(COLOR_TO_CLASS[IGNORE_COLOR], dtype=tf.int32)
    mapped = tf.where(tf.reduce_any(matches, axis=1), mapped, ignore_id)

    image_shape = tf.shape(label_rgb)
    return tf.reshape(mapped, [image_shape[0], image_shape[1]])

import numpy as np
import tensorflow as tf

# Elevation normalisation function
def normalise_elevation(elev, valid_min=-50, valid_max=500, raster_nodata=-32767):
    """Rescale an elevation raster linearly from [valid_min, valid_max] to [0, 1].

    Args:
        elev: NumPy array of elevation values.
        valid_min: Value mapped to 0; smaller values are clipped to it.
        valid_max: Value mapped to 1; larger values are clipped to it.
        raster_nodata: Sentinel marking cells without data.

    Returns:
        float32 array of the same shape as elev. Nodata cells come out as 0.0.
    """
    # Turn nodata cells into NaN so they pass through clipping and scaling
    # untouched and can be zeroed at the end.
    masked = np.where(elev == raster_nodata, np.nan, elev)
    span = valid_max - valid_min
    scaled = (np.clip(masked, valid_min, valid_max) - valid_min) / span
    return np.nan_to_num(scaled, nan=0.0).astype(np.float32)

# Wrapper to use inside tf.numpy_function
def _load_npy_elev_and_normalise(path):
    """Load a .npy elevation raster and return it normalised with a channel axis.

    Args:
        path: UTF-8 encoded bytes path to the .npy file.

    Returns:
        The output of normalise_elevation (called with its defaults) with one
        extra trailing axis of length 1.
    """
    raw = np.load(path.decode("utf-8"))
    normalised = normalise_elevation(raw)
    return normalised[..., np.newaxis]


# Elevation standardisation
def standardise_elevation(elev, raster_nodata=-32767):
    """Standardise an elevation raster to zero mean and unit variance per tile.

    The mean and standard deviation are taken over valid cells only. A cell
    is valid when it differs from raster_nodata and is finite.

    Args:
        elev: NumPy array of elevation values.
        raster_nodata: Sentinel marking cells without data.

    Returns:
        float32 array shaped like elev plus one trailing axis of length 1.
        Invalid cells are 0.0. The whole result is 0.0 when there are no
        valid cells or when the valid cells all share one value.
    """
    # Non-finite cells are excluded together with nodata: one NaN or inf
    # would otherwise turn the mean and std into NaN and zero the whole tile.
    valid_mask = (elev != raster_nodata) & np.isfinite(elev)
    valid_elev = elev[valid_mask]

    standardised = np.zeros_like(elev, dtype=np.float32)

    # Skip the statistics for a tile without valid cells; mean/std of an
    # empty selection only produce RuntimeWarnings and NaN.
    if valid_elev.size:
        std = valid_elev.std()
        # A constant tile (std == 0) stays all zeros instead of dividing by 0.
        if std > 0:
            standardised[valid_mask] = (valid_elev - valid_elev.mean()) / std

    return np.expand_dims(standardised, axis=-1)

# Wrapper for tf.numpy_function
def _load_npy_elev_and_standardise(path):
    """Load a .npy elevation raster and return its standardised form.

    Args:
        path: UTF-8 encoded bytes path to the .npy file.

    Returns:
        Whatever standardise_elevation returns for the loaded array, using
        its default nodata value.
    """
    raster_file = path.decode("utf-8")
    raw = np.load(raster_file)
    return standardise_elevation(raw)


def parse_elevation(rgb_path, elev_path, label_path, tile_id, split='train', augment=False, tile_size=256, dummy=False):
    """Load one tile as a 4-channel (RGB + elevation) image with a one-hot label.

    NOTE: a second function named ``parse_elevation`` (taking an additional
    ``slope_path`` argument) is defined further down in this file and rebinds
    the name, so once the whole file has run this version is no longer
    reachable as ``parse_elevation``.

    Args:
        rgb_path: Path of the RGB PNG.
        elev_path: Path of the raw elevation .npy file (ignored if dummy).
        label_path: Path of the colour-coded label PNG.
        tile_id: Unused; kept so the signature matches the dataset elements.
        split: Augmentation is only applied when this is 'train'.
        augment: Whether to apply random flips (train split only).
        tile_size: Output height and width.
        dummy: If true, replace the elevation channel with uniform random
            noise in [0, 1) instead of reading elev_path.

    Returns:
        (input_image, label_onehot): a float32 [tile_size, tile_size, 4]
        tensor and a one-hot label of depth NUM_CLASSES.
    """
    rgb = tf.io.read_file(rgb_path)
    rgb = tf.image.decode_png(rgb, channels=3)
    rgb = tf.image.convert_image_dtype(rgb, tf.float32)

    label = tf.io.read_file(label_path)
    label = tf.image.decode_png(label, channels=3)
    label = decode_coloured_label(label)

    if dummy:
        # Stand-in elevation channel: uniform noise with rgb's height and
        # width. (An all-zeros tensor used to be built here first and was
        # immediately overwritten.)
        elev = tf.random.uniform(tf.shape(rgb[..., :1]), 0, 1)
    else:
        # Load the raw raster and standardise it per tile.
        elev = tf.numpy_function(_load_npy_elev_and_standardise, [elev_path], tf.float32)
        # numpy_function drops static shape information; restore the rank.
        elev.set_shape([None, None, 1])

    if split == 'train' and augment:
        rgb, elev, label = augment_image(rgb, elev, label)

    label.set_shape([None, None])
    input_image = tf.concat([rgb, elev], axis=-1)
    input_image = tf.image.resize(input_image, [tile_size, tile_size])

    # Nearest-neighbour resizing keeps label values valid class ids.
    label = tf.image.resize(label[..., tf.newaxis], [tile_size, tile_size], method='nearest')
    label = tf.squeeze(label, axis=-1)
    label = tf.cast(label, tf.int32)
    label_onehot = tf.one_hot(label, depth=NUM_CLASSES)

    return input_image, label_onehot





def _load_npy(path):
    return np.load(path.decode("utf-8")).astype(np.float32)

def parse_elevation(rgb_path, elev_path, slope_path, label_path, tile_id,
                    split='train', augment=False, tile_size=256, dummy=False):
    """Load one tile as a 5-channel (RGB + elevation + slope) image with a one-hot label.

    Args:
        rgb_path: Path of the RGB PNG.
        elev_path: Path of the elevation .npy file (ignored if dummy).
        slope_path: Path of the slope .npy file (ignored if dummy).
        label_path: Path of the colour-coded label PNG.
        tile_id: Unused; kept so the signature matches the dataset elements.
        split: Augmentation is only applied when this is 'train'.
        augment: Whether to apply random flips (train split only).
        tile_size: Output height and width.
        dummy: If true, use uniform random noise for the elevation and slope
            channels instead of reading the .npy files.

    Returns:
        (input_image, label_onehot): a float32 [tile_size, tile_size, 5]
        tensor and a one-hot label of depth NUM_CLASSES.
    """
    # Load RGB
    rgb = tf.io.read_file(rgb_path)
    rgb = tf.image.decode_png(rgb, channels=3)
    rgb = tf.image.convert_image_dtype(rgb, tf.float32)

    # Load Label
    label = tf.io.read_file(label_path)
    label = tf.image.decode_png(label, channels=3)
    label = decode_coloured_label(label)

    if dummy:
        # Shape of a single-channel image with rgb's height and width. The
        # slice must be applied to the image: slicing tf.shape(rgb) itself
        # yields the one-element vector [height] and thus a rank-1 tensor.
        plane_shape = tf.shape(rgb[..., :1])
        elev = tf.random.uniform(plane_shape)
        slope = tf.random.uniform(plane_shape)
    else:
        # The files are used as stored, only cast to float32. Per the
        # original note they are expected to be pre-standardised -- confirm
        # against whatever writes them.
        elev = tf.numpy_function(_load_npy, [elev_path], tf.float32)
        slope = tf.numpy_function(_load_npy, [slope_path], tf.float32)
    # numpy_function drops static shape information; restore the rank.
    elev.set_shape([None, None, 1])
    slope.set_shape([None, None, 1])

    if split == 'train' and augment:
        # augment_image flips exactly three tensors in unison, so pack the
        # two auxiliary rasters into one 2-channel tensor for the call and
        # split them again afterwards.
        aux = tf.concat([elev, slope], axis=-1)
        rgb, aux, label = augment_image(rgb, aux, label)
        elev, slope = aux[..., :1], aux[..., 1:]
        rgb.set_shape([None, None, 3])
        elev.set_shape([None, None, 1])
        slope.set_shape([None, None, 1])

    label.set_shape([None, None])
    # Nearest-neighbour resizing keeps label values valid class ids.
    label = tf.image.resize(label[..., tf.newaxis], [tile_size, tile_size], method='nearest')
    label = tf.squeeze(label, axis=-1)
    label = tf.cast(label, tf.int32)
    label_onehot = tf.one_hot(label, depth=NUM_CLASSES)

    # Resize and concatenate input channels
    rgb = tf.image.resize(rgb, [tile_size, tile_size])
    elev = tf.image.resize(elev, [tile_size, tile_size])
    slope = tf.image.resize(slope, [tile_size, tile_size])
    input_image = tf.concat([rgb, elev, slope], axis=-1)

    return input_image, label_onehot






def parse_tile(rgb_path, label_path, tile_id, split='train', augment=False, tile_size=256):
    """Load one RGB-only tile together with its one-hot label.

    Args:
        rgb_path: Path of the RGB PNG.
        label_path: Path of the colour-coded label PNG.
        tile_id: Unused; kept so the signature matches the dataset elements.
        split: Augmentation is only applied when this is 'train'.
        augment: Whether to apply random flips (train split only).
        tile_size: Output height and width.

    Returns:
        (rgb, label_onehot): a float32 [tile_size, tile_size, 3] tensor and a
        one-hot label of depth NUM_CLASSES.
    """
    image = tf.image.decode_png(tf.io.read_file(rgb_path), channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)

    encoded_label = tf.image.decode_png(tf.io.read_file(label_path), channels=3)
    class_map = decode_coloured_label(encoded_label)

    if split == 'train' and augment:
        # augment_image expects an elevation tensor as well; hand it a
        # throwaway single-channel placeholder and discard the result.
        placeholder = tf.zeros_like(image[..., :1])
        image, _, class_map = augment_image(image, placeholder, class_map)

    image.set_shape([None, None, 3])
    class_map.set_shape([None, None])

    target_size = [tile_size, tile_size]
    image = tf.image.resize(image, target_size)
    # Nearest-neighbour resizing keeps label values valid class ids.
    class_map = tf.image.resize(class_map[..., tf.newaxis], target_size, method='nearest')
    class_map = tf.cast(tf.squeeze(class_map, axis=-1), tf.int32)

    return image, tf.one_hot(class_map, depth=NUM_CLASSES)



def build_tf_dataset(df, image_dir, elevation_dir, label_dir, slope_path,
                     input_type='rgb', batch_size=32, split='train',
                     augment=False, shuffle=True, tile_size=256, dummy=False):
    """Build a batched, prefetched tf.data pipeline over the tiles listed in df.

    Args:
        df: Tile table passed to load_image_paths.
        image_dir: Directory of the RGB PNGs.
        elevation_dir: Directory of the elevation .npy files.
        label_dir: Directory of the label PNGs.
        slope_path: Directory of the slope .npy files. May be None when the
            slope rasters are never read (input_type 'rgb', or dummy=True).
        input_type: 'rgb' (parse_tile) or 'rgb_elev' (parse_elevation).
        batch_size: Number of tiles per batch.
        split: For 'train', tiles are randomly dropped according to their
            keep probability and the order is reshuffled every iteration.
        augment: Forwarded to the parse function.
        shuffle: Whether to shuffle with a buffer covering the whole table.
        tile_size: Forwarded to the parse function.
        dummy: Forwarded to parse_elevation.

    Returns:
        A tf.data.Dataset yielding (input_image, label_onehot) batches.

    Raises:
        ValueError: If input_type is unsupported, or slope_path is None
            although slope rasters would have to be read.
    """
    # Fail before any path building or graph tracing.
    if input_type not in ('rgb', 'rgb_elev'):
        raise ValueError(f"Unsupported input_type: {input_type}")

    image_paths, elevation_paths, label_paths, tile_ids, keep_probs = load_image_paths(
        df, image_dir, elevation_dir, label_dir)

    if slope_path is not None:
        # NOTE(review): slope rasters are assumed to be named
        # "<tile_id>-slope.npy" inside `slope_path`, mirroring the
        # "<tile_id>-elev.npy" convention used for elevation -- TODO confirm.
        slope_paths = [os.path.join(slope_path, f"{tile_id}-slope.npy") for tile_id in tile_ids]
    elif input_type == 'rgb' or dummy:
        # The slope files are never opened in these modes; placeholders keep
        # the dataset element structure uniform.
        slope_paths = ["" for _ in tile_ids]
    else:
        raise ValueError("slope_path is required when input_type='rgb_elev' and dummy=False")

    dataset = tf.data.Dataset.from_tensor_slices(
        (image_paths, elevation_paths, slope_paths, label_paths, tile_ids, keep_probs))

    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(image_paths), reshuffle_each_iteration=(split == 'train'))

    if split == 'train':
        # Stochastic sub-sampling: keep each tile with probability keep_prob.
        def filter_fn(rgb_path, elev_path, slope_file, label_path, tile_id, keep_prob):
            return tf.random.uniform([]) < keep_prob
        dataset = dataset.filter(filter_fn)

    def map_fn(rgb_path, elev_path, slope_file, label_path, tile_id, keep_prob):
        if input_type == 'rgb':
            return parse_tile(rgb_path, label_path, tile_id, split, augment, tile_size)
        return parse_elevation(rgb_path, elev_path, slope_file, label_path, tile_id,
                               split, augment, tile_size, dummy=dummy)

    dataset = dataset.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset



