In [5]:
from utils import convert_label_to_rgb 
import torchvision.transforms as transforms
from model import multi_unet_model
import tensorflow as tf
import keras
import os
import numpy as np
import pandas
# from dataset import PASCAL2007Dataset
from ohe import Ohe
import matplotlib.pyplot as plt

In [6]:
AUTOTUNE = tf.data.AUTOTUNE

LEARNING_RATE = 1e-4
BATCH_SIZE = 16
NUM_EPOCHS = 100
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False

In [5]:
class pascal(): 
    train_dir = os.path.join('data', 'pascal', 'train_JPEGImages')
    mask_dir = os.path.join('data', 'pascal', 'train_SegClasses')
    mask_val_dir = os.path.join('data', 'pascal', 'val_SegClasses')
    val_dir = os.path.join('data', 'pascal', 'val_JPEGImages')
    
    color_classes = None

    def __init__(self) -> None:
        raw = pandas.read_csv('colormap.csv')
        raw = raw.drop(['Class'], axis=1)
        pascal.color_classes = tf.convert_to_tensor(raw, dtype=float)
        
        
    def decode_image_jpg(path): 
        img = tf.io.decode_png(path, channels=3)
        
        return tf.image.resize(img, [224, 224], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    def decode_image_png(path): 
        img = tf.io.decode_png(path, channels=3)
        
        return tf.image.resize(img, [224, 224], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    def get_label_image_train(path): 
        # parts = tf.strings.split(path, os.path.sep)
        parts = tf.strings.split(path, 'train_JPEGImages')
        #get the file name since they are the same
        label_file = tf.strings.join([parts[0], tf.constant('train_SegClasses'), parts[1]])
        #label files are pngs need to remove .jpg and replace with png
        label_file = tf.strings.regex_replace(label_file, '.jpg', '.png')
        
        raw_label_file = tf.io.read_file(label_file)
        label_img = pascal.decode_image_png(raw_label_file)

        return label_img

    def get_label_image_val(path): 
        # parts = tf.strings.split(path, os.path.sep)
        parts = tf.strings.split(path, 'val_JPEGImages')
        #get the file name since they are the same
        label_file = tf.strings.join([parts[0], tf.constant('val_SegClasses'), parts[1]])
        #label files are pngs need to remove .jpg and replace with png
        label_file = tf.strings.regex_replace(label_file, '.jpg', '.png')
        
        raw_label_file = tf.io.read_file(label_file)
        label_img = pascal.decode_image_png(raw_label_file)

        return label_img

    def calculate_label_encoding(label_image): 
        image_reshape = tf.reshape(label_image, [-1, 3])
        image_reshape = tf.expand_dims(image_reshape, 1)
        # tf.print(image_reshape)
        
        diff = tf.subtract(image_reshape, pascal.color_classes)
        square_diff = tf.square(diff)
        
        
        # tf.print(square_diff)
        dists = tf.reduce_sum(square_diff, -1)
        # tf.print(dists)
        # print(dists)
        indicies = tf.argmin(dists, axis=-1)
        # print(indicies)

        # zero = tf.constant(0, dtype=tf.int64)
        # where = tf.not_equal(indicies, zero)
        # where = tf.where(where)
        # print(where)
        # tf.print(indicies[where[0][0]])
        
        indicies = tf.reshape(indicies, (224, 224))
        indicies = tf.one_hot(indicies, 22)
        return indicies

    def process_path(path): 
        image = tf.io.read_file(path)
        image = pascal.decode_image_jpg(image)
        
        label_image = pascal.get_label_image_train(path)
        flatten_label_encode = pascal.calculate_label_encoding(label_image)
        
        return image, flatten_label_encode
        
    def process_path_val(path): 
        image = tf.io.read_file(path)
        image = pascal.decode_image_jpg(image)
        
        label_image = pascal.get_label_image_val(path)
        flatten_label_encode = pascal.calculate_label_encoding(label_image)
        
        return image, flatten_label_encode

    def get_train(self): 
        train_ds = tf.data.Dataset.list_files(f'{pascal.train_dir}/*.jpg')
        train_ds = (train_ds.map(pascal.process_path, num_parallel_calls=AUTOTUNE)
            .batch(16))
        
        return train_ds
    
    def get_val(self): 
        val_ds = tf.data.Dataset.list_files(f'{pascal.val_dir}/*.jpg')
        val_ds = (val_ds.map(pascal.process_path_val, num_parallel_calls=AUTOTUNE)
                    .batch(16))
        
        return val_ds

In [1]:
class city_data():
    
    img_dir = './data/city/leftImg8bit'
    fine_dir = './data/city/gtFine'
    city_with_label = []

    def decode_image_png(path, channels): 
        img = tf.io.decode_png(path, channels=channels)
        
        return tf.image.resize(img, [224, 224], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    
    def get_label(path):
        #need to get path first
        parts = tf.strings.split(path, '/')

        core_name = tf.strings.split(parts[-1], 'leftImg8bit.png')[0]
        ground_truth_path = tf.strings.join([parts[1], '/', parts[2], '/', 'gtFine/', parts[4], '/', parts[5], '/', core_name, 'gtFine_labelTrainIds.png'])

        label_img = tf.io.read_file(ground_truth_path)
        label_img = city_data.decode_image_png(label_img, 1)
        label_img = tf.cast(label_img, tf.int64)
        one_hot = tf.one_hot(label_img, 19, axis=2)
        one_hot = tf.squeeze(one_hot)

        return one_hot
    
    def process_path(path): 
        image = tf.io.read_file(path)
        image = city_data.decode_image_png(image, 3)
        
        label_image = city_data.get_label(path)
        
        return image, label_image
    
    def __init__(self): 
        #find cities which have labels
        
        for type in os.listdir(city_data.fine_dir): 
            f_t = os.path.join(city_data.fine_dir, type)
            
            for city in os.listdir(f_t):
                city_data.city_with_label.append(city)
                

    def get_train(self): 
        train_ds = None
        
        train_dir = os.path.join(city_data.img_dir, 'train')
        for city in os.listdir(train_dir): 
            if city in city_data.city_with_label: 
                if train_ds is None: 
                    f_c = os.path.join(train_dir, city)
                    train_ds = tf.data.Dataset.list_files(f'{f_c}/*.png')
                else: 
                    train_ds = train_ds.concatenate(tf.data.Dataset.list_files(f'{f_c}/*.png'))
        
        train_ds = (train_ds.map(city_data.process_path, num_parallel_calls=AUTOTUNE)
                    .batch(16))
        return train_ds
    
    def get_val(self):
        val_ds = None
        val_dir = os.path.join(city_data.img_dir, 'val')
        
        for city in os.listdir(val_dir): 
            if city in city_data.city_with_label: 
                if val_ds is None: 
                    f_c = os.path.join(val_dir, city)
                    val_ds = tf.data.Dataset.list_files(f'{f_c}/*.png')
                else: 
                    val_ds = val_ds.concatenate(tf.data.Dataset.list_files(f'{f_c}/*.png'))
                    
        val_ds = (val_ds.map(city_data.process_path, num_parallel_calls=AUTOTUNE)
                    .batch(16))
        return val_ds
    

In [8]:
model = multi_unet_model(n_classes=19, IMG_HEIGHT=224, IMG_WIDTH=224, IMG_CHANNELS=3)
loss = keras.losses.CategoricalCrossentropy()
optimizer = keras.optimizers.AdamW(learning_rate = LEARNING_RATE)
metrics = ["accuracy"]

p = city_data()
train_ds = p.get_train()
val_ds = p.get_val()

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=os.path.join(os.getcwd(), 'logs', 'city'), histogram_freq=1)

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
model.fit(train_ds, validation_data=val_ds, epochs=NUM_EPOCHS, callbacks=[tensorboard_callback])

model.save_weights('./model/city.weights.h5')

Epoch 1/100
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - accuracy: 0.0880 - loss: 61.0510




[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 88ms/step - accuracy: 0.0881 - loss: 60.8806 - val_accuracy: 0.4548 - val_loss: 2.5385
Epoch 2/100
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 37ms/step - accuracy: 0.3111 - loss: 4.0192 - val_accuracy: 0.4785 - val_loss: 2.4763
Epoch 3/100
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.4089 - loss: 2.8704 - val_accuracy: 0.4812 - val_loss: 2.4649
Epoch 4/100
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 36ms/step - accuracy: 0.4211 - loss: 2.7178 - val_accuracy: 0.4816 - val_loss: 2.4496
Epoch 5/100
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 36ms/step - accuracy: 0.4258 - loss: 2.6520 - val_accuracy: 0.4830 - val_loss: 2.4320
Epoch 6/100
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 36ms/step - accuracy: 0.4288 - loss: 2.6102 - val_accuracy: 0.4848 - val_loss: 2.4159
Epoch 7/100
[1m174/174[0

FileNotFoundError: [Errno 2] Unable to synchronously create file (unable to open file: name = './model/city.weights.h5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)

In [6]:
model = multi_unet_model(n_classes=22, IMG_HEIGHT=224, IMG_WIDTH=224, IMG_CHANNELS=3)
loss = keras.losses.CategoricalCrossentropy()
optimizer = keras.optimizers.AdamW(learning_rate = LEARNING_RATE)
metrics = ["accuracy"]

p = pascal()
train_ds = p.get_train()
val_ds = p.get_val()


tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=os.path.join(os.getcwd(), 'logs', 'pascal'), histogram_freq=1)

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
model.fit(train_ds, validation_data=val_ds, epochs=NUM_EPOCHS, callbacks=[tensorboard_callback])


2024-05-03 17:04:34.861488: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-03 17:04:34.864206: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-03 17:04:34.864224: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-03 17:04:34.867162: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-03 17:04:34.867184: I external/local_xla/xla/stream_executor

Epoch 1/50


I0000 00:00:1714755879.144552    3029 service.cc:145] XLA service 0x7f9b40019570 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1714755879.144630    3029 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 4080, Compute Capability 8.9
2024-05-03 17:04:39.320418: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-05-03 17:04:39.682429: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8902


[1m 1/14[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m3:49[0m 18s/step - accuracy: 0.0045 - loss: 175.3590

I0000 00:00:1714755893.819843    3029 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m12/14[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 55ms/step - accuracy: 0.0159 - loss: 143.1946

KeyboardInterrupt: 