In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import tensorflow as tf
import os
import cv2
import tifffile as tiff
import tensorflow.image as tfi
from PIL import Image
tf.debugging.set_log_device_placement(True)
from tensorflow import keras
from tensorflow.keras.callbacks import Callback, EarlyStopping,ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import layers,models, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.callbacks import TensorBoard
from keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, SpatialDropout2D, BatchNormalization
from tensorflow.keras.layers import Lambda
from pathlib import Path
from tensorflow.keras.utils import load_img, img_to_array, plot_model
from sklearn.decomposition import PCA
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose

In [2]:
def load_images_pillow(folder_path):
    """Load the PNG/JPG files of a folder as NumPy arrays, in a stable order.

    Files whose name contains an underscore are skipped. The remaining files
    are read in natural (numeric-aware) filename order, e.g. ``2.png`` comes
    before ``10.png``. ``os.listdir`` alone returns entries in arbitrary
    order, which would make the position of each label unpredictable.

    NOTE(review): callers that pair these arrays with separately loaded
    images must make sure the images are ordered the same way -- confirm.

    Args:
        folder_path: Directory to scan (sub-directories are not visited).

    Returns:
        list[np.ndarray]: one array per matching file, ordered by filename.
    """
    import re  # local import keeps this cell self-contained

    def natural_key(name):
        # "12.png" -> ["", 12, ".png"]: digit runs compare as integers.
        return [int(part) if part.isdigit() else part.lower()
                for part in re.split(r"(\d+)", name)]

    images = []
    for filename in sorted(os.listdir(folder_path), key=natural_key):
        if "_" in filename:
            continue
        if not filename.endswith(('.png', '.jpg')):
            continue
        # The context manager releases the file handle once pixels are copied.
        with Image.open(os.path.join(folder_path, filename)) as img:
            images.append(np.array(img))
    return images

In [3]:
def convert_to_pca(image):
    """Project a multi-band image onto three principal components for display.

    Every pixel is treated as one sample whose features are its band values.
    The three PCA components are reshaped back to the image's spatial layout
    and min-max scaled to [0, 1] so the result can be passed to ``imshow``.

    Args:
        image: Array-like of shape ``(..., bands)`` with the bands on the
            last axis (e.g. ``(128, 128, 12)``). PCA with three components
            needs at least 3 bands and at least 3 pixels.

    Returns:
        np.ndarray of shape ``image.shape[:-1] + (3,)`` with values in
        [0, 1]. A constant projection (zero value range) yields all zeros
        instead of NaNs from a division by zero.
    """
    image = np.asarray(image)
    n_bands = image.shape[-1]
    pixels = image.reshape(-1, n_bands).astype(np.float32)

    pca = PCA(n_components=3)
    components = pca.fit_transform(pixels).reshape(image.shape[:-1] + (3,))

    lowest = components.min()
    value_range = components.max() - lowest
    if value_range == 0:
        # Nothing to stretch: avoid dividing by zero.
        return np.zeros_like(components)
    return (components - lowest) / value_range

In [None]:
def _conv_bn(tensor, filters):
    """Apply a 3x3 same-padded ReLU convolution followed by batch normalisation."""
    tensor = Conv2D(filters, 3, activation='relu', padding='same',
                    kernel_initializer='he_normal')(tensor)
    return BatchNormalization()(tensor)


def _encoder_block(tensor, filters):
    """Run two conv+BN stages at a single resolution."""
    return _conv_bn(_conv_bn(tensor, filters), filters)


def _decoder_block(tensor, skip, filters):
    """Upsample ``tensor`` 2x, merge it with ``skip`` and refine the result."""
    up = UpSampling2D(size=(2, 2))(tensor)
    up = Conv2D(filters, 2, activation='relu', padding='same',
                kernel_initializer='he_normal')(up)
    # `skip` must come from the encoder level with the same spatial size.
    merged = concatenate([skip, up])
    refined = _conv_bn(merged, filters)
    return Conv2D(filters, 3, activation='relu', padding='same',
                  kernel_initializer='he_normal')(refined)


def unet_model(n_filters=64, input_size=(128, 128, 12)):
    """Build a four-level U-Net for binary (single-channel sigmoid) segmentation.

    The encoder uses ``n_filters * (1, 2, 4, 8)`` filters with a 2x2 max-pool
    after each level, the bottleneck uses ``n_filters * 16``, and the decoder
    mirrors the encoder. Each decoder level is concatenated with the output
    of the encoder level that has the same resolution.

    Args:
        n_filters: Number of filters at the first encoder level.
        input_size: ``(height, width, channels)`` of the input. Height and
            width must be multiples of 16 (or ``None``), otherwise the four
            pool/upsample round trips would not restore the skip shapes.

    Returns:
        An uncompiled Keras ``Model`` mapping ``input_size`` inputs to a
        ``(height, width, 1)`` sigmoid map.

    Raises:
        ValueError: If height or width is not a multiple of 16.
    """
    height, width = input_size[0], input_size[1]
    for dim in (height, width):
        if dim is not None and dim % 16 != 0:
            raise ValueError(
                "input height and width must be multiples of 16, got "
                f"{input_size}"
            )

    input_layer = Input(shape=input_size)

    # Encoder: remember each level's output for the matching decoder level.
    skips = []
    x = input_layer
    for multiplier in (1, 2, 4, 8):
        x = _encoder_block(x, n_filters * multiplier)
        skips.append(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # Bottleneck
    x = _encoder_block(x, n_filters * 16)

    # Decoder: walk the skips from deepest to shallowest.
    for multiplier, skip in zip((8, 4, 2, 1), reversed(skips)):
        x = _decoder_block(x, skip, n_filters * multiplier)

    output = Conv2D(1, (1, 1), activation='sigmoid')(x)

    return Model(inputs=input_layer, outputs=output)

In [None]:
# Build the network with its default width and input shape.
u_net_model = unet_model()

In [None]:
# Render the layer graph (with tensor shapes) to an image file.
plot_model(
    u_net_model,
    to_file="UNet.png",
    show_shapes=True,
)

In [None]:
# Print the layer-by-layer summary of the model built above.
u_net_model.summary()


In [None]:
# Dataset locations: a glob matching the TIFF images, and the label folder.
labels_path = "/kaggle/input/water-segmentation/data/labels"
images_path = "/kaggle/input/water-segmentation/data/images/*.tif"

# NOTE(review): images and labels are paired purely by position further
# down -- confirm both loaders return files in the same order.
labels = load_images_pillow(labels_path)
images = tiff.imread(images_path)

In [None]:
# Sanity check: show the container type returned by load_images_pillow.
type(labels)

In [None]:
# Every image needs exactly one mask. Fail here with a clear message rather
# than later inside train_test_split.
assert len(labels) == len(images), (
    f"found {len(labels)} labels but {len(images)} images"
)
[len(labels), len(images)]


In [None]:
# Show one label mask next to a false-colour PCA rendering of its image.
# plt.subplots creates the figure itself; a separate plt.figure() call would
# only leave an extra empty figure in the output.
sample_index = 205
fig, axs = plt.subplots(1, 2, figsize=(16, 8), constrained_layout=True)
axs[0].imshow(labels[sample_index])
axs[0].set_title(f"Label #{sample_index}")
axs[1].imshow(convert_to_pca(images[sample_index]))
axs[1].set_title(f"Image #{sample_index} (3 principal components)")

plt.show()

In [None]:
# train_datagen = ImageDataGenerator(
#     rescale=1./255,
#     rotation_range=40,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True,
#     fill_mode='nearest'
# )

# val_datagen = ImageDataGenerator(rescale=1./255)

# test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Hold out 10% of all samples for validation ...
X_train, X_valid, y_train, y_valid = train_test_split(
    images,
    labels,
    test_size=0.1,
    random_state=42,
)
# ... then 10% of the remainder for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=42,
)


In [None]:
# labels= tf.convert_to_tensor(labels)
# images= tf.convert_to_tensor(images)

In [None]:
# train_generator = train_datagen.flow(
#     x= X_train,
#     y= y_train,
#     batch_size=32,
# )

# valid_generator = train_datagen.flow(
#     x= X_valid,
#     y= y_valid,
#     batch_size=32,
# )

# test_datagen_generator = train_datagen.flow(
#     x= X_test,
#     y= y_test,
#     batch_size=32,
#     shuffle= False
# )

In [None]:
# Per-pixel binary classification: Adam with its default settings,
# binary cross-entropy loss, accuracy as the reported metric.
u_net_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Checkpoint callback: writes the model to `checkpoint_path`, keeping only
# the version with the best "val_accuracy" seen so far (save_best_only=True).
checkpoint_path = "model_checkpoint.keras"
checkpoint_callback = ModelCheckpoint(checkpoint_path,
                                      monitor="val_accuracy",
                                      save_best_only=True)

# EarlyStopping callback: stop training once val_loss has gone 10 epochs (patience) without improving.
# NOTE(review): this monitors val_loss while the checkpoint monitors val_accuracy -- confirm that is intended.
early_stopping = EarlyStopping(monitor = "val_loss", # metric watched for improvement
                               patience = 10,
                               restore_best_weights = True) # on stop, restore the weights of the best epoch

# ReduceLROnPlateau: multiply the learning rate by 0.2 after 3 epochs without val_loss improvement, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

In [None]:
batch_size = 32
target_size = (128, 128)

# Wrap each split in a batched, prefetching tf.data pipeline.
# Only the training split is shuffled.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=1000)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_valid, y_valid))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Sanity check: print the (inputs, targets) shapes of every training batch.
for batch_inputs, batch_targets in train_dataset:
    print(batch_inputs.shape, batch_targets.shape)

In [None]:
# Number of batches each dataset yields per pass. The datasets are batched
# without dropping the remainder, so a final partial batch still counts:
# round up. Floor division would skip that batch and give 0 steps for any
# split smaller than batch_size.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
validation_steps = int(np.ceil(len(X_valid) / batch_size))

In [None]:
# Train on the tf.data pipelines. Validation uses `val_dataset`, the dataset
# built from X_valid / y_valid. No explicit step counts are passed: both
# datasets are finite and not repeated, so each epoch simply runs until its
# dataset is exhausted.
history = u_net_model.fit(
    train_dataset,
    epochs=150,
    validation_data=val_dataset,
    callbacks=[
        early_stopping,
        reduce_lr,
        checkpoint_callback,
    ],
)

In [None]:
# Score the held-out test split; `results` is indexed as [loss, accuracy].
results = u_net_model.evaluate(test_dataset, verbose=0)
test_loss, test_accuracy = results[0], results[1]

print("    Test Loss: {:.5f}".format(test_loss))
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))

# Trash


In [None]:
# def read_label(path, size= 128):
#     label = load_img(path)
# #     label = img_to_array(label)
# #     label = cv2.cvtColor(label, cv2.COLOR_RGB2GRAY)
#     label = tf.convert_to_tensor(label, dtype= tf.float32)
# #     label = tf.cast(label, tf.float32)
#     label *= 255.
#     label = tf.convert_to_tensor(label, dtype= tf.float32)
#     return label

In [None]:
#     inputs = tf.keras.layers.Input((128,128,3))
    
#     cblock1 = conv_block(inputs, num_filters= n_filters)
#     cblock2 = conv_block(cblock1[0], num_filters= n_filters*2)
#     cblock3 = conv_block(cblock2[0], num_filters= n_filters*4)
#     cblock4 = conv_block(cblock3[0], num_filters= n_filters*8)
#     cblock5 = conv_block(cblock4[0], num_filters= n_filters*16, Max_pool=0)
    
    
#     ublock6 = upsampling_block(cblock5[0], cblock4[1],  n_filters * 8)
#     ublock7 = upsampling_block(ublock6, cblock3[1],  n_filters * 4)
#     ublock8 = upsampling_block(ublock7, cblock2[1],  n_filters * 2)
#     ublock9 = upsampling_block(ublock8, cblock1[1],  n_filters)

#     conv9 = tf.keras.layers.Conv2D(n_filters,3,
#                  activation='relu',
#                  padding='same',
#                  kernel_initializer='he_normal')(ublock9)
    
    
#     conv10 = tf.keras.layers.Conv2D(3, kernel_size=3, padding="same",activation = 'sigmoid')(conv9)
#     model = tf.keras.Model(inputs=inputs, outputs=conv10)
#     return model

In [None]:
# input_folder = "/kaggle/input/water-segmentation/data/labels"
# output_folder = '/kaggle/working/labels'
# os.makedirs(output_folder, exist_ok=True)

# for filename in os.listdir(input_folder):
#     if filename.endswith('.png') or filename.endswith('.jpg'):
#         # Open image file
#         img = Image.open(os.path.join(input_folder, filename))
#         # Convert image to grayscale
#         gray_img = img.convert('L')
#         # Save image with new dimensions
#         gray_img = gray_img.resize((128, 128))
#         gray_img = gray_img.convert('L')  # Ensure it's grayscale
#         gray_img.save(os.path.join(output_folder, filename))

In [None]:
# images = tiff.imread(images_path)
# # labels = read_label(os.path.join(labels_path, x)) for x in os.listdir(labels_path) if "_" not in x
# i = 0
# labels 
# for x in os.listdir(labels_path):
#     if "_" not in x:
#         labels[i] = 

In [None]:
# import numpy as np
# from tensorflow.image import resize

# def data_generator(images, labels, batch_size, target_size=(128, 128)):
#     while True:
#         for start in range(0, len(images), batch_size):
#             end = min(start + batch_size, len(images))
#             X_batch = images[start:end]
#             y_batch = labels[start:end]

#             # Resize images and labels to the target size
#             X_batch_resized = np.array([resize(img, target_size) for img in X_batch])
#             y_batch_resized = np.array([resize(lbl, target_size) for lbl in y_batch])

#             # Normalize images
#             X_batch_resized = X_batch_resized / 255.0

#             # Ensure labels have the correct shape (e.g., for binary segmentation)
#             if y_batch_resized.ndim == 4 and y_batch_resized.shape[-1] != 1:
#                 y_batch_resized = np.expand_dims(y_batch_resized[..., 0], axis=-1)

#             yield X_batch_resized, y_batch_resized


In [None]:
# labels = np.array(labels, dtype=np.float32)
# labels = np.array([cv2.cvtColor(label, cv2.COLOR_RGB2GRAY) for label in labels])
# for label in range(len(labels)):
#     labels[label] = np.array(labels[label], dtype=np.float32)
#     labels[label] = cv2.cvtColor(labels[label], cv2.COLOR_RGB2GRAY)
    

# # Optionally, add a channel dimension back if needed (to match the U-Net input/output shape)
# labels = np.expand_dims(labels, axis=-1)

In [None]:
# batch_size = 32
# target_size = (128, 128)  # Make sure this matches the output size of your model

# # Create generators
# train_generator = train_generator(X_train, y_train, batch_size, target_size)
# val_generator = val_generator(X_valid, y_valid, batch_size, target_size)
# test_generator = test_generator(X_test, y_test, batch_size, target_size)