# Collect water data from the training set.

### Collect water data

In [None]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from scipy.spatial import ConvexHull
from skimage import feature
import json

In [None]:
# Per-image water descriptors gathered from the training set: for every image
# the K-means segment that overlaps the ground-truth mask the most contributes
# one mean-HSV triple (point_list) and one mean-LBP scalar (lbp_list).
point_list = []
lbp_list = []
for i in range(1, 61):
    # Read training data image
    image_path = f'./training_dataset/image/{i}.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        raise FileNotFoundError(f'Cannot read training image: {image_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Use K-means with k = 10 for image segmentation.
    pixels = image_hsv.reshape(-1, 3)
    k_value = 10
    kmeans = KMeans(n_clusters=k_value)
    kmeans.fit(pixels)
    labels = kmeans.labels_
    segmented_image = labels.reshape(image.shape[:2])

    # Read training data mask
    mask_path = f'./training_dataset/mask/{i}.jpg'
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise FileNotFoundError(f'Cannot read training mask: {mask_path}')

    # The mask is stored as JPEG, so its pixels may not be exactly 0 or 255.
    # Binarize it before measuring overlap; a bitwise AND of a 0/1 array with
    # the raw mask would only look at the least-significant bit of each pixel.
    water_mask = mask > 127

    # The LBP map depends only on the grayscale image, so compute it once per
    # image instead of once per cluster.
    lbp_feature = feature.local_binary_pattern(image_gray, P=8, R=1, method="uniform")

    # Find the segment with the maximum intersection with the mask.
    intersection_areas = []
    mean_hsv_values = []
    mean_lbp_values = []
    for label_value in range(k_value):
        label_mask = segmented_image == label_value

        # Number of pixels of this segment that fall inside the water mask.
        intersection_areas.append(np.count_nonzero(label_mask & water_mask))

        # Average LBP and average HSV over the segment.
        mean_lbp_values.append(np.mean(lbp_feature[label_mask]))
        mean_hsv_values.append(np.mean(image_hsv[label_mask], axis=0))
    max_intersection_label = np.argmax(intersection_areas)

    # Obtain the mask for the category with the maximum intersection
    # (kept for the visualisation cell further down).
    max_intersection_mask = segmented_image == max_intersection_label

    # Store plain Python floats so the lists serialise cleanly to JSON.
    point_list.append(mean_hsv_values[max_intersection_label].tolist())
    lbp_list.append(float(mean_lbp_values[max_intersection_label]))

### Store the water data

In [7]:
# Persist the collected HSV points and LBP means as a single JSON document.
data = dict(point_list=point_list, lbp_list=lbp_list)
with open('water_data.json', 'w') as file:
    json.dump(obj=data, fp=file, indent=4)


### Show the maximum intersection

In [None]:
# Side-by-side view of the ground-truth mask and the selected segment, using
# whatever `mask` and `max_intersection_mask` currently hold.
panels = (
    (mask, 'Original Mask'),
    (max_intersection_mask, 'Max Intersection Category'),
)
for position, (panel_image, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.imshow(panel_image, cmap='gray')
    plt.title(panel_title)
plt.show()

# Predict the segment of the water body

In [None]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from scipy.spatial import ConvexHull
from skimage import feature
import json

### Read the water data

In [8]:
# Load the stored water descriptors back from disk.
with open('water_data.json', 'r') as file:
    loaded_data = json.loads(file.read())

# Pull out the two lists by their JSON keys.
loaded_point_list, loaded_lbp_list = (
    loaded_data[key] for key in ("point_list", "lbp_list")
)

In [None]:
# json.load returns nested Python lists, which support neither column slicing
# ([:, 0]) nor indexing by an index array, so convert to an ndarray first.
points_3d = np.asarray(loaded_point_list, dtype=float)

# Order the HSV points by their first component (hue).
sorted_indices = np.argsort(points_3d[:, 0])
sorted_points_3d = points_3d[sorted_indices]

# Divide the collected HSV into four categories
split_index_1 = len(sorted_points_3d) // 4
split_index_2 = 2 * split_index_1
split_index_3 = 3 * split_index_1

# Map each category into a convex polygon on the HSV space
hull_3d_1 = ConvexHull(sorted_points_3d[:split_index_1])
hull_3d_2 = ConvexHull(sorted_points_3d[split_index_1:split_index_2])
hull_3d_3 = ConvexHull(sorted_points_3d[split_index_2:split_index_3])
hull_3d_4 = ConvexHull(sorted_points_3d[split_index_3:])

# Find the threshold for each category: the hue of the first point of the
# next group.
split_val_1 = sorted_points_3d[split_index_1][0]
split_val_2 = sorted_points_3d[split_index_2][0]
split_val_3 = sorted_points_3d[split_index_3][0]

# f-strings are required here: concatenating str with a float raises TypeError.
print(f"Split_val_1: {split_val_1}")
print(f"Split_val_2: {split_val_2}")
print(f"Split_val_3: {split_val_3}")

In [None]:
# Slack allowed on each facet inequality when testing hull membership.
tolerance = 1


def is_hsv_in_polygon(hsv, hull_3d):
    """Return whether `hsv` satisfies every facet inequality of `hull_3d`.

    Each row of `hull_3d.equations` is read as (normal..., offset); the point
    passes a facet when `normal . hsv + offset <= tolerance`, where
    `tolerance` is the module-level slack defined above.
    """
    point = np.array(hsv, dtype=np.float32)
    facet_normals = hull_3d.equations[:, :-1]
    facet_offsets = hull_3d.equations[:, -1]
    plane_values = facet_normals.dot(point) + facet_offsets
    return np.all(plane_values <= tolerance)

In [None]:
# Segment every image with K-means and mark as water each segment whose mean
# HSV lies inside the convex hull selected by its hue range.
for index in range(1, 61):
    # Read training data image
    # (to predict on other images, read from f'./image_test/{index}.jpg' instead)
    image_path = f'./training_dataset/image/{index}.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        raise FileNotFoundError(f'Cannot read image: {image_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Use K-means with k = 10 for image segmentation.
    pixels = image_hsv.reshape(-1, 3)
    k_value = 10
    kmeans = KMeans(n_clusters=k_value)
    kmeans.fit(pixels)
    labels = kmeans.labels_
    segmented_image = labels.reshape(image.shape[:2])

    # The LBP map depends only on the grayscale image, so compute it once per
    # image instead of once per segment.
    lbp_feature = feature.local_binary_pattern(image_gray, P=8, R=1, method="uniform")

    # Calculate the average LBP (Local Binary Pattern) and HSV for each segment
    # and classify the segment in the same pass.
    mean_hsv_values = []
    mean_lbp_values = []
    binary_image = np.zeros_like(segmented_image, dtype=np.uint8)
    for label_value in range(k_value):
        label_mask = segmented_image == label_value

        # The LBP mean is recorded but does not take part in the decision below.
        mean_lbp_values.append(np.mean(lbp_feature[label_mask]))
        mean_hsv = np.mean(image_hsv[label_mask], axis=0)
        mean_hsv_values.append(mean_hsv)

        # Select different convex polygons for water features based on
        # different Hue values.
        if mean_hsv[0] <= split_val_1:
            hull = hull_3d_1
        elif mean_hsv[0] <= split_val_2:
            hull = hull_3d_2
        elif mean_hsv[0] <= split_val_3:
            hull = hull_3d_3
        else:
            hull = hull_3d_4

        # A segment whose mean HSV is within the polygon is a water body;
        # binary_image is already zero everywhere else.
        if is_hsv_in_polygon(mean_hsv, hull):
            binary_image[label_mask] = 255

    # Store the predicted result
    output_path = f'./training_dataset/result_Kmean_feature/test_{index}.jpg'
    if not cv2.imwrite(output_path, binary_image):
        # Fail on the first unwritable file rather than silently losing every
        # result (e.g. when the output directory does not exist).
        raise OSError(f'Could not write prediction to {output_path}')