In [None]:
import xml.etree.ElementTree as ET

import cv2
import imgaug.augmenters as iaa
import numpy as np
from imgaug.augmentables.polys import Polygon, PolygonsOnImage
from imgaug.augmentables.segmaps import SegmentationMapsOnImage
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate
from keras.models import Model
from keras.optimizers import Adam
from PIL import Image
from sklearn.model_selection import train_test_split

In [None]:
# Load and parse the XML annotation file
def _parse_points(raw_points):
    """Convert a ``"x1,y1;x2,y2;..."`` string into a list of ``(x, y)`` float tuples.

    Blank tokens (a trailing ``;``, an empty string, or a missing attribute
    passed in as ``None``) are skipped so they never reach ``float('')``.
    """
    points = []
    for token in (raw_points or '').split(';'):
        token = token.strip()
        if not token:
            continue
        coords = token.split(',')
        points.append((float(coords[0]), float(coords[1])))
    return points


def parse_annotations(xml_file):
    """Read per-image polygon annotations from an XML file.

    Every ``<image>`` element directly under the document root yields one
    record; every ``<polygon>`` child of that image yields one
    ``(label, points)`` entry.

    Args:
        xml_file: Anything ``xml.etree.ElementTree.parse`` accepts (a path or
            an open file object).

    Returns:
        A list of dicts with the keys ``'filename'`` (the image's ``name``
        attribute), ``'height'`` and ``'width'`` (ints) and ``'polygons'``
        (a list of ``(label, [(x, y), ...])`` tuples with float coordinates).
        Polygons whose ``points`` attribute contains no coordinates are
        omitted because they cannot be rasterised.
    """
    root = ET.parse(xml_file).getroot()

    annotations = []
    for image in root.findall('image'):
        polygons = []
        for poly in image.findall('polygon'):
            points = _parse_points(poly.get('points'))
            if points:
                polygons.append((poly.get('label'), points))

        annotations.append({
            'filename': image.get('name'),
            'height': int(image.get('height')),
            'width': int(image.get('width')),
            'polygons': polygons,
        })

    return annotations

In [None]:
# Create mask from polygon annotations
def create_mask_from_polygons(height, width, polygons):
    """Rasterise polygon annotations into a single-channel mask.

    Args:
        height: Mask height in pixels.
        width: Mask width in pixels.
        polygons: Iterable of ``(label, points)`` pairs where ``points`` is a
            sequence of ``(x, y)`` coordinates. The label is not used; every
            polygon is filled with the same value.

    Returns:
        A ``(height, width)`` ``uint8`` array that is 255 inside any polygon
        and 0 elsewhere.
    """
    mask = np.zeros((height, width), dtype=np.uint8)
    for _label, points in polygons:
        if len(points) == 0:
            # Nothing to draw; an empty vertex array is not a valid fillPoly input.
            continue
        # Round to the nearest pixel: a plain int cast truncates toward zero and
        # shifts sub-pixel annotation coordinates by up to one pixel.
        vertices = np.rint(np.asarray(points, dtype=np.float64)).astype(np.int32)
        cv2.fillPoly(mask, [vertices], 255)
    return mask

In [None]:
# Define U-Net model
def unet(input_size=(128, 128, 1)):
    inputs = Input(input_size)
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(inputs)
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    
    conv2 = Conv2D(128, 3, activation='relu', padding='same')(pool1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    
    conv3 = Conv2D(256, 3, activation='relu', padding='same')(pool2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    
    conv4 = Conv2D(512, 3, activation='relu', padding='same')(pool3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same')(conv4)
    drop4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    
    conv5 = Conv2D(1024, 3, activation='relu', padding='same')(drop4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same')(conv5)

    up6 = UpSampling2D(size=(2, 2))(conv5)
    up6 = Concatenate()([up6, conv4])
    conv6 = Conv2D(512, 3, activation='relu', padding='same')(up6)
    conv6 = Conv2D(512, 3, activation='relu', padding='same')(conv6)
    
    up7 = UpSampling2D(size=(2, 2))(conv6)
    up7 = Concatenate()([up7, conv3])
    conv7 = Conv2D(256, 3, activation='relu', padding='same')(up7)
    conv7 = Conv2D(256, 3, activation='relu', padding='same')(conv7)
    
    up8 = UpSampling2D(size=(2, 2))(conv7)
    up8 = Concatenate()([up8, conv2])
    conv8 = Conv2D(128, 3, activation='relu', padding='same')(up8)
    conv8 = Conv2D(128, 3, activation='relu', padding='same')(conv8)
    
    up9 = UpSampling2D(size=(2, 2))(conv8)
    up9 = Concatenate()([up9, conv1])
    conv9 = Conv2D(64, 3, activation='relu', padding='same')(up9)
    conv9 = Conv2D(64, 3, activation='relu', padding='same')(conv9)

    conv10 = Conv2D(1, 1, activation='sigmoid')(conv9)
    
    model = Model(inputs=[inputs], outputs=[conv10])
    
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

In [None]:
# Load and preprocess data
annotations = parse_annotations('path_to_annotations.xml')
images = []
masks = []

for annotation in annotations:
    img = cv2.imread(annotation['filename'], cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising when a file is missing or unreadable.
        raise FileNotFoundError("Could not read image: {}".format(annotation['filename']))
    img = cv2.resize(img, (128, 128))
    mask = create_mask_from_polygons(annotation['height'], annotation['width'], annotation['polygons'])
    # Nearest-neighbour keeps the mask strictly two-valued; the default bilinear
    # interpolation would blend 0 and 255 along polygon edges.
    mask = cv2.resize(mask, (128, 128), interpolation=cv2.INTER_NEAREST)
    images.append(img)
    masks.append(mask)

images = np.array(images).reshape(-1, 128, 128, 1)
# The network ends in a sigmoid trained with binary cross-entropy, so the
# targets must be 0/1 rather than the 0/255 values the rasteriser produces.
masks = (np.array(masks).reshape(-1, 128, 128, 1) > 127).astype(np.float32)
# NOTE: image intensities are left in their raw 0-255 range because
# predict_new_image feeds the model the same unscaled values; if inputs are
# rescaled, change both places together.

X_train, X_val, y_train, y_val = train_test_split(images, masks, test_size=0.2, random_state=42)

In [None]:
# Build a fresh U-Net and fit it on the original (non-augmented) split
model = unet()
model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=50,
    validation_data=(X_val, y_val),
)

In [None]:
# Persist the trained network (architecture + weights) to an HDF5 file
model.save(filepath='unet_polygon.h5')

In [None]:
# Image Augmentation
def augment_images(images, masks, num_augmentations=35):
    """Generate randomly augmented copies of each image/mask pair.

    Each pair is passed ``num_augmentations`` times through a pipeline of
    random horizontal flip, rotation in [-20, 20] degrees, brightness
    multiplication in [0.8, 1.2] and Gaussian blur with sigma in [0, 3].
    The image and its mask go through the pipeline together in one call.

    Args:
        images: Sequence/array of images, one per mask.
        masks: Sequence/array of label masks, same length as ``images``.
            Values are converted to ``int32`` class ids for augmentation, so
            masks should hold integer-valued labels (e.g. 0/1 or 0/255).
        num_augmentations: Number of augmented copies produced per input pair.

    Returns:
        ``(augmented_images, augmented_masks)`` as NumPy arrays with
        ``len(images) * num_augmentations`` entries each. Masks are returned
        in the dtype they were supplied in. The originals are not included.

    Raises:
        ValueError: If ``images`` and ``masks`` differ in length.
    """
    if len(images) != len(masks):
        raise ValueError(
            "images and masks must have the same length, got {} and {}".format(len(images), len(masks))
        )
    if len(images) == 0 or num_augmentations <= 0:
        return np.array([]), np.array([])

    seq = iaa.Sequential([
        iaa.Fliplr(0.5), # horizontal flips
        iaa.Affine(rotate=(-20, 20)), # rotation
        iaa.Multiply((0.8, 1.2)), # change brightness
        iaa.GaussianBlur(sigma=(0, 3.0)) # blur
    ])

    augmented_images = []
    augmented_masks = []

    for image, mask in zip(images, masks):
        image = np.asarray(image)
        mask = np.asarray(mask)
        # imgaug expects a mask for a single image to be wrapped in
        # SegmentationMapsOnImage; a raw (H, W, 1) array is not a valid
        # `segmentation_maps` argument next to `image=`.
        segmap = SegmentationMapsOnImage(mask.astype(np.int32), shape=image.shape)
        for _ in range(num_augmentations):
            image_aug, segmap_aug = seq(image=image, segmentation_maps=segmap)
            augmented_images.append(image_aug)
            # Unwrap back to a plain array in the caller's original dtype.
            augmented_masks.append(segmap_aug.get_arr().astype(mask.dtype))

    return np.array(augmented_images), np.array(augmented_masks)

# Augment only the training split. Augmenting the whole dataset and splitting
# afterwards would put variants of the same source image in both training and
# validation, which inflates the validation score.
aug_images, aug_masks = augment_images(X_train, y_train)
aug_X_train, aug_y_train = aug_images, aug_masks
# Validate on the untouched hold-out images.
aug_X_val, aug_y_val = X_val, y_val

In [None]:
# Continue fitting the same model on the augmented set, then store it under a separate name
model.fit(
    x=aug_X_train,
    y=aug_y_train,
    batch_size=8,
    epochs=50,
    validation_data=(aug_X_val, aug_y_val),
)
model.save(filepath='unet_augmented_polygon.h5')

In [None]:
# Predict on new images
def predict_new_image(model, image_path, input_size=(128, 128)):
    """Run the model on one image file and return the prediction at the image's own size.

    The image is read as grayscale, resized to the network input size, passed
    through ``model.predict`` as a batch of one, and the first prediction is
    resized back to the original image dimensions.

    Args:
        model: Object exposing ``predict(batch)``; it receives an array of
            shape ``(1, height, width, 1)``.
        image_path: Path of the image to read.
        input_size: ``(height, width)`` expected by the model; any further
            entries (e.g. a channel count) are ignored.

    Returns:
        The prediction resized to the original image's height and width.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising when a file is missing or unreadable.
        raise FileNotFoundError("Could not read image: {}".format(image_path))

    net_h, net_w = input_size[:2]
    # cv2.resize takes its target size as (width, height).
    img_resized = cv2.resize(img, (net_w, net_h)).reshape(1, net_h, net_w, 1)
    prediction = model.predict(img_resized)

    orig_h, orig_w = img.shape[:2]
    prediction_resized = cv2.resize(prediction[0], (orig_w, orig_h))
    return prediction_resized

new_image_prediction = predict_new_image(model, 'path_to_new_image.jpg')
# Scale the prediction to 0-255 and convert explicitly to 8-bit instead of
# handing a float array to the PNG writer.
predicted_mask_u8 = np.clip(np.rint(new_image_prediction * 255), 0, 255).astype(np.uint8)
# cv2.imwrite reports failure through its return value rather than an exception.
if not cv2.imwrite('predicted_mask.png', predicted_mask_u8):
    raise IOError("Failed to write 'predicted_mask.png'")