# Networks

In [4]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.layers import LayerNormalization, MultiHeadAttention, Input, LSTM, Attention, AveragePooling2D, Concatenate, GlobalAveragePooling2D


In [3]:
# This model architecture uses a combination of convolutional and LSTM layers to extract spatial and temporal features from the input data, 
# and an attention layer to focus on the most relevant features for the task. 
# This type of architecture has been shown to perform well on a variety of tasks, including image and video classification, natural language processing, and speech recognition.

def LSTM_Attention(input_shape, num_classes):
    """Build a CNN + LSTM + self-attention classifier.

    Three blocks of two 3x3 convolutions (64, 128 and 256 filters), each
    followed by batch normalisation and 2x2 max pooling, extract a feature
    map. The map is flattened into a sequence of per-position feature
    vectors, run through an LSTM and a dot-product self-attention layer,
    averaged over the sequence, and classified by a dense softmax head.

    Args:
        input_shape: Shape of one input sample, without the batch dimension,
            as accepted by `Input(shape=...)`.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled `tf.keras.Model` whose output has shape
        `(batch, num_classes)`.
    """
    inputs = Input(shape=input_shape)

    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    # LSTM needs a 3-D (batch, steps, features) input, so the 2-D spatial grid
    # is collapsed into a single sequence axis while keeping the channel axis.
    x = tf.keras.layers.Reshape((-1, x.shape[-1]))(x)
    x = LSTM(256, return_sequences=True)(x)

    # Attention must be called on a list [query, value]; passing the same
    # tensor twice gives self-attention over the LSTM outputs.
    x = Attention()([x, x])

    # Reduce the attended sequence to one vector per sample so the head emits
    # a single class distribution instead of one per sequence step.
    x = tf.keras.layers.GlobalAveragePooling1D()(x)

    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

    outputs = Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    return model

In [5]:
# In this article: https://reader.elsevier.com/reader/sd/pii/S2468785522002312?token=DF0636838957FD7127260415FABFB0B0485A88DDB54E84357BDE1D1B2E7F7D0D9374FEA27ED601525C4B8FAEEF7BDCC3&originRegion=eu-west-1&originCreation=20221208094015
# They use Google Inception V3: https://keras.io/api/applications/inceptionv3/

# Define the inception module
def inception_module(x, filters):
    """Apply four parallel convolution/pooling branches to `x` and concatenate them.

    Args:
        x: Input tensor fed to every branch.
        filters: Indexable of six filter counts, read as
            filters[0] - 1x1 branch,
            filters[1] - 1x1 reduction before the 3x3 convolution,
            filters[2] - 3x3 convolution,
            filters[3] - 1x1 reduction before the 5x5 convolution,
            filters[4] - 5x5 convolution,
            filters[5] - 1x1 projection after the 3x3 max pooling.

    Returns:
        The four branch outputs joined with `Concatenate()` (default axis).
    """
    def _conv(n_filters, kernel, tensor):
        # Every convolution in the module shares padding and activation.
        return Conv2D(n_filters, kernel, padding='same', activation='relu')(tensor)

    # Branch 1: plain 1x1 convolution.
    branch_1x1 = _conv(filters[0], (1, 1), x)

    # Branch 2: 1x1 reduction followed by a 3x3 convolution.
    branch_3x3 = _conv(filters[2], (3, 3), _conv(filters[1], (1, 1), x))

    # Branch 3: 1x1 reduction followed by a 5x5 convolution.
    branch_5x5 = _conv(filters[4], (5, 5), _conv(filters[3], (1, 1), x))

    # Branch 4: stride-1 max pooling followed by a 1x1 projection.
    pooled = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = _conv(filters[5], (1, 1), pooled)

    return Concatenate()([branch_1x1, branch_3x3, branch_5x5, branch_pool])

# Define the model architecture
def create_model(input_shape, num_classes):
    """Build an Inception-style image classifier.

    A convolutional stem (strided 3x3 convolutions, batch normalisation and
    max pooling) is followed by nine inception modules in three groups
    separated by max pooling, then average pooling, dropout, global average
    pooling and a softmax classifier.

    Args:
        input_shape: Shape of one input sample, without the batch dimension,
            as accepted by `Input(shape=...)`.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled `tf.keras.Model`.
    """
    inputs = Input(shape=input_shape)

    # Stem
    x = Conv2D(32, (3, 3), padding='same', strides=(2, 2), activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = Conv2D(80, (1, 1), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(192, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    # inception_module takes ONE sequence of six filter counts:
    # (1x1, 3x3-reduce, 3x3, 5x5-reduce, 5x5, pool-projection).
    # Passing them as separate/nested positional arguments raises a TypeError.
    x = inception_module(x, (64, 96, 128, 16, 32, 32))
    x = inception_module(x, (128, 128, 192, 32, 96, 64))
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    x = inception_module(x, (192, 96, 208, 16, 48, 64))
    x = inception_module(x, (160, 112, 224, 24, 64, 64))
    x = inception_module(x, (128, 128, 256, 24, 64, 64))
    x = inception_module(x, (112, 144, 288, 32, 64, 64))
    x = inception_module(x, (256, 160, 320, 32, 128, 128))
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    x = inception_module(x, (256, 160, 320, 32, 128, 128))
    x = inception_module(x, (384, 192, 384, 48, 128, 128))

    x = AveragePooling2D((2, 2), strides=(1, 1), padding='same')(x)
    x = Dropout(0.4)(x)

    # Classifier head
    x = GlobalAveragePooling2D()(x)
    x = Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=inputs, outputs=x)

    return model

# Metrics
https://towardsdatascience.com/metrics-to-evaluate-your-semantic-segmentation-model-6bcb99639aa2

## Intersection-over-union 
The intersection-over-union (IoU) metric measures the overlap between the predicted and ground-truth segmentation masks. It is calculated by dividing the intersection of the two masks by their union, and is commonly used to evaluate the performance of binary and multi-class segmentation models.

## Pixel accuracy
The pixel accuracy metric measures the percentage of pixels that are correctly classified by the model. It is calculated by dividing the number of correctly classified pixels by the total number of pixels in the image, and is commonly used to evaluate the performance of multi-class segmentation models.

## Mean Average Precision (mAP)
The mean average precision (mAP) metric measures the average precision of the model across all classes. It is calculated by first computing the average precision for each class, and then averaging the results across all classes. The mAP metric is commonly used to evaluate the performance of object detection and segmentation models.

## Dice coefficient
The Dice coefficient is a measure of the overlap between two sets of data. It is often used in the field of image analysis to compare the similarity of two images (for example a predicted and a ground-truth segmentation mask), but it can be applied to any kind of data where it is meaningful to compare the overlap between two sets. The Dice coefficient is calculated as twice the number of elements that are present in both sets, divided by the sum of the sizes of the two sets: $\mathrm{Dice}(A, B) = \frac{2\,|A \cap B|}{|A| + |B|}$. A Dice coefficient of 1 indicates that the two sets are identical, while a coefficient of 0 indicates that there is no overlap between the sets.

In [7]:
# Calculate the intersection-over-union (IoU)
def iou(y_true, y_pred, smooth=1e-7):
    """Compute the intersection-over-union of two masks over the whole batch.

    Both inputs are flattened, so a single scalar is returned for all samples
    and channels together.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor with the same number of elements.
        smooth: Small constant added to numerator and denominator so that two
            empty masks give 1.0 instead of NaN (0 / 0).

    Returns:
        Scalar float32 tensor with the IoU.
    """
    # Cast first: integer masks multiplied with float predictions would
    # otherwise raise a dtype error.
    y_true_flat = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_pred_flat = tf.reshape(tf.cast(y_pred, tf.float32), [-1])

    # |A ∩ B| and |A ∪ B| = |A| + |B| - |A ∩ B|
    intersection = tf.reduce_sum(y_true_flat * y_pred_flat)
    union = tf.reduce_sum(y_true_flat) + tf.reduce_sum(y_pred_flat) - intersection

    return (intersection + smooth) / (union + smooth)

# Calculate the pixel accuracy
def pixel_accuracy(y_true, y_pred):
    """Compute the fraction of elements where `y_pred` equals `y_true`.

    The comparison is an exact equality test, so `y_pred` must already hold
    hard labels (e.g. thresholded or arg-maxed predictions); raw probabilities
    will almost never match.

    Args:
        y_true: Ground-truth label tensor.
        y_pred: Predicted label tensor with the same number of elements.

    Returns:
        Scalar float32 tensor in [0, 1].
    """
    # tf.equal requires both operands to share a dtype; casting both sides
    # lets integer masks be compared with float predictions.
    y_true_flat = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_pred_flat = tf.reshape(tf.cast(y_pred, tf.float32), [-1])

    matches = tf.cast(tf.equal(y_true_flat, y_pred_flat), tf.float32)

    return tf.reduce_mean(matches)

# Calculate the mean average precision (mAP)
def mean_average_precision(y_true, y_pred, num_classes=None):
    """Compute the class-averaged precision of the top-1 predictions.

    For every class the precision of the arg-max prediction is computed
    (true positives / predicted positives) and the per-class values are
    averaged. Note that this is precision at k=1 averaged over classes, not
    the area under a precision-recall curve.

    Args:
        y_true: Ground-truth tensor whose last axis has `num_classes` entries;
            assumed to be one-hot encoded - TODO confirm against the caller.
        y_pred: Predicted scores with the same shape as `y_true`.
        num_classes: Number of classes. Defaults to the static size of the
            last axis of `y_pred`.

    Returns:
        Scalar float32 tensor. A class that is never predicted contributes a
        precision of 0 to the mean.

    Raises:
        ValueError: If `num_classes` is not given and cannot be inferred.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    if num_classes is None:
        # The previous version read an undefined global; infer it instead.
        num_classes = y_pred.shape[-1]
        if num_classes is None:
            raise ValueError(
                "num_classes could not be inferred from y_pred; pass it explicitly."
            )

    # Flatten everything except the class axis
    y_true_flat = tf.reshape(tf.cast(y_true, tf.float32), [-1, num_classes])
    y_pred_flat = tf.reshape(tf.cast(y_pred, tf.float32), [-1, num_classes])

    # One-hot encode the top-1 prediction of every row
    predicted = tf.one_hot(
        tf.argmax(y_pred_flat, axis=-1), depth=num_classes, dtype=tf.float32
    )

    # Per-class precision; divide_no_nan yields 0 for classes never predicted
    true_positives = tf.reduce_sum(predicted * y_true_flat, axis=0)
    predicted_positives = tf.reduce_sum(predicted, axis=0)
    precisions = tf.math.divide_no_nan(true_positives, predicted_positives)

    # Average over classes
    mAP = tf.reduce_mean(precisions)

    return mAP

def dice_coef(y_true, y_pred, smooth=1):
    """Compute the smoothed Dice coefficient, averaged over the batch.

    Sums run over axes 1, 2 and 3, so both inputs must be 4-D with the batch
    on axis 0.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor with the same shape as `y_true`.
        smooth: Constant added to numerator and denominator to avoid division
            by zero when both masks are empty.

    Returns:
        Scalar float32 tensor: the mean over the batch of
        (2 * intersection + smooth) / (sum(y_true) + sum(y_pred) + smooth).
    """
    # Uses tf ops directly: the Keras backend alias `K` is not imported in
    # this notebook, so the previous K.sum / K.mean calls raised NameError.
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    reduce_axes = [1, 2, 3]
    intersection = tf.reduce_sum(y_true * y_pred, axis=reduce_axes)
    # Sum of both mask sizes (|A| + |B|), not a set union.
    total = tf.reduce_sum(y_true, axis=reduce_axes) + tf.reduce_sum(y_pred, axis=reduce_axes)

    dice = tf.reduce_mean((2. * intersection + smooth) / (total + smooth), axis=0)
    return dice

In [25]:
# Example image path used to try out the string handling below.
hej = "/Users/daniel/Desktop/ImagestoDBWithoutFrame/IMG_0182.JPG"

# Everything before the last '/' is the containing directory.
hej.rsplit('/', 1)[0]


'/Users/daniel/Desktop/ImagestoDBWithoutFrame'

In [16]:
# `split` is not defined in any visible cell, so derive the (name, extension)
# pair from `hej` here; this lets the cell run on a fresh kernel.
split = hej.rpartition('/')[2].rsplit('.', 1)
split[0]+"_mask."+split[1]

'IMG_0182_mask.JPG'