# Model Analysis

#### Importing libraries

In [None]:
import numpy as np
import pandas as pd
import json
import cv2
import os
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import f1_score, roc_auc_score, roc_curve, classification_report, accuracy_score


This imports essential libraries for handling arrays, images, JSON files, deep learning (Keras), and performance metrics (scikit-learn).

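Improvement: Remove imports that the notebook never ends up using (pandas appears to be one such candidate) to shorten start-up time and keep the dependency list honest. Note that `from module import name` still loads the whole module, so importing individual functions improves readability rather than reducing overhead.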

#### Loading JSON Files

In [None]:
def load_json(json_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content (for example a
        ``dict`` for a JSON object or a ``list`` for a JSON array).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's default
    # locale encoding, which would garble non-ASCII names on some systems.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)


#### Parsing

In [None]:
def parse_json(data):
    """Build id-keyed lookup tables from a decoded annotation dictionary.

    Args:
        data: Mapping with a ``'categories'`` sequence (entries carrying
            ``'id'`` and ``'name'``) and an ``'images'`` sequence (entries
            carrying ``'id'`` and ``'file_name'``).

    Returns:
        A ``(categories, images)`` tuple where ``categories`` maps category
        id to category name and ``images`` maps image id to file name. When
        an id occurs more than once, the last entry wins.
    """
    name_by_category_id = {}
    for category in data['categories']:
        category_id = category['id']
        name_by_category_id[category_id] = category['name']

    file_by_image_id = {}
    for image_entry in data['images']:
        image_id = image_entry['id']
        file_by_image_id[image_id] = image_entry['file_name']

    return name_by_category_id, file_by_image_id


#### Loading Images and Masks

In [None]:
def load_image_and_mask(image_path, mask_path):
    """Load an image as RGB and its mask as a single-channel array via OpenCV.

    Args:
        image_path: Path of the colour image.
        mask_path: Path of the mask image.

    Returns:
        ``(image, mask)`` where ``image`` has been converted from OpenCV's
        BGR channel order to RGB and ``mask`` was read with
        ``cv2.IMREAD_GRAYSCALE``.

    Raises:
        FileNotFoundError: If OpenCV could not read either file.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, so
    # check explicitly instead of letting cvtColor fail with an opaque error.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise FileNotFoundError(f"Could not read mask: {mask_path}")

    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask


Loads an image and its corresponding mask using OpenCV.

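Improvement: `cv2.imread` returns `None` instead of raising when a file is missing or unreadable, so check both results and raise an explicit error (for example `FileNotFoundError`) rather than using `assert`, which is stripped when Python runs with `-O`. This avoids confusing failures downstream.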

In [None]:
def plot_images_with_masks(images, masks, categories, category_names):
    """Show images in a grid with their masks overlaid underneath.

    Samples are laid out four per row. Each group of four uses two grid
    rows: the plain images (titled with the category name) and, directly
    below, the same images with the mask drawn on top semi-transparently.

    Args:
        images: Iterable of image file paths.
        masks: Iterable of mask file paths, aligned with ``images``.
        categories: Iterable of category keys, aligned with ``images``.
        category_names: Mapping from category key to display name.

    Note:
        The three iterables are combined with ``zip``, so extra entries in
        the longer ones are ignored.
    """
    columns = 4
    samples = list(zip(images, masks, categories))
    # Ceiling division; keep at least one group so an empty input still
    # produces the 2x4 figure rather than failing in plt.subplots.
    groups = max(1, -(-len(samples) // columns))
    fig, axes = plt.subplots(
        nrows=2 * groups,
        ncols=columns,
        figsize=(10, 10 * groups),
        squeeze=False,
    )

    for i, (image_path, mask_path, category) in enumerate(samples):
        image, mask = load_image_and_mask(image_path, mask_path)
        # `group` selects the pair of rows, so samples beyond the fourth get
        # their own cells instead of overwriting the first ones.
        group, col = divmod(i, columns)

        ax_image = axes[2 * group, col]
        ax_image.imshow(image)
        ax_image.set_title(f"Category: {category_names[category]}")

        ax_mask = axes[2 * group + 1, col]
        ax_mask.imshow(image)
        ax_mask.imshow(mask, cmap='jet', alpha=0.55)

    # Hide the frames of every cell, including cells left unused.
    for ax in axes.flat:
        ax.axis('off')

    plt.subplots_adjust(wspace=0, hspace=0)
    plt.tight_layout()
    plt.show()


Plots a grid of images and their corresponding masks with category names.

Improvement: You could add input validation to ensure that the lengths of images, masks, and categories are the same.

#### Loading Image and Mask Using PIL

In [None]:
def load_image_and_mask_pil(image_path, mask_path, target_size=(256, 256)):
    """Load an image/mask pair with PIL, resize both and return NumPy arrays.

    Args:
        image_path: Path of the colour image.
        mask_path: Path of the mask image.
        target_size: Size passed to ``Image.resize`` for both the image and
            the mask. Defaults to ``(256, 256)``.

    Returns:
        ``(image, mask)`` as NumPy arrays. The image is converted to mode
        ``'RGB'`` and the mask to mode ``'L'`` before resizing.
    """
    # Open inside context managers so the underlying files are closed
    # promptly; convert() returns an independent, fully loaded image.
    with Image.open(image_path) as image_file:
        image = image_file.convert('RGB')
    with Image.open(mask_path) as mask_file:
        mask = mask_file.convert('L')

    image = image.resize(target_size, Image.Resampling.LANCZOS)
    # Nearest-neighbour only copies existing pixel values. An interpolating
    # filter such as LANCZOS would blend neighbouring pixels and introduce
    # values that were never present in the mask.
    mask = mask.resize(target_size, Image.Resampling.NEAREST)

    return np.array(image), np.array(mask)


Uses PIL to load and resize images, converting them into arrays.

Improvement: Add error handling for potential issues with file paths or unsupported image formats.

#### Visualizing Batches of Images and Masks

In [None]:
def visualize_batch(images, masks):
    """Display each image of a batch next to its mask, one pair per row.

    Args:
        images: Sequence of images accepted by ``Axes.imshow``.
        masks: Sequence of masks, indexed in step with ``images``; drawn
            with the ``'gray'`` colormap.

    Returns:
        None. Nothing is drawn when ``images`` is empty.
    """
    batch_size = len(images)
    if batch_size == 0:
        # plt.subplots cannot build a grid with zero rows.
        return

    # squeeze=False keeps `axes` two-dimensional even for a single-row grid,
    # so the [row, column] indexing below also works for a batch of one.
    fig, axes = plt.subplots(
        batch_size, 2, figsize=(10, batch_size * 5), squeeze=False
    )
    for i, image in enumerate(images):
        ax_image, ax_mask = axes[i, 0], axes[i, 1]
        ax_image.imshow(image)
        ax_image.axis('off')
        ax_mask.imshow(masks[i], cmap='gray')
        ax_mask.axis('off')

    plt.tight_layout()
    plt.show()


Displays a batch of images and masks side by side.

Improvement: Check if images and masks lists are non-empty to prevent potential errors.

#### Dataset Generator

In [None]:
def dataset_generator(image_dir, mask_dir, annotations, batch_size, target_size=(256, 256)):
    """Yield shuffled ``(images, masks)`` batches forever.

    Every pass shuffles the image entries, loads each image and its mask
    through ``load_image_and_mask_pil``, divides both by 255.0 and yields
    them as stacked NumPy arrays. Samples that fail to load are reported
    with ``print`` and skipped, so a batch may hold fewer than
    ``batch_size`` samples.

    Args:
        image_dir: Directory containing the image files.
        mask_dir: Directory containing the mask files. A mask is looked up
            as ``"<file_name>_mask.png"``, where ``file_name`` is the
            image's full file name from the annotations.
        annotations: Mapping whose ``'images'`` entry is a sequence of dicts
            carrying a ``'file_name'`` key.
        batch_size: Maximum number of samples per batch; must be positive.
        target_size: Size forwarded to ``load_image_and_mask_pil``.

    Yields:
        ``(images, masks)`` tuples of NumPy arrays.

    Raises:
        ValueError: If ``batch_size`` is not positive or there are no image
            entries (raised on the first ``next()``).
        RuntimeError: If a complete pass over the data loads no sample.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Shuffle a private copy so the caller's annotations keep their order.
    image_info = list(annotations['images'])
    if not image_info:
        # Without this guard the endless loop below would never yield.
        raise ValueError("annotations['images'] is empty")

    while True:
        np.random.shuffle(image_info)
        yielded_any = False
        for batch_start in range(0, len(image_info), batch_size):
            images = []
            masks = []
            for image_data in image_info[batch_start:batch_start + batch_size]:
                image_filename = image_data['file_name']
                image_path = os.path.join(image_dir, image_filename)
                mask_filename = f"{image_filename}_mask.png"
                mask_path = os.path.join(mask_dir, mask_filename)
                try:
                    image, mask = load_image_and_mask_pil(image_path, mask_path, target_size)
                except (OSError, ValueError) as e:
                    # OSError covers missing files and, presumably, PIL's
                    # unreadable-image errors (TODO confirm against Pillow).
                    print(f"Error loading image or mask: {e}")
                    continue
                images.append(image / 255.0)
                masks.append(mask / 255.0)

            if not images:
                # Every sample of this batch failed; do not emit an empty batch.
                continue
            yielded_any = True
            yield np.array(images), np.array(masks)

        if not yielded_any:
            # Nothing could be loaded in a full pass; stop instead of
            # looping forever without producing data.
            raise RuntimeError("No image/mask pair could be loaded from the dataset")


Generates batches of images and masks for training.

Improvement: Include optional augmentation for training images to increase dataset variability.

#### U-Net Model with VGG16 Backbone

In [None]:
def unet_vgg16_model(input_shape):
    """Build a U-Net style segmentation model on a frozen VGG16 encoder.

    The encoder is VGG16 with ImageNet weights and every layer frozen. The
    decoder starts from ``block4_conv3`` and runs three stages, each made of
    a stride-2 transposed convolution, a concatenation with the matching
    encoder feature map (``block3_conv3``, ``block2_conv2``,
    ``block1_conv2``) and two 3x3 ReLU convolutions. A final 1x1 sigmoid
    convolution produces a single-channel output.

    Args:
        input_shape: Input shape forwarded to ``VGG16``.
            NOTE(review): height and width presumably have to be multiples
            of 8 so the upsampled maps line up with the skip connections;
            confirm against the VGG16 pooling layout.

    Returns:
        An uncompiled Keras ``Model`` mapping the VGG16 input to the mask.
    """
    vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in vgg_base.layers:
        layer.trainable = False

    # Decoder plan, deepest stage first: (filters, encoder layer to concatenate).
    decoder_stages = (
        (256, 'block3_conv3'),
        (128, 'block2_conv2'),
        (64, 'block1_conv2'),
    )

    x = vgg_base.get_layer('block4_conv3').output
    for filters, skip_name in decoder_stages:
        skip = vgg_base.get_layer(skip_name).output
        x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = concatenate([x, skip])
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(x)
    return Model(inputs=[vgg_base.input], outputs=[outputs])


Defines a U-Net model for image segmentation using VGG16 as the encoder backbone.

Improvement: Experiment with making different layers trainable to see if performance improves.

#### Plotting Training Metrics

In [None]:
def plot_training_metrics(history):
    """Plot accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping with
            ``'accuracy'`` and ``'loss'`` series and, optionally,
            ``'val_accuracy'`` and ``'val_loss'``.

    Raises:
        KeyError: If ``'accuracy'`` or ``'loss'`` is missing.
    """
    records = history.history
    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))

    plt.figure(figsize=(14, 5))
    for position, (key, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(records[key], label=f'Training {label}')
        # Validation series are only present when the history was recorded
        # with validation data, so draw them only if available.
        val_key = f'val_{key}'
        if val_key in records:
            plt.plot(records[val_key], label=f'Validation {label}')
        plt.title(f'{label} during Training and Validation')
        plt.xlabel('Epochs')
        plt.ylabel(label)
        plt.legend()
    plt.show()


Visualizes the training and validation metrics over epochs.

Improvement: Add smoothing to the plots to make trends clearer.

#### Evaluating the Model

In [None]:
def evaluate_model(model, data_gen, steps):
    """Compute pixel-wise F1 score and ROC AUC over ``steps`` batches.

    Args:
        model: Object with a ``predict(x_batch)`` method returning one score
            per pixel.
        data_gen: Iterator yielding ``(x_batch, y_batch)`` pairs.
        steps: Number of batches to draw from ``data_gen``.

    Returns:
        ``(f1, auc)``. F1 is computed on predictions thresholded at 0.5 and
        AUC on the raw prediction scores.

    Raises:
        ValueError: If ``steps`` is less than 1.
    """
    true_chunks = []
    score_chunks = []
    for _ in range(steps):
        x_batch, y_batch = next(data_gen)
        scores = np.asarray(model.predict(x_batch))
        true_chunks.append(np.asarray(y_batch).ravel())
        score_chunks.append(scores.ravel())

    if not true_chunks:
        raise ValueError("steps must be at least 1 to evaluate the model")

    # Accumulating arrays and concatenating once avoids building Python
    # lists with one float object per pixel.
    y_score = np.concatenate(score_chunks)
    # Assumes masks are scaled to [0, 1] (TODO confirm against the data
    # generator). Thresholding makes the labels strictly binary, which the
    # classification metrics below require.
    y_true = (np.concatenate(true_chunks) >= 0.5).astype(np.int32)
    y_pred = (y_score >= 0.5).astype(np.int32)

    f1 = f1_score(y_true, y_pred)
    # AUC ranks examples by score, so pass the continuous scores. Using the
    # thresholded predictions would collapse the ROC curve to a single
    # operating point.
    auc = roc_auc_score(y_true, y_score)
    return f1, auc


Calculates F1 score and AUC for model evaluation.

Improvement: Add additional metrics, like IoU (Intersection over Union), which is common for segmentation tasks.