# In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the dataset directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation'):
    for filename in filenames:
        # Join with the directory currently being walked to get the full path.
        print(os.path.join(dirname, filename))


import json
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt

# Step 1: Reading and processing the JSON
## First, we need to read and process the JSON file to extract the label and category information:
# Function to load the JSON file
def load_json(json_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.
    """
    # Pin the encoding: without it the platform default is used (for example
    # cp1252 on Windows), which can mis-decode non-ASCII text in the file.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Example: read the training-set COCO annotation JSON file
json_path = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/train_annotations.coco.json'
data = load_json(json_path)

# Function to map the categories and images
def parse_json(data):
    """Build id-keyed lookup tables from a parsed annotation document.

    Args:
        data: Mapping with a 'categories' list (entries holding 'id' and
            'name') and an 'images' list (entries holding 'id' and
            'file_name').

    Returns:
        A ``(categories, images)`` tuple of dicts: category id -> name and
        image id -> file name.
    """
    category_lookup = {}
    for category in data['categories']:
        category_lookup[category['id']] = category['name']

    image_lookup = {}
    for image in data['images']:
        image_lookup[image['id']] = image['file_name']

    return category_lookup, image_lookup

# Extract the category and image lookup tables from the parsed JSON
categories, images = parse_json(data)

print("Categorias:", categories)
print("Imagens:", images)


# Tabular view of the categories: one row per (id, name) pair
categories_df = pd.DataFrame(list(categories.items()), columns=['Category_ID', 'Category_Name'])

# Tabular view of the images: one row per (id, file name) pair
images_df = pd.DataFrame(list(images.items()), columns=['Image_ID', 'File_Name'])

# Bare expressions: no effect when run as a plain script
# (presumably left over from notebook cells that displayed the frames).
categories_df

images_df

def load_image_and_mask(image_path, mask_path):
    """Read an image as RGB and its mask as single-channel grayscale.

    Args:
        image_path: Path of the image file.
        mask_path: Path of the mask file.

    Returns:
        ``(image, mask)``: the image converted from OpenCV's BGR order to
        RGB, and the mask read with ``cv2.IMREAD_GRAYSCALE``.

    Raises:
        FileNotFoundError: If either file is missing or cannot be decoded.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise FileNotFoundError(f"Could not read mask: {mask_path}")

    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB), mask

def plot_images_with_masks(images, masks, categories, category_names):
    """Show each image above the same image overlaid with its mask.

    The figure has two rows and one column per sample: the top row shows the
    image titled with its category name, the bottom row shows the image with
    the mask drawn on top of it.

    Args:
        images: Iterable of image file paths.
        masks: Iterable of mask file paths, aligned with ``images``.
        categories: Iterable of category ids, aligned with ``images``.
        category_names: Mapping from category id to display name.

    Raises:
        KeyError: If a category id is missing from ``category_names``.
    """
    samples = list(zip(images, masks, categories))
    if not samples:
        return  # nothing to draw

    # One column per sample (the grid used to be fixed at four columns, so a
    # fifth sample overwrote the first). squeeze=False keeps `axes` 2-D even
    # when there is a single column.
    _, axes = plt.subplots(nrows=2, ncols=len(samples), figsize=(10, 10), squeeze=False)
    for col, (image_path, mask_path, category) in enumerate(samples):
        image, mask = load_image_and_mask(image_path, mask_path)

        ax_image = axes[0, col]
        ax_image.imshow(image)
        ax_image.axis('off')
        ax_image.set_title(f"Categoria: {category_names[category]}")

        # Bottom row: the image with a semi-transparent mask on top.
        ax_mask = axes[1, col]
        ax_mask.imshow(image)
        ax_mask.imshow(mask, cmap='jet', alpha=0.55)
        ax_mask.axis('off')

    plt.subplots_adjust(wspace=0, hspace=0)
    plt.tight_layout()
    plt.show()

# Display names keyed by category id.
# NOTE(review): hard-coded here rather than taken from the parsed annotation
# file - confirm they match the dataset's categories.
category_names = {0: 'vzrad2', 1: 'Caries', 2: 'Crown', 3: 'Filling'}

# Training images and the directory holding their masks
image_dir = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/Dental X_Ray/train'
mask_dir = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/Dental X_Ray/train/train_mask'

# Four sample images to plot.
# NOTE: this re-binds `images`, replacing the id -> file name lookup table
# assigned from parse_json earlier in the file.
images = [
    os.path.join(image_dir, '0a4f2d22-Hematian_Fariba_57y_31052021_132542_jpg.rf.6bfcf8cfb273a9a0767ea25499da98bc.jpg'),
    os.path.join(image_dir, '3975890000-jpg_png_jpg.rf.1d1b03af3734eba73e9d12a295578027.jpg'),
    os.path.join(image_dir, '4054820000-jpg_png_jpg.rf.1d4a4d0a6fd081280376d10fc085cffa.jpg'),
    os.path.join(image_dir, '3794860000-jpg_png_jpg.rf.1cb8e683ddd8a1131d0ffd0c53ee4ac0.jpg')
]

# Mask files for the samples above, in the same order; each is named
# '<image file name>_mask.png'.
masks = [
    os.path.join(mask_dir, '0a4f2d22-Hematian_Fariba_57y_31052021_132542_jpg.rf.6bfcf8cfb273a9a0767ea25499da98bc.jpg_mask.png'),
    os.path.join(mask_dir, '3975890000-jpg_png_jpg.rf.1d1b03af3734eba73e9d12a295578027.jpg_mask.png'),
    os.path.join(mask_dir, '4054820000-jpg_png_jpg.rf.1d4a4d0a6fd081280376d10fc085cffa.jpg_mask.png'),
    os.path.join(mask_dir, '3794860000-jpg_png_jpg.rf.1cb8e683ddd8a1131d0ffd0c53ee4ac0.jpg_mask.png')
]

# Category id used for each sample's title, in the same order as `images`.
# NOTE(review): presumably hand-assigned - verify against the annotations.
# This also re-binds `categories`, replacing the id -> name lookup table.
categories = [0, 1, 2, 3]

plot_images_with_masks(images, masks, categories, category_names)

from PIL import Image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

def load_annotations(json_path):
    """Load and return the parsed contents of an annotation JSON file.

    Args:
        json_path: Path of the annotation JSON file.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.
    """
    # Pin the encoding: without it the platform default is used (for example
    # cp1252 on Windows), which can mis-decode non-ASCII text in the file.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def load_image_and_mask(image_path, mask_path, target_size=(256, 256)):
    """Load an image and its mask, resized to ``target_size``, as arrays.

    Note: this shares its name with the OpenCV-based loader defined earlier
    in this file and replaces it once this definition has run.

    Args:
        image_path: Path of the image file.
        mask_path: Path of the mask file.
        target_size: Size passed to ``Image.resize`` for both outputs.

    Returns:
        ``(image, mask)`` NumPy arrays: the image converted to RGB, the mask
        converted to single-channel 'L' mode.
    """
    # Context managers release the underlying files deterministically.
    with Image.open(image_path) as image_file:
        image = image_file.convert('RGB').resize(target_size, Image.Resampling.LANCZOS)

    with Image.open(mask_path) as mask_file:
        # Nearest-neighbour keeps the mask's original values. LANCZOS
        # interpolates, blending mask values along region borders and
        # producing values that were never in the source mask.
        mask = mask_file.convert('L').resize(target_size, Image.Resampling.NEAREST)

    return np.array(image), np.array(mask)

def visualize_batch(images, masks):
    """Plot every image of a batch next to its mask, one sample per row.

    Args:
        images: Sequence of images accepted by ``imshow``.
        masks: Sequence of masks, indexed in step with ``images``.
    """
    batch_size = len(images)
    if batch_size == 0:
        return  # nothing to draw

    # squeeze=False keeps `axes` two-dimensional for a batch of one; by
    # default plt.subplots collapses it to 1-D and `axes[i, 0]` would fail.
    _, axes = plt.subplots(batch_size, 2, figsize=(10, batch_size * 5), squeeze=False)
    for i, image in enumerate(images):
        ax_image, ax_mask = axes[i]
        ax_image.imshow(image)
        ax_image.axis('off')
        ax_mask.imshow(masks[i], cmap='gray')
        ax_mask.axis('off')

    plt.tight_layout()
    plt.show()

def dataset_generator(image_dir, mask_dir, annotations, batch_size, target_size=(256, 256)):
    """Yield ``(images, masks)`` batches forever, reshuffled on every pass.

    Each image listed in ``annotations['images']`` is loaded from
    ``image_dir``; its mask is looked up in ``mask_dir`` under the name
    ``'<file_name>_mask.png'``. Both are divided by 255.0 before batching.
    Samples that fail to load are reported and skipped.

    Args:
        image_dir: Directory containing the image files.
        mask_dir: Directory containing the mask files.
        annotations: Parsed annotation document with an 'images' list whose
            entries hold a 'file_name'.
        batch_size: Maximum number of samples per batch (the last batch of a
            pass may be smaller).
        target_size: Size forwarded to ``load_image_and_mask``.

    Yields:
        ``(np.ndarray, np.ndarray)`` - a batch of images and a batch of masks.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or no images are
            listed (previously the generator would spin forever).
        RuntimeError: If a full pass over the data loads no sample at all.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Shuffle a private copy so the caller's annotation list keeps its order.
    image_info = list(annotations['images'])
    if not image_info:
        raise ValueError("annotations['images'] is empty; nothing to generate")

    while True:
        np.random.shuffle(image_info)
        produced_batch = False
        for batch_start in range(0, len(image_info), batch_size):
            images = []
            masks = []
            for image_data in image_info[batch_start:batch_start + batch_size]:
                image_filename = image_data['file_name']
                image_path = os.path.join(image_dir, image_filename)
                mask_filename = f"{image_filename}_mask.png"
                mask_path = os.path.join(mask_dir, mask_filename)
                try:
                    image, mask = load_image_and_mask(image_path, mask_path, target_size)
                except (FileNotFoundError, ValueError) as e:
                    print(f"Error loading image or mask: {e}")
                    continue
                images.append(image / 255.0)
                masks.append(mask / 255.0)

            if not images:
                # Every sample of this batch failed to load; an empty batch
                # is of no use to the consumer, so do not yield it.
                continue
            produced_batch = True
            yield np.array(images), np.array(masks)

        if not produced_batch:
            # Avoid looping forever when nothing on disk can be loaded.
            raise RuntimeError("No image/mask pair could be loaded from the dataset")

def unet_vgg16_model(input_shape):
    """Build a U-Net style segmentation model on a frozen VGG16 encoder.

    The encoder is VGG16 with ImageNet weights and no classification head;
    all of its layers are set non-trainable. The decoder starts from
    'block4_conv3' and runs three stages, each consisting of a stride-2
    transposed convolution, concatenation with an encoder skip connection
    ('block3_conv3', 'block2_conv2', 'block1_conv2') and two 3x3 ReLU
    convolutions. A final 1x1 sigmoid convolution produces one output channel.

    Args:
        input_shape: Input shape passed to ``VGG16`` (for example
            ``(256, 256, 3)``).

    Returns:
        An uncompiled Keras ``Model`` mapping the VGG16 input to the
        single-channel sigmoid output.
    """
    vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in vgg_base.layers:
        layer.trainable = False

    inputs = vgg_base.input
    # Encoder feature maps reused as skip connections. The two MaxPooling2D
    # layers that used to be built here were never connected to the output
    # and have been dropped.
    c1 = vgg_base.get_layer('block1_conv2').output
    c2 = vgg_base.get_layer('block2_conv2').output
    c3 = vgg_base.get_layer('block3_conv3').output
    c4 = vgg_base.get_layer('block4_conv3').output

    # Decoder: layers are created in the same order as the unrolled version
    # (transpose conv, concatenate, conv, conv per stage).
    x = c4
    for filters, skip in ((256, c3), (128, c2), (64, c1)):
        x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = concatenate([x, skip])
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(x)
    model = Model(inputs=[inputs], outputs=[outputs])
    return model

# Model input shape and number of samples per batch
input_shape = (256, 256, 3)
batch_size = 8

# Training split: images, masks and annotation file
train_image_dir = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/Dental X_Ray/train'
train_mask_dir = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/Dental X_Ray/train/train_mask'
train_annotation_file = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/train_annotations.coco.json'

# Validation split: images, masks and annotation file
valid_image_dir = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/Dental X_Ray/valid'
valid_mask_dir = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/Dental X_Ray/valid/valid_mask'
valid_annotation_file = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/valid_annotations.coco.json'

train_annotations = load_annotations(train_annotation_file)
valid_annotations = load_annotations(valid_annotation_file)

train_data_gen = dataset_generator(train_image_dir, train_mask_dir, train_annotations, batch_size, target_size=(256, 256))
valid_data_gen = dataset_generator(valid_image_dir, valid_mask_dir, valid_annotations, batch_size, target_size=(256, 256))

# Sanity check: draw one training batch and display it
images, masks = next(train_data_gen)
visualize_batch(images, masks)

# Ceiling division: the generators also emit the final, smaller batch of each
# pass, so flooring would leave it out of the epoch and would give 0 steps
# for a split with fewer images than batch_size.
train_steps_per_epoch = (len(train_annotations['images']) + batch_size - 1) // batch_size
valid_steps_per_epoch = (len(valid_annotations['images']) + batch_size - 1) // batch_size

model = unet_vgg16_model(input_shape)
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(
    train_data_gen,
    steps_per_epoch=train_steps_per_epoch,
    validation_data=valid_data_gen,
    validation_steps=valid_steps_per_epoch,
    epochs=7,
    verbose=1
)

from sklearn.metrics import f1_score, roc_auc_score

def plot_training_metrics(history):
    """Plot training/validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping with the
            keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss', each
            holding one value per epoch.
    """
    # (subplot position, training key, validation key,
    #  training label, validation label, title, y-axis label)
    panels = (
        (1, 'accuracy', 'val_accuracy',
         'Training Accuracy', 'Validation Accuracy',
         'Accuracy during Training and Validation', 'Accuracy'),
        (2, 'loss', 'val_loss',
         'Training Loss', 'Validation Loss',
         'Loss during Training and Validation', 'Loss'),
    )

    plt.figure(figsize=(14, 5))
    for position, train_key, val_key, train_label, val_label, title, y_label in panels:
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=train_label)
        plt.plot(history.history[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.show()

# Plot the accuracy and loss curves recorded during training
plot_training_metrics(history)

# Save the trained model to 'vgg16_unet_model.h5' (a relative path)
model.save('vgg16_unet_model.h5')

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, roc_auc_score, roc_curve, classification_report, accuracy_score

def evaluate_model(model, data_gen, steps):
    """Evaluate a segmentation model pixel-wise over ``steps`` batches.

    Predictions are thresholded at 0.5 for F1 and the classification report.
    ROC curve and AUC are computed from the raw predicted scores, because
    thresholded predictions reduce the ROC curve to a single operating
    point. Ground-truth masks are binarised at 0.5 as well, the same cut-off
    ``plot_single_prediction`` applies, so non-binary mask values do not
    trip the binary-format check. Every 20th batch is also visualised.

    Args:
        model: Object with a ``predict(x_batch)`` method returning scores.
        data_gen: Iterator yielding ``(x_batch, y_batch)`` pairs.
        steps: Number of batches to draw from ``data_gen``.

    Returns:
        None. Metrics are printed and the ROC curve is shown.
    """
    # Collect one flat array per batch and concatenate once at the end; this
    # avoids building Python lists with one element per pixel.
    true_chunks = []
    pred_chunks = []
    score_chunks = []

    for i in range(steps):
        x_batch, y_batch = next(data_gen)
        predictions = model.predict(x_batch)
        predictions_bin = (predictions >= 0.5).astype(np.int32)

        true_chunks.append((np.asarray(y_batch) >= 0.5).astype(np.int32).ravel())
        pred_chunks.append(predictions_bin.ravel())
        score_chunks.append(np.asarray(predictions).ravel())

        if i % 20 == 0:
            plot_single_prediction(x_batch, y_batch, predictions_bin)

    if true_chunks:
        y_true = np.concatenate(true_chunks)
        y_pred = np.concatenate(pred_chunks)
        y_score = np.concatenate(score_chunks)
    else:
        # No batch was drawn (steps <= 0): fall through to the format check.
        y_true = np.array([])
        y_pred = np.array([])
        y_score = np.array([])

    print(f"Unique values in y_true: {np.unique(y_true)}")
    print(f"Unique values in y_pred: {np.unique(y_pred)}")

    if np.array_equal(np.unique(y_true), [0, 1]) and np.array_equal(np.unique(y_pred), [0, 1]):
        f1 = f1_score(y_true, y_pred)
        # Rank-based metric: feed the scores, not the thresholded labels.
        auc = roc_auc_score(y_true, y_score)

        print(f'F1-Score: {f1}')
        print(f'AUC: {auc}')

        fpr, tpr, _ = roc_curve(y_true, y_score)
        plt.figure()
        plt.plot(fpr, tpr, marker='.')
        plt.title('ROC Curve')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.show()

        print(classification_report(y_true, y_pred))
    else:
        print("Error: y_true or y_pred is not in the expected binary format.")

def plot_single_prediction(images, masks_true, masks_pred):
    """Show the first sample of a batch: image, true mask, predicted overlay.

    The title of the third panel reports the pixel accuracy between the
    predicted mask and the true mask binarised at 0.5.

    Args:
        images: Batch of images; only index 0 is used.
        masks_true: Batch of ground-truth masks; only index 0 is used.
        masks_pred: Batch of predicted masks; only index 0 is used.
    """
    first_image, first_true, first_pred = images[0], masks_true[0], masks_pred[0]

    true_binary = (first_true >= 0.5).astype(np.int32)
    pixel_accuracy = accuracy_score(true_binary.flatten(), first_pred.flatten())

    _, (ax_original, ax_truth, ax_overlay) = plt.subplots(1, 3, figsize=(15, 5))

    # Panel 1: the input image
    ax_original.imshow(first_image)
    ax_original.set_title('Original Image')
    ax_original.axis('off')

    # Panel 2: the ground-truth mask as given (not binarised)
    ax_truth.imshow(first_true.squeeze(), cmap='gray')
    ax_truth.set_title('Original Mask')
    ax_truth.axis('off')

    # Panel 3: the predicted mask drawn semi-transparently over the image
    ax_overlay.imshow(first_image)
    ax_overlay.imshow(first_pred.squeeze(), cmap='jet', alpha=0.5)
    ax_overlay.set_title(f'Predicted Mask (Accuracy: {pixel_accuracy:.2f})')
    ax_overlay.axis('off')

    plt.tight_layout()
    plt.show()

# Test split: images, masks and annotation file
test_image_dir = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/Dental X_Ray/test'
test_mask_dir = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/Dental X_Ray/test/test_mask'
test_annotation_file = 'C:/Users/Jaber/OneDrive - University of Florida/Educational/GitHub/Datasets/ImageSegmentation/Dental_XRay_Computacional_Vision_Segmentation/test_annotations.coco.json'
test_annotations = load_annotations(test_annotation_file)

test_data_gen = dataset_generator(test_image_dir, test_mask_dir, test_annotations, batch_size, target_size=(256, 256))
# Ceiling division: the generator also emits the final, smaller batch of a
# pass, so flooring would leave the trailing images unevaluated and would
# give 0 steps for a split with fewer images than batch_size.
test_steps = (len(test_annotations['images']) + batch_size - 1) // batch_size

evaluate_model(model, test_data_gen, test_steps)
