In [None]:
""" 
Imatges de sample data > 29 > 15.08.24-16.08.24

"""

In [None]:
"""
pip install torch torchvision torchaudio
pip install detectron2 opencv-python matplotlib numpy
pip install tensorflow tensorflow-datasets
pip install mrcnn

"""

'\ncal tmb:\n\nsudo apt update\nsudo apt install -y qtbase5-dev qtchooser qt5-qmake qtbase5-dev-tools libxcb-xinerama0\n\n'

### Define dataset:

In [19]:
import torch
import os
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import json
from torchvision import transforms

class BabySegmentationDataset(Dataset):
    """Image / single-instance mask pairs formatted as Mask R-CNN targets.

    Each sample is looked up by base name: ``<name>.jpg`` inside
    ``image_folder`` and ``<name>.png`` inside ``annotation_folder``.
    Pixels equal to 1 in the annotation PNG are treated as the 'baby' class.
    """

    def __init__(self, image_folder, annotation_folder, image_list, transforms=None):
        """
        Args:
            image_folder (string): Path to the folder containing images.
            annotation_folder (string): Path to the folder containing annotations.
            image_list (list): List of image names (from train.txt).
            transforms (callable, optional): A function/transform to apply to the image.
        """
        self.image_folder = image_folder
        self.annotation_folder = annotation_folder
        self.image_list = image_list
        self.transforms = transforms

    def __len__(self):
        """Return the number of image names in ``image_list``."""
        return len(self.image_list)

    @staticmethod
    def _build_target(baby_mask):
        """Build the detection target dict from a 2-D boolean mask.

        Args:
            baby_mask (array-like): ``(H, W)`` array, truthy where the baby is.

        Returns:
            dict: ``boxes`` float32 ``(N, 4)`` as ``[x_min, y_min, x_max, y_max]``,
            ``labels`` int64 ``(N,)`` and ``masks`` uint8 ``(N, H, W)``, where
            ``N`` is 1 when the mask has foreground pixels and 0 otherwise.

        Raises:
            ValueError: If ``baby_mask`` is not 2-dimensional.
        """
        baby_mask = np.asarray(baby_mask, dtype=bool)
        if baby_mask.ndim != 2:
            raise ValueError(
                f"Expected a 2-D (H, W) mask, got shape {baby_mask.shape}"
            )
        height, width = baby_mask.shape

        ys, xs = np.nonzero(baby_mask)
        if ys.size == 0:
            # No foreground: emit an empty target instead of labelling the
            # whole image as 'baby' with an all-zero mask.
            return {
                'boxes': torch.zeros((0, 4), dtype=torch.float32),
                'labels': torch.zeros((0,), dtype=torch.int64),
                'masks': torch.zeros((0, height, width), dtype=torch.uint8),
            }

        # Tight box around the foreground. The max edges are exclusive (+1) so
        # a one-pixel-wide mask still yields a box with positive width/height.
        box = [
            float(xs.min()),
            float(ys.min()),
            float(xs.max() + 1),
            float(ys.max() + 1),
        ]
        return {
            'boxes': torch.tensor([box], dtype=torch.float32),
            'labels': torch.tensor([1], dtype=torch.int64),  # 'baby' is label 1
            # Leading dimension indexes instances (one instance per image).
            'masks': torch.from_numpy(baby_mask.astype(np.uint8)).unsqueeze(0),
        }

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        ``self.transforms`` (if set) is applied to the image only; the target
        is built from the untransformed annotation mask.
        """
        name = self.image_list[idx]

        # Load image; convert() returns a new loaded image, so the file handle
        # can be released as soon as the with-block exits.
        img_path = os.path.join(self.image_folder, name + '.jpg')
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        # Load annotation and materialise it as a numpy array.
        mask_path = os.path.join(self.annotation_folder, name + '.png')
        with Image.open(mask_path) as mask_img:
            mask = np.array(mask_img)

        # Assumes the annotation stores the baby class as pixel value 1
        # (e.g. an indexed/palette PNG) - adjust if the format differs.
        baby_mask = mask == 1

        target = self._build_target(baby_mask)

        # Apply transformations
        if self.transforms:
            image = self.transforms(image)

        return image, target


# Dataset location. Defaults to the original local path; set the
# BABY_DATASET_ROOT environment variable to run the notebook elsewhere.
DATASET_ROOT = os.environ.get(
    'BABY_DATASET_ROOT',
    '/home/martina/codi2/3year/synthesisII/prova_dataset',
)
IMAGE_DIR = os.path.join(DATASET_ROOT, 'JPEGImages')
MASK_DIR = os.path.join(DATASET_ROOT, 'SegmentationClass')
TRAIN_LIST_PATH = os.path.join(DATASET_ROOT, 'ImageSets', 'Main', 'train.txt')


def collate_detection_batch(batch):
    """Turn a list of (image, target) samples into (images, targets) tuples.

    A named function is used instead of a lambda so it can be pickled if the
    loaders are later given worker processes.
    """
    return tuple(zip(*batch))


# Set up transforms (if any)
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Prepare your list of image names from 'train.txt'.
# Blank lines are skipped so they do not turn into empty image names.
with open(TRAIN_LIST_PATH, 'r') as f:
    train_images = [line.strip() for line in f if line.strip()]

# Define dataset and dataloaders
train_dataset = BabySegmentationDataset(
    image_folder=IMAGE_DIR,
    annotation_folder=MASK_DIR,
    image_list=train_images[:5],  # Using the first 5 images for training
    transforms=transform
)

val_dataset = BabySegmentationDataset(
    image_folder=IMAGE_DIR,
    annotation_folder=MASK_DIR,
    image_list=train_images[5:7],  # Using the next 2 images for testing
    transforms=transform
)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_detection_batch)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=collate_detection_batch)


In [20]:
import torch
from torchvision import models

# Load pre-trained Mask R-CNN model.
# NOTE(review): recent torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument; kept as-is for compatibility with the
# installed version - confirm and migrate when upgrading.
model = models.detection.maskrcnn_resnet50_fpn(pretrained=True)

# Modify the model to match the number of classes in your dataset
num_classes = 2  # 1 class (baby) + background

# Replace the box classification/regression head.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

# Replace the mask head. Its input channel count must be read from the
# existing mask predictor, not from the box head: the two heads consume
# differently-sized features.
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
model.roi_heads.mask_predictor = models.detection.mask_rcnn.MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for images, targets in train_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        # In train mode the model returns a dict of loss terms; sum them into
        # one scalar to backpropagate.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()
        num_batches += 1

    # Report the mean loss over the epoch rather than only the last batch,
    # and avoid a NameError when the loader yields nothing.
    if num_batches:
        print(f"Epoch {epoch} Loss: {epoch_loss / num_batches}")
    else:
        print(f"Epoch {epoch}: train_loader produced no batches")


: 

In [None]:
# Evaluate on the validation loader: for every image, compare the
# highest-scoring predicted mask against the ground-truth mask with IoU.
model.eval()
iou_scores = []
with torch.no_grad():
    for images, targets in val_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        prediction = model(images)  # One dict per image in eval mode

        for pred, target in zip(prediction, targets):
            # Union of all ground-truth instance masks -> (H, W) boolean.
            gt_mask = target['masks'].bool().any(dim=0)

            if pred['masks'].shape[0] > 0:
                # Predicted masks are soft (N, 1, H, W) maps in [0, 1];
                # take the most confident detection and threshold at 0.5.
                best = int(pred['scores'].argmax())
                pred_mask = pred['masks'][best, 0] > 0.5
            else:
                # No detections: score against an empty prediction.
                pred_mask = torch.zeros_like(gt_mask)

            intersection = (pred_mask & gt_mask).sum().item()
            union = (pred_mask | gt_mask).sum().item()
            # Both masks empty counts as a perfect match.
            iou_scores.append(intersection / union if union else 1.0)

if iou_scores:
    mean_iou = sum(iou_scores) / len(iou_scores)
    print(f"Validation mean mask IoU over {len(iou_scores)} images: {mean_iou:.4f}")
else:
    print("val_loader produced no samples; nothing to evaluate")
