In [1]:
!git clone https://github.com/JulienDelavande/hackaton_water_seg.git

Cloning into 'hackaton_water_seg'...
remote: Enumerating objects: 1687, done.[K
remote: Counting objects: 100% (11/11), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 1687 (delta 1), reused 6 (delta 0), pack-reused 1676[K
Receiving objects: 100% (1687/1687), 199.42 MiB | 27.33 MiB/s, done.
Resolving deltas: 100% (98/98), done.
Updating files: 100% (1944/1944), done.


In [3]:
!cd hackaton_water_seg/

In [7]:
import matplotlib.pyplot as plt
import numpy as np
import PIL
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image

In [8]:
# If colab
PREFOLDER = 'hackaton_water_seg/'

In [None]:
# else
PREFOLDER = './'

In [9]:
FOLDER_IMAGE_TRAIN = f'{PREFOLDER}dataset/trainset/images'
FOLDER_MASK_TRAIN = f'{PREFOLDER}dataset/trainset/masks'

FOLDER_IMAGE_TEST = f'{PREFOLDER}dataset/testset/images'

In [10]:
# see if each iamge has a mask
image_names = []
mask_names = []
for image in os.listdir(FOLDER_IMAGE_TRAIN):
    image_names.append(image.split('.')[0])
for image in os.listdir(FOLDER_MASK_TRAIN):
    mask_names.append(image.split('.')[0])

print('Number of images:', len(image_names))
print('Number of masks:', len(mask_names))

# check if each image has a mask
for image in image_names:
    if image not in mask_names:
        print('Image', image, 'has no mask')

Number of images: 891
Number of masks: 891


In [11]:
class WaterSegmentationDataset(Dataset):
    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        self.images = os.listdir(images_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.images_dir, img_name)
        mask_path = os.path.join(self.masks_dir, img_name.replace('.jpg', '.png'))
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

In [12]:
import torch.nn as nn

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class UNet(nn.Module):
    def __init__(self, n_channels, n_classes):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = nn.MaxPool2d(2)
        self.conv1 = DoubleConv(64, 128)
        self.down2 = nn.MaxPool2d(2)
        self.conv2 = DoubleConv(128, 256)
        self.down3 = nn.MaxPool2d(2)
        self.conv3 = DoubleConv(256, 512)

        self.up1 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv4 = DoubleConv(512, 256)
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv5 = DoubleConv(256, 128)
        self.up3 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv6 = DoubleConv(128, 64)

        self.outc = nn.Conv2d(64, n_classes, kernel_size=1)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x2 = self.conv1(x2)
        x3 = self.down2(x2)
        x3 = self.conv2(x3)
        x4 = self.down3(x3)
        x4 = self.conv3(x4)

        x5 = self.up1(x4)
        x5 = torch.cat([x5, x3], dim=1)
        x5 = self.conv4(x5)
        x6 = self.up2(x5)
        x6 = torch.cat([x6, x2], dim=1)
        x6 = self.conv5(x6)
        x7 = self.up3(x6)
        x7 = torch.cat([x7, x1], dim=1)
        x7 = self.conv6(x7)

        logits = self.outc(x7)
        return logits


In [17]:
def calculate_accuracy(outputs, masks):
    sigm = torch.sigmoid(outputs)  # Applique la sigmoïde pour obtenir des probabilités
    preds = sigm > 0.5  # Seuille les probabilités pour obtenir des prédictions binaires
    correct = (preds == masks).float()  # Compare les prédictions aux vrais masques
    accuracy = correct.sum() / (masks.numel() + 1e-10)  # Calcule l'accuracy
    return accuracy

In [30]:
import torch.optim as optim
from torch.utils.data import random_split
from sklearn.model_selection import train_test_split

# Paramètres
n_epochs = 25
batch_size = 32
learning_rate = 0.001

# Préparation des datasets et dataloaders
transform = transforms.Compose([
    transforms.Resize((480, 640)),
    transforms.ToTensor(),
])

dataset = WaterSegmentationDataset(images_dir=FOLDER_IMAGE_TRAIN, masks_dir=FOLDER_MASK_TRAIN, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Initialisation du modèle, de l'optimiseur et de la fonction de perte
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet(n_channels=3, n_classes=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.BCEWithLogitsLoss()

# Boucle d'entraînement avec calcul de l'accuracy
for epoch in range(n_epochs):
    model.train()
    train_loss = 0.0
    train_accuracy = 0.0
    val_loss = 0.0
    val_accuracy = 0.0
    num_batches_train = 0
    num_batches_val = 0

    for images, masks in train_loader:
        images = images.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        accuracy = calculate_accuracy(outputs, masks)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_accuracy += accuracy.item()
        num_batches_train += 1

    # Validation
    model.eval()
    with torch.no_grad():
        for images, masks in val_loader:
            images = images.to(device)
            masks = masks.to(device)
            outputs = model(images)
            loss = criterion(outputs, masks)
            accuracy = calculate_accuracy(outputs, masks)

            val_loss += loss.item()
            val_accuracy += accuracy.item()
            num_batches_val += 1

    # Calcul et affichage des moyennes
    train_loss /= num_batches_train
    train_accuracy /= num_batches_train
    val_loss /= num_batches_val
    val_accuracy /= num_batches_val

    print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.17 GiB. GPU 0 has a total capacty of 14.75 GiB of which 149.06 MiB is free. Process 5313 has 14.60 GiB memory in use. Of the allocated memory 13.78 GiB is allocated by PyTorch, and 702.37 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [23]:
def dataset_accuracy(model, data_loader, device):
    """
    Calcule l'accuracy moyenne sur un dataset entier.

    Parameters:
    - model: Le modèle PyTorch à évaluer.
    - data_loader: DataLoader contenant le dataset sur lequel calculer l'accuracy.
    - device: Le device sur lequel exécuter les calculs ('cpu' ou 'cuda').

    Returns:
    - L'accuracy moyenne sur l'ensemble du dataset.
    """
    model.eval()  # Met le modèle en mode évaluation
    total_accuracy = 0.0
    total_samples = 0

    with torch.no_grad():  # Pas besoin de calculer les gradients
        for images, masks in data_loader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            accuracy = calculate_accuracy(outputs, masks)
            total_accuracy += accuracy.item() * images.size(0)  # Multiplie par le nombre d'échantillons dans le batch
            total_samples += images.size(0)

    return total_accuracy / total_samples

# Exemple d'utilisation
# Assurez-vous que model, train_loader (ou val_loader), et device sont correctement définis
model_accuracy = dataset_accuracy(model, val_loader, device)
print(f'Accuracy on the dataset: {model_accuracy:.4f}')


Accuracy on the dataset: 0.8681


In [19]:
import cv2
import numpy as np
import pandas as pd
import os

# Fonction pour charger et prédire les masques des images de test
def predict_masks(model, test_dir, output_dir):
    model.eval()
    test_images = os.listdir(test_dir)
    predictions = []
    for image_name in test_images:
        image_path = os.path.join(test_dir, image_name)
        image = Image.open(image_path).convert("RGB")
        image = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(image)
            predicted_mask = torch.sigmoid(output).cpu().numpy()[0, 0]
            predicted_mask = (predicted_mask > 0.5).astype(np.uint8) * 255  # Thresholding

            # Save predicted mask
            cv2.imwrite(os.path.join(output_dir, image_name.replace('.jpg', '.png')), predicted_mask)


# Prédire et créer le fichier de soumission
test_dir = FOLDER_IMAGE_TEST
output_dir = "./predicted_masks"
os.makedirs(output_dir, exist_ok=True)
predict_masks(model, test_dir, output_dir)

# Utilisez le script create_submission_csv.py pour convertir le dossier des masques prédits en fichier CSV de soumission
# python3 create_submission_csv.py ./predicted_masks


In [22]:
NAME_SUBMISSION = './submissionV1.csv'
%run hackaton_water_seg/create_submission_csv.py ./predicted_masks -c './submissionV1.csv'

In [None]:
import shutil

def zip_predicted_masks(output_dir, zip_name):
    """
    Crée une archive zip du dossier contenant les masques prédits.

    Parameters:
    - output_dir (str): Chemin du dossier contenant les masques prédits.
    - zip_name (str): Chemin et nom du fichier zip à créer, sans l'extension .zip.
    """
    # Assurez-vous que l'extension .zip n'est pas incluse dans zip_name
    zip_name = zip_name if not zip_name.endswith('.zip') else zip_name[:-4]
    # Crée l'archive zip
    shutil.make_archive(zip_name, 'zip', output_dir)
    print(f'Created zip archive: {zip_name}.zip')

# Exemple d'utilisation de la fonction
NAME_PREDICTED_MASK = './predicted_mask_V1'
zip_predicted_masks(output_dir, "./predicted_masks_archive")


/content


In [29]:
!cd /content/hackaton_water_seg
!git push

fatal: not a git repository (or any of the parent directories): .git
