In [1]:
# Basic python import
import os
import sys
import yaml
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tifffile

# Pytorch
import torch
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.nn as nn
import torchvision.transforms as transforms

sys.path.append('../input/')
import models

# Functions and classes used

In [4]:
class CustomTestDataset(Dataset):
    """Test-time dataset yielding ``(image_id, image_tensor, organ)`` triples.

    Images are read from ``<root_dir>/test_images`` and the organ of each image
    is looked up in ``<root_dir>/test.csv`` using the numeric id encoded in the
    image file name.

    Args:
        root_dir: Directory containing ``test_images/`` and ``test.csv``.
        reshape_size: Side length of the square every image is resized to.
    """

    def __init__(self, root_dir, reshape_size):
        self.root_dir = root_dir
        # os.listdir returns entries in arbitrary order; sorting makes an index
        # refer to the same file on every run.
        self.image_files = sorted(os.listdir(os.path.join(root_dir, "test_images")))
        self.meta_df = pd.read_csv(os.path.join(root_dir, 'test.csv')).sort_values(by='id')
        self.format_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((reshape_size, reshape_size))
        ])

    def __len__(self):
        """Return the number of entries found in the ``test_images`` directory."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Position of the image in the sorted file listing.

        Returns:
            Tuple ``(image_id, image_tensor, organ)`` where ``image_id`` is the
            file name without its extension (a string).

        Raises:
            IndexError: If ``idx`` is past the end of the file listing.
            ValueError: If the file name stem is not an integer id.
            KeyError: If ``test.csv`` has no row for the image id.
        """
        file_name = self.image_files[idx]
        # Strip whatever extension is present instead of assuming it is exactly
        # five characters long (".tiff"), so ".tif" files work as well.
        image_id = os.path.splitext(file_name)[0]
        image = tifffile.imread(os.path.join(self.root_dir, "test_images", file_name))

        organs = self.meta_df.loc[self.meta_df["id"] == int(image_id), "organ"].values
        if len(organs) == 0:
            # An IndexError raised here would be interpreted as "end of data" when
            # the dataset is iterated with a plain for-loop and would silently
            # truncate the loop, so report the missing row explicitly.
            raise KeyError(f"no metadata row for image id {image_id}")
        organ = organs[0]
        image_tensor = self.format_transform(image)

        return (image_id, image_tensor, organ)

def get_test_dataset_and_dataloader(batch_size = 4,input_size = 1024, root_dir = os.path.join('..','data')):
    """
    Build the test dataset and, when batching is requested, a data loader over it.

    Args:
        batch_size: Number of samples per batch. A DataLoader is only created
            when this is greater than 1.
        input_size: Side length of the square every image is resized to.
        root_dir: Directory handed to ``CustomTestDataset``.

    Returns:
        Tuple ``(test_dataset, test_dl)``; ``test_dl`` is ``None`` when
        ``batch_size`` is 1 or less.
    """

    # Forward the requested size; it used to be hard-coded to 1024, which made
    # the ``input_size`` argument a no-op.
    test_dataset = CustomTestDataset(root_dir = root_dir, reshape_size = input_size)

    if batch_size > 1:
        # Inference gains nothing from shuffling; keep the order deterministic.
        test_dl = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
        return test_dataset, test_dl

    return test_dataset, None

def _encode_row_runs(binary_mask):
    """Encode every horizontal run of 1s in a 2-D 0/1 mask as ``"start end "``.

    ``start`` and ``end`` are inclusive, 0-based indices into the row-major
    flattened mask. A run never crosses a row boundary.
    """
    _, width = binary_mask.shape
    # A zero column on each side guarantees that runs touching the left or right
    # border still produce both a rising and a falling edge.
    padded = np.pad(binary_mask.astype(np.int8), ((0, 0), (1, 1)), mode="constant")
    edges = np.diff(padded, axis=1)
    # np.nonzero scans in row-major order, so the i-th rising edge and the i-th
    # falling edge belong to the same run.
    start_rows, start_cols = np.nonzero(edges == 1)
    end_rows, end_cols = np.nonzero(edges == -1)
    starts = start_rows * width + start_cols
    ends = end_rows * width + end_cols - 1  # a falling edge sits one past the run
    return "".join(f"{start} {end} " for start, end in zip(starts, ends))


def make_submission(model, device, test_df, test_dataset, threshold):
    """Run the model on every test sample and write ``submission.csv``.

    Each image is passed through ``model`` as a batch of one, the output is
    thresholded and the resulting binary mask is encoded as space-separated
    ``start end`` pairs (see ``_encode_row_runs``).

    Args:
        model: Callable mapping a ``(1, C, H, W)`` tensor to a tensor whose
            ``[0][0]`` slice is the 2-D mask of scores.
        device: Device the input batch is moved to before calling ``model``.
        test_df: Unused; kept so existing callers keep working.
        test_dataset: Iterable of ``(image_id, image_tensor, organ)`` triples.
        threshold: Scores strictly above this value count as mask pixels.

    Returns:
        The submission as a DataFrame with columns ``id`` and ``rle``; the same
        content is written to ``submission.csv`` in the working directory.
    """
    # NOTE(review): the emitted encoding is "start end", 0-based, row-major, at
    # the model's output resolution. Confirm this matches what the consumer of
    # submission.csv expects (many challenges want 1-based "start length" pairs
    # in column-major order on the original image size).
    submission = {'id': [], 'rle': []}

    # NOTE(review): the caller should have put the model in eval mode.
    with torch.no_grad():  # inference only, no autograd graph needed
        for image_id, image, organ in test_dataset:
            batch = torch.unsqueeze(image, dim=0).to(device)
            mask = model(batch).cpu().detach().numpy()
            binary_mask = (mask > threshold).astype(np.uint8)[0][0]
            submission['id'].append(image_id)
            submission['rle'].append(_encode_row_runs(binary_mask))

    # Create .csv
    sub = pd.DataFrame(submission)
    sub.to_csv('submission.csv', index=False)
    return sub

# Config

Ma proposition : 
Avoir un Notebook template associé à des paramètres par défaut. 
Ensuite pour chaque expérience on le duplique, et on change les valeurs des paramètres que l'on souhaite.

In [5]:
# Root folder handed to the dataset; it is expected to hold test_images/ and test.csv.
data_dir = os.path.join('../input', 'hubmap-organ-segmentation')

# Debug switch; not read by any code visible in this notebook.
DEBUG = False

# With a batch size of 1 no DataLoader is built and the dataset is iterated directly.
batch_size = 1

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device('cpu')

# Create Dataset and Dataloader

In [6]:
# Build the test dataset; the returned loader is None because batch_size is 1.
test_dataset, test_dataloader = get_test_dataset_and_dataloader(
    batch_size=batch_size,
    input_size=1024,
    root_dir=data_dir,
)

# Load model

In [7]:
model_name = '../input/models/test_model.pt'
# SECURITY: torch.load unpickles the file and can execute arbitrary code; only
# load checkpoints that come from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects fully pickled models; confirm the installed version.
# map_location puts the model on the same device the inputs are moved to, so a
# checkpoint saved on another device still loads and runs.
MODEL = torch.load(model_name, map_location=device)
# Inference mode for layers such as dropout and batch norm
# (assumes the checkpoint is a pickled nn.Module; TODO confirm).
MODEL = MODEL.eval()

# Submission

In [11]:
# Cut-off applied to the model output: values above it are treated as mask pixels.
threshold_best = 0.56

# make_submission never reads its test_df argument, so a placeholder is enough.
test_df = None

make_submission(
    model=MODEL,
    device=device,
    test_df=test_df,
    test_dataset=test_dataset,
    threshold=threshold_best,
)

{'id': ['10078'], 'rle': ['']}
