In [111]:
%load_ext autoreload
%autoreload 2
import rasterio
import os
import numpy as np
from tqdm import tqdm
import pandas as pd
import torch
from torchvision import transforms
import utilities
from evaluation import rle_encoder_decoder
from evaluation import evaluation_function
from unet import UNet

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Deep Learning Model Architecture

The following cell was run on Kaggle with different hyperparameters to save time;  
the selected hyperparameters are shown below.

In [None]:
# Hand a new UNet instance to utilities.run together with the selected
# hyperparameters (one keyword per line so each value is easy to spot).
model = UNet()
utilities.run(
    model,
    lr=1e-3,
    num_epochs=30,
    input_dir='inputs/cleaned',
    patience=6,
    gamma=0.5,
    batch_size=4,
)

In [None]:
# Rebuild the network and restore the saved weights.
#  - map_location='cpu' lets a checkpoint written on a GPU machine load on a
#    CPU-only host; the model is moved to the selected device in a later cell.
#  - weights_only=True restricts unpickling to tensors and plain containers,
#    which is all a state_dict needs, and avoids the FutureWarning that
#    torch.load emits when the flag is left unset.
model = UNet()
state_dict = torch.load('models/unet_model.pkl', map_location='cpu', weights_only=True)
model.load_state_dict(state_dict)

# The threshold file is read whole and parsed as a single float.
with open('thresholds/unet_best_threshold.txt', 'r') as f:
    best_threshold = float(f.read())
print(best_threshold)

0.46666666666666673


  model.load_state_dict(torch.load('models/unet_model.pkl'))


In [109]:
# List the cleaned inputs and keep only the third part of the split (test files).
data_files = os.listdir('inputs/cleaned/data')
_, _, test_files = utilities.split_dataset(data_files)
print("Test: ", len(test_files))

# Normalisation statistics are computed over every file in data_files.
# NOTE(review): Compose is reached through utilities' namespace while Normalize
# comes from this notebook's torchvision import; confirm both are the same module.
mean, std = utilities.getMeanAndStd(data_files)
normalize = transforms.Normalize(mean, std)
transform = utilities.transforms.Compose([normalize])

dataloader = utilities.get_data_loader(
    test_files,
    trans=transform,
    input_dir='inputs/cleaned',
    batch_size=4,
    shuffle=False,
)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

Test:  1379


### Evaluating our DL model

In [9]:
# Run validation on the test loader at the single saved threshold. The call is
# left as the cell's last expression so its return value is displayed.
utilities.validate_one_epoch(
    model,
    dataloader,
    device,
    thresholds=[best_threshold],
)

                                                                                                       

Best threshold: 0.47 with loss: 0.0912 with Dice: 0.9325




(0.932514849264091, 0.46666666666666673)

In [103]:
# Persist the full model object (not only its state_dict) for later reuse.
torch.save(obj=model, f='models/whole_unet_model.pkl')

In [110]:
# Score the model on the reviewed set: run-length encode each ground-truth mask
# and each prediction, then pass both tables to evaluation_function.score.
input_dir = 'inputs/reviewed'
data_files = os.listdir(os.path.join(input_dir, 'data'))

ids = []
originals = []
preds = []

for file_name in tqdm(data_files, desc="Calculating Score"):
    mask_path = os.path.join(input_dir, 'masks', file_name)
    image_path = os.path.join(input_dir, 'data', file_name)

    # Read the ground-truth mask first: it is much cheaper than a forward pass,
    # and tiles with an empty mask are skipped anyway, so inference on them
    # would be wasted work. The set of retained samples is unchanged.
    with rasterio.open(mask_path) as src:
        original_mask = src.read(1).astype(np.float32)
    if np.all(original_mask == 0):
        continue

    pred_mask = utilities.inference(model, image_path, best_threshold, transform)
    # NOTE(review): tiles with an empty prediction are dropped rather than
    # scored, so complete misses on non-empty masks do not lower the reported
    # score. Confirm this is intended.
    if np.all(pred_mask == 0):
        continue

    # The file name without its extension is the sample identifier
    # (named sample_id to avoid shadowing the builtin `id`).
    sample_id = os.path.splitext(file_name)[0]

    originals.append(rle_encoder_decoder.rle_encode(original_mask))
    preds.append(rle_encoder_decoder.rle_encode(pred_mask))
    ids.append(sample_id)

# Both frames share the same ids, in the same order, joined on the 'id' column.
submission_df = pd.DataFrame({'id': ids, 'segmentation': preds})
solution_df = pd.DataFrame({'id': ids, 'segmentation': originals})

print(evaluation_function.score(solution_df, submission_df, 'id'))

Calculating Score: 100%|██████████| 850/850 [01:24<00:00, 10.06it/s]


0.9194284259589189
