Check the running torch version

In [1]:
import torch
torch.__version__

'1.2.0'

In [2]:
import sys

assert sys.version_info >= (3, 6) # Python ≥3.6 required

In [3]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Build a line segmentation model based on the U-Net architecture

Here is [the paper](https://arxiv.org/pdf/1505.04597.pdf).

Check out this [cheat sheet](https://pytorch.org/tutorials/beginner/ptcheat.html?highlight=loss) as a quick refresher.

In [4]:
%load_ext autoreload
%autoreload 2

import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import sys
sys.path.append('..')

from pathlib import Path
Path.ls = lambda x: list(x.iterdir())  # Source: https://github.com/fastai/fastai/blob/master/fastai/core.py#L236

from tqdm import tqdm

from PIL import Image

import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

from torchsummary import summary

In [5]:
from src.datasets import Dataset
from src.networks import UNetMini
from src.utils import img_to_array, array_to_img
from src.utils import to_categorical

In [6]:
# Root folder of the processed line-segmentation data.
# Each split (train / valid / test) has an "images" and a "masks" sub-folder.
lines_detector_path = Dataset.processed_data_path()/"lines-segmentation"

lines_detector_train_images_path = lines_detector_path/"train"/"images"
lines_detector_train_masks_path = lines_detector_path/"train"/"masks"

lines_detector_valid_images_path = lines_detector_path/"valid"/"images"
lines_detector_valid_masks_path = lines_detector_path/"valid"/"masks"

lines_detector_test_images_path = lines_detector_path/"test"/"images"
lines_detector_test_masks_path = lines_detector_path/"test"/"masks"

Hyperparameters

In [7]:
batch_size = 16  # samples per mini-batch produced by the DataLoaders
epochs = 4  # full passes over the training set
learning_rate = 0.001  # step size handed to the Adam optimizer

number_of_classes = 3  # FormsDataset quantises every mask pixel into the labels 0, 1 and 2

In [8]:
# File the trained weights (the model's state_dict) are written to
model_save_path = Path("../weights/lines_segmentation.ckpt")

## Load dataset

In [9]:
def load_images_and_masks_in_path(images_path: Path, masks_path: Path):
    """Load every ``*.png`` image and its mask from two parallel folders.

    Both folders are listed, sorted by path, and paired position by position,
    so the n-th image is matched with the n-th mask.

    Args:
        images_path: Folder containing the input images.
        masks_path: Folder containing the corresponding masks.

    Returns:
        A tuple ``(images, masks)`` of numpy arrays holding one entry per
        file pair, in sorted order.

    Raises:
        ValueError: If the folders hold a different number of ``*.png`` files.
            Pairing by position would otherwise silently drop the surplus
            files and could match images with the wrong masks.
    """
    sorted_image_names = sorted(images_path.glob("*.png"))
    sorted_mask_names = sorted(masks_path.glob("*.png"))
    if len(sorted_image_names) != len(sorted_mask_names):
        raise ValueError(
            f"Found {len(sorted_image_names)} images in {images_path} but "
            f"{len(sorted_mask_names)} masks in {masks_path}; every image needs exactly one mask"
        )

    x = []
    y = []
    file_pairs = zip(sorted_image_names, sorted_mask_names)
    # zip() has no length, so pass the total explicitly to get a real progress bar
    for image_file_name, mask_file_name in tqdm(file_pairs, total=len(sorted_image_names)):
        x.append(img_to_array(Image.open(image_file_name)))
        y.append(img_to_array(Image.open(mask_file_name)))

    return np.array(x), np.array(y)

# Load every split fully into memory as (images, masks) numpy arrays
train_images, train_masks = load_images_and_masks_in_path(lines_detector_train_images_path, lines_detector_train_masks_path)
valid_images, valid_masks = load_images_and_masks_in_path(lines_detector_valid_images_path, lines_detector_valid_masks_path)
test_images, test_masks = load_images_and_masks_in_path(lines_detector_test_images_path, lines_detector_test_masks_path)

986it [00:03, 300.57it/s]
246it [00:00, 291.83it/s]
307it [00:01, 300.55it/s]


In [10]:
def get_transformations(train=False):
    """Build the torchvision pipeline applied to the samples of a dataset.

    The pipeline currently consists of a single step, ``transforms.ToTensor()``,
    which turns an H x W x C numpy / PIL image into a C x H x W PyTorch tensor.

    Args:
        train: Reserved for training-only augmentations. None are enabled at
            the moment, so the same pipeline is returned for either value.

    Returns:
        A ``transforms.Compose`` wrapping the configured steps.
    """
    # Candidate augmentations, all disabled for now: RandomHorizontalFlip,
    # RandomPerspective, RandomRotation(degrees=(-40, 40)). Whatever gets enabled
    # must be applied identically to an image and to its mask.
    training_only_steps = []

    pipeline = [transforms.ToTensor()]
    if train:
        pipeline.extend(training_only_steps)

    return transforms.Compose(pipeline)

In [21]:
# mask = train_masks[0]
# mask = mask / 127
# mask[np.logical_and(mask > .8, mask < 1.7)] = 1
# mask[mask >= 1.7] = 2
# mask[mask <= .8] = 0
# mask = mask.astype(np.uint8)
# mask.shape
# mask.reshape(mask.shape[1], mask.shape[2])

IndexError: tuple index out of range

In [11]:
# Subclass the PyTorch dataset explicitly: in this notebook the bare name `Dataset`
# is re-bound to the project's `src.datasets.Dataset` by a later import.
class FormsDataset(torch.utils.data.Dataset):
    """(image, mask) pairs for the line segmentation model.

    Args:
        images: Indexable collection of grayscale images with values in [0, 255].
        masks: Indexable collection of masks, aligned with ``images``.
        num_classes: Number of segmentation classes (stored, not used internally).
        transforms: Optional callable applied to the image and, separately, to the
            mask. Random transforms would have to be synchronised between the two
            calls to keep image and mask aligned.
    """

    def __init__(self, images, masks, num_classes: int, transforms=None):
        self.images = images
        self.masks = masks
        self.num_classes = num_classes
        self.transforms = transforms

    def __getitem__(self, idx):
        """Return the preprocessed ``(image, mask)`` pair stored at ``idx``."""
        # Add a trailing channel axis and scale the pixel values to [0, 1]
        image = np.expand_dims(self.images[idx], -1)
        image = (image / 255).astype(np.float32)
        if self.transforms:
            image = self.transforms(image)

        mask = self._to_class_indices(self.masks[idx])
        if self.transforms:
            mask = self.transforms(mask)

        return image, mask

    @staticmethod
    def _to_class_indices(mask):
        """Quantise a mask with gray levels around 0 / 127 / 254 into the labels 0 / 1 / 2."""
        scaled = mask / 127
        # int64 instead of uint8: torchvision's ToTensor divides uint8 input by 255,
        # which would shrink the labels to 0, 1/255, 2/255 and make every pixel
        # class 0 once the training loop casts the mask to long.
        labels = np.zeros(scaled.shape, dtype=np.int64)
        labels[np.logical_and(scaled > .8, scaled < 1.7)] = 1
        labels[scaled >= 1.7] = 2
        return labels

    def __len__(self):
        """Number of (image, mask) pairs."""
        return len(self.images)


# TODO :: (channels, H, W)
# Only the training loader is shuffled; validation and test batches keep a fixed
# order so that evaluation runs are repeatable.
train_dataset = FormsDataset(train_images, train_masks, number_of_classes, get_transformations(True))
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
print(f'Train dataset has {len(train_data_loader)} batches of size {batch_size}')

valid_dataset = FormsDataset(valid_images, valid_masks, number_of_classes, get_transformations(False))
valid_data_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
print(f'Valid dataset has {len(valid_data_loader)} batches of size {batch_size}')

test_dataset = FormsDataset(test_images, test_masks, number_of_classes, get_transformations(False))
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# This line used to be labelled "Valid dataset" as well (copy-paste slip)
print(f'Test dataset has {len(test_data_loader)} batches of size {batch_size}')

Train dataset has 62 batches of size 16
Valid dataset has 16 batches of size 16
Valid dataset has 20 batches of size 16


In [12]:
# Sanity check: shapes of a single (image, mask) sample
image, mask = train_dataset[0]
image.shape, mask.shape

(torch.Size([1, 256, 256]), torch.Size([1, 256, 256]))

In [13]:
# Sanity check: shapes of the first batch produced by the training loader
for image, mask in train_data_loader:
    print(f"{image.shape}, {mask.shape}")
    break

torch.Size([16, 1, 256, 256]), torch.Size([16, 1, 256, 256])


## Build the model

In [14]:
# Instantiate UNetMini for `number_of_classes` classes and move it to the selected device
model = UNetMini(number_of_classes).to(device)

# Layer-by-layer overview for a single-channel 256 x 256 input
summary(model, input_size=(1, 256, 256))  # (channels, H, W)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             320
              ReLU-2         [-1, 32, 256, 256]               0
         Dropout2d-3         [-1, 32, 256, 256]               0
            Conv2d-4         [-1, 32, 256, 256]           9,248
              ReLU-5         [-1, 32, 256, 256]               0
         MaxPool2d-6         [-1, 32, 128, 128]               0
            Conv2d-7         [-1, 64, 128, 128]          18,496
              ReLU-8         [-1, 64, 128, 128]               0
         Dropout2d-9         [-1, 64, 128, 128]               0
           Conv2d-10         [-1, 64, 128, 128]          36,928
             ReLU-11         [-1, 64, 128, 128]               0
        MaxPool2d-12           [-1, 64, 64, 64]               0
           Conv2d-13          [-1, 128, 64, 64]          73,856
             ReLU-14          [-1, 128,

## Train the model

In [15]:
# Loss and optimizer
# NLLLoss expects log-probabilities, which is why the training loop applies
# F.log_softmax to the model output before computing the loss.
# (torch.nn.CrossEntropyLoss would combine nn.LogSoftmax() and nn.NLLLoss() in one single class.)
criterion = torch.nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [24]:
# Train the model
total_steps = len(train_data_loader)
model.train()  # keep the Dropout2d layers active, even if an evaluation cell ran before
for epoch in range(epochs):
    for i, (images, masks) in enumerate(train_data_loader, 1):
        images = images.to(device)
        # NLLLoss needs integer class indices of shape (N, H, W) on the same device as
        # the model output. `.type(torch.LongTensor)` would move the masks back to the
        # CPU, so the dtype is converted together with the device transfer instead.
        masks = masks.to(device=device, dtype=torch.long).squeeze(1)

        # Forward pass
        outputs = model(images)
        log_probabilities = F.log_softmax(outputs, dim=1)
        loss = criterion(log_probabilities, masks)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 10 == 0:
            # `epochs` is the hyperparameter defined above; `num_epochs` never existed
            print (f"Epoch [{epoch + 1}/{epochs}], Step [{i}/{total_steps}], Loss: {loss.item():.4f}")

softmax torch.Size([16, 3, 256, 256]), masks torch.Size([16, 256, 256])
softmax torch.Size([16, 3, 256, 256]), masks torch.Size([16, 256, 256])
softmax torch.Size([16, 3, 256, 256]), masks torch.Size([16, 256, 256])
softmax torch.Size([16, 3, 256, 256]), masks torch.Size([16, 256, 256])


KeyboardInterrupt: 

## Test the model

In [None]:
# Test the model
model.eval()  # eval mode (disables dropout; batchnorm, if any, uses its running statistics)
with torch.no_grad():
    correct = 0
    total = 0
    for images, masks in test_data_loader:
        images = images.to(device)
        # Same target layout as in training: integer class indices of shape (N, H, W).
        # Without dropping the channel axis, `predicted == masks` would broadcast
        # (N, H, W) against (N, 1, H, W) and compare unrelated samples.
        masks = masks.to(device=device, dtype=torch.long).squeeze(1)

        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # most likely class per pixel
        # Accuracy is measured per pixel, so count pixels on both sides of the ratio
        total += masks.numel()
        correct += (predicted == masks).sum().item()

    print(f'Pixel accuracy of the model on the {len(test_dataset)} test images: {100 * correct / total:.2f} %')

## Save the model

In [None]:
# torch.save does not create missing parent folders, so make sure the target folder exists
model_save_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), model_save_path)

## Restore the model

## Predict on one image

In [None]:
# `x` is not defined in this notebook; use the first image of the test split instead
image_array = test_images[0]
# Same preprocessing as FormsDataset: scale to [0, 1], then add batch and channel axes.
# Assumes the stored image is a 2-D (H, W) array, giving a (1, 1, H, W) batch.
test_batch = torch.from_numpy((image_array / 255).astype(np.float32))[None, None].to(device)

# PyTorch modules have no Keras-style `predict`: run a forward pass in eval mode without gradients
model.eval()
with torch.no_grad():
    class_probabilities = F.softmax(model(test_batch), dim=1)

# Move the class axis last, (1, H, W, classes): the cells below take argmax over axis 2 of preds[0]
preds = class_probabilities.permute(0, 2, 3, 1).cpu().numpy()
preds.shape

In [None]:
# Merge the 3 class channels into one: keep, for every pixel of the first prediction,
# the index of the highest-scoring class (argmax over axis 2), then re-add a trailing
# channel axis before handing the result to array_to_img
pred_image_array = np.expand_dims(np.argmax(preds[0], axis=2), axis=-1)
array_to_img(pred_image_array)

In [None]:
# Value range of the first prediction
np.min(preds[0]), np.max(preds[0])

In [None]:
# Value range of the merged class-index map
np.min(pred_image_array), np.max(pred_image_array)

In [None]:
# Shape obtained when reducing over axis 1 (for comparison with axis 2 below)
np.argmax(preds[0], axis=1).shape

In [None]:
# Shape obtained when reducing over axis 2
np.argmax(preds[0], axis=2).shape

In [None]:
# Shape after re-adding a trailing axis to the axis-2 argmax
np.expand_dims(np.argmax(preds[0], axis=2), axis=-1).shape

In [None]:
# Shape of the merged class-index map
pred_image_array.shape

Check prediction array

In [None]:
# Print only the rows of the first prediction whose maximum value is positive
for row in preds[0]:
    if np.max(row) > 0:
        print(row)

In [None]:
# Shape of the axis-2 argmax of the first prediction
np.argmax(preds[0], axis=2).shape

In [None]:
# Walk the axis-2 argmax of the first prediction row by row: print rows that
# contain a non-zero index and flag rows that are all zeros
for row in np.argmax(preds[0], axis=2):
    if np.max(row) > 0:
        print(row)
    else:
        print(">> EMPTY ROW <<")

Check target array

In [None]:
# Print only the rows of the first target whose maximum value is positive
# NOTE(review): `y` is not defined in any cell visible in this notebook — confirm
# which target array this is meant to inspect
for row in y[0]:
    if np.max(row) > 0:
        print(row)

In [None]:
# Shape of the axis-2 argmax of the first target
# NOTE(review): `y` is not defined in any cell visible in this notebook — confirm its source
np.argmax(y[0], axis=2).shape

In [None]:
# Walk the axis-2 argmax of the first target row by row: print rows that
# contain a non-zero index and flag rows that are all zeros
# NOTE(review): `y` is not defined in any cell visible in this notebook — confirm its source
for row in np.argmax(y[0], axis=2):
    if np.max(row) > 0:
        print(row)
    else:
        print(">> EMPTY ROW <<")