In [1]:

import json

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from torch import nn
from torch.utils.data import Dataset, DataLoader

from settings import BASE_DIR

/Users/akhil/code/ml_gallery/ml_py


In [2]:

def plot_img(image, landmarks=None, circles=None, circles2=None, landmarks2=None):
    """
    Show a channels-first image with optional landmark and circle overlays.

    image: array of shape (c, h, w)
    landmarks, landmarks2: np.array of shape (n, 2); column 0 is multiplied by
        the image width and column 1 by the image height before plotting.
        landmarks2 is drawn in red.
    circles, circles2: iterable of (xc, yc, r) triples, drawn as unfilled
        circles with the values used as given (no rescaling).
        circles2 is drawn in red.
    """
    plt.imshow(np.moveaxis(np.array(image), 0, -1))

    _, height, width = image.shape

    # Primary overlay first (default colours), then the secondary one in red.
    overlays = (
        (landmarks, circles, {}),
        (landmarks2, circles2, {'color': 'red'}),
    )

    for points, rings, colour in overlays:
        if points is not None and len(points) > 0:
            plt.scatter(points[:, 0] * width, points[:, 1] * height, **colour)

        if rings is not None and len(rings) > 0:
            for xc, yc, r in rings:
                plt.gca().add_patch(plt.Circle((xc, yc), r, fill=False, **colour))



In [3]:
# Augmentation pipeline applied to each (image, keypoints) pair.
# Keypoints are plain (x, y) pairs and are kept even when a transform moves
# them out of the frame (remove_invisible=False).
# Experiments currently disabled: Resize(300x400), CenterCrop(200x200),
# RandomSunFlare, RandomScale, a Lambda dividing by 255, ToTensorV2.
transform = A.Compose(
    [
        # Random crop, resized back to 300x400.
        A.RandomSizedCrop(
            min_max_height=(250, 250),
            height=300,
            width=400,
            p=0.5,
        ),

        # Colour / quality perturbations.
        A.ToGray(p=0.2),
        A.ChannelDropout(channel_drop_range=(1, 2), p=0.2),
        A.ChannelShuffle(p=0.2),
        A.HueSaturationValue(p=0.2),
        A.ImageCompression(quality_lower=60, p=0.1),
        A.Posterize(p=0.2),

        # Geometric perturbations; exposed borders use a constant fill.
        A.Rotate(limit=40, border_mode=cv2.BORDER_CONSTANT, p=0.5),
        A.HorizontalFlip(p=0.5),

        # Zero mean / unit std with max_pixel_value=255, i.e. a plain
        # division of the pixel values by 255.
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
    ],
    keypoint_params=A.KeypointParams(format='xy', remove_invisible=False),
)

In [4]:
def normalize_inner_width(inner_widths, image_width=400):
    """
    Scale inner widths (in pixels) by a quarter of the image width.

    inner_widths: scalar or np.ndarray of widths in pixels
    image_width: reference width in pixels; the default of 400 reproduces the
        previously hard-coded scale of 100 px
    returns: inner_widths / (image_width * 0.25), same type as the input
    """
    return inner_widths / (image_width * 0.25)

def normalize_outer_width(widths, image_width=400):
    """
    Scale outer widths (in pixels) by half of the image width.

    widths: scalar or np.ndarray of widths in pixels
    image_width: reference width in pixels; the default of 400 reproduces the
        previously hard-coded scale of 200 px
    returns: widths / (image_width * 0.50), same type as the input
    """
    return widths / (image_width * 0.50)

def denormalize_inner_width(inner_widths, image_width=400):
    """
    Convert normalized inner widths back to pixels.

    inner_widths: scalar or np.ndarray of widths expressed as a multiple of a
        quarter of the image width
    image_width: reference width in pixels; the default of 400 reproduces the
        previously hard-coded scale of 100 px
    returns: inner_widths * (image_width * 0.25), same type as the input
    """
    return inner_widths * (image_width * 0.25)

def denormalize_outer_width(widths, image_width=400):
    """
    Convert normalized outer widths back to pixels.

    widths: scalar or np.ndarray of widths expressed as a multiple of half of
        the image width
    image_width: reference width in pixels; the default of 400 reproduces the
        previously hard-coded scale of 200 px
    returns: widths * (image_width * 0.50), same type as the input
    """
    return widths * (image_width * 0.50)



In [5]:
class IrisImageDataset(Dataset):
    """
    Dataset of .tiff images with inner/outer landmark annotations.

    The labels file is a JSON object keyed by image name (without extension).
    Each entry is read as:
        entry['inner']['landmarks']  -> list of [x, y]
        entry['outer']['landmarks']  -> list of [x, y]
        entry['inner']['circles']    -> optional dict with 'xc' and 'yc'

    Items are returned as (image, labels), where image is channels-first and
    labels is a dict with the keys 'inner', 'outer', 'center', 'inner_width',
    'outer_width' and 'name'.
    """

    def __init__(self, images_dir, labels_path, transform=None):
        """
        images_dir: directory containing '<name>.tiff' files
        labels_path: path of the JSON labels file
        transform: optional callable invoked as
            transform(image=..., keypoints=...) that returns a mapping with
            'image' and 'keypoints'
        """
        super().__init__()
        self.data = []
        self.images_dir = images_dir
        self.labels_path = labels_path
        self.transform = transform
        # Reference size used to normalize landmark coordinates.
        self.height = 300
        self.width = 400

        with open(labels_path) as json_file:
            self.labels = json.load(json_file)

        # Sorted so that an index always maps to the same image.
        self.image_names = sorted(self.labels.keys())

    def __len__(self):
        """Number of labelled images."""
        return len(self.image_names)

    def __getitem__(self, index):
        """
        Load, optionally augment, and label the image at `index`.

        returns: (image, labels)
            image: array with the channel axis moved to the front
            labels: dict with
                'inner' / 'outer': landmarks divided by (width, height)
                'center': the circle centre as returned by the transform
                    (not normalized), or (nan, nan) when the label has no
                    circle annotation
                'inner_width' / 'outer_width': normalized horizontal extents
                'name': the image name
        """
        image_name = self.image_names[index]
        label = self.labels[image_name]

        # Copy the pixels into an array, then release the file handle.
        with Image.open(f'{self.images_dir}/{image_name}.tiff') as image_file:
            image = np.array(image_file)

        inner_landmarks = label['inner']['landmarks']
        outer_landmarks = label['outer']['landmarks']
        inner_circle = label['inner'].get('circles')
        center = [[inner_circle['xc'], inner_circle['yc']]] if inner_circle else []

        # All points go through the transform together so they stay aligned
        # with the augmented image; they are split apart again afterwards.
        landmarks = inner_landmarks + outer_landmarks + center

        if self.transform:
            augmentations = self.transform(image=image, keypoints=landmarks)
            image = augmentations['image']
            landmarks = augmentations['keypoints']

        inner, outer, center_point = self._split_landmarks(
            landmarks, len(inner_landmarks), len(outer_landmarks)
        )

        labels = {
            'inner': self.normalize_landmarks(inner).tolist(),
            'outer': self.normalize_landmarks(outer).tolist(),
            'center': center_point,
            'inner_width': normalize_inner_width(self.get_width(inner)),
            'outer_width': normalize_outer_width(self.get_width(outer)),
            'name': image_name
        }

        # Convert from channels last to channels first
        image = np.moveaxis(image, -1, 0)

        return image, labels

    @staticmethod
    def _split_landmarks(landmarks, n_inner, n_outer):
        """
        Split the combined point sequence back into its parts.

        landmarks: sequence laid out as inner points, outer points, then an
            optional single centre point
        returns: (inner, outer, center) where inner and outer are np.arrays
            and center is the trailing point, or (nan, nan) when there is no
            point after the outer landmarks. Indexing the centre by position
            (instead of taking the last element) keeps an outer landmark from
            being reported as the centre when no centre was annotated.
        """
        n_contour = n_inner + n_outer
        inner = np.array(landmarks[:n_inner])
        outer = np.array(landmarks[n_inner:n_contour])

        if len(landmarks) > n_contour:
            center = landmarks[n_contour]
        else:
            center = (float('nan'), float('nan'))

        return inner, outer, center

    @staticmethod
    def get_width(landmarks: np.ndarray) -> float:
        """Horizontal extent (max x - min x) of an (n, 2) array of points."""
        xs = landmarks[:, 0]
        width = float(np.max(xs) - np.min(xs))
        return width

    def normalize_landmarks(self, landmarks):
        """Divide x by self.width and y by self.height."""
        return landmarks / [self.width, self.height]

class IrisWidthsDataset(IrisImageDataset):
    """Variant of IrisImageDataset whose target is only the two widths."""

    def __getitem__(self, index):
        """Return (image, (inner_width, outer_width)) for the item at `index`."""
        image, labels = super().__getitem__(index)
        widths = (labels['inner_width'], labels['outer_width'])
        return image, widths


In [6]:
# Training split layout: <BASE_DIR>/data/pupil/train/{images/, labels.json}
data_dir = f'{BASE_DIR}/data/pupil'
images_dir, labels_path = f'{data_dir}/train/images', f'{data_dir}/train/labels.json'

In [7]:
# Width-regression view of the training split; the transform is applied each
# time an item is fetched.
dataset = IrisWidthsDataset(
    images_dir=images_dir,
    labels_path=labels_path,
    transform=transform,
)

In [14]:
# Shuffled mini-batches of 32 items, assembled with the default collate function.
train_loader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=32,
)

In [21]:
def _conv_stage(in_channels, mid_channels, out_channels):
    """One down-sampling stage: 3x3 conv, ReLU, stride-2 3x3 conv, ReLU."""
    return [
        nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, stride=2),
        nn.ReLU(),
    ]


# Convolutional regressor producing two outputs per image.
# Each stage halves the spatial size (rounding up); the sizes in the comments
# below are (height, width) after the stage for a 300x400 input.
model = nn.Sequential(
    *[
        layer
        for channels in (
            (3, 8, 8),        # 150, 200
            (8, 16, 16),      # 75, 100
            (16, 32, 32),     # 38, 50
            (32, 64, 64),     # 19, 25
            (64, 128, 128),   # 10, 13
            (128, 256, 256),  # 5, 7
            (256, 512, 64),   # 3, 4
        )
        for layer in _conv_stage(*channels)
    ],

    # 64 channels * 3 * 4 = 768 features
    nn.Flatten(),

    nn.Linear(768, 512),
    nn.ReLU(),
    nn.Dropout(0.3),

    nn.Linear(512, 2),
)

optim = torch.optim.Adam(model.parameters(), lr=1e-5)

In [None]:
epochs = 1

# Dropout is only active in training mode; set it explicitly so that re-running
# this cell after any evaluation code still trains with dropout enabled.
model.train()

for epoch in range(epochs):
    for images, labels in train_loader:
        yh = model(images)

        # The loader yields the two width targets as separate per-batch
        # tensors; stack them column-wise into (batch, 2) to line up with the
        # model output. Python-float targets are collated as float64, so cast
        # them to the prediction dtype to keep the loss and the backward pass
        # in the model's precision.
        labels = torch.stack(labels, dim=1).to(yh.dtype)

        # Summed (not averaged) absolute error over the batch and both outputs.
        loss = torch.sum(torch.abs(yh - labels))

        optim.zero_grad()
        loss.backward()
        optim.step()

        print(loss.item())


10.926250858761184
9.955546074657986
12.511601254195938
10.654044902385301
13.07051544098223
7.911095046309355
11.286894446209637
10.17953937595911
11.287046178105559
14.160103127216622
13.029746209035304
9.756012420240307
14.034630073652224
11.104508263907565
11.586050183317234
11.877145105453973
9.59671926446672
9.177974738467565
9.068739701975332
12.232161608804683
14.057092585925794
12.13858868934307
11.13666127408969
12.344181762311019
9.315906461620706
11.372291973690984
10.319932505354705
12.561710843132232
11.913772261283722
10.02459680555182
11.697000893516067
9.591020478411924
12.442567323176835
10.431284680294501
10.85898361426171
12.22146493438548
14.245477486465711
13.675035960227383
13.518501519634231
9.585601319897025
12.228581521406644
9.016443899145116
10.397096060928195
13.659704481625845
9.265039976673282
12.317004822894713
9.800954852232847
14.881470438707474
10.900619222573585
9.750647918516105


In [None]:
# NOTE(review): presumably meant to list names of images with anomalous labels;
# for now it holds a single empty-string placeholder. Confirm the intent.
anomalies = ['']
