In [1]:

import json

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from torch import nn
from torch.utils.data import Dataset, DataLoader

from settings import BASE_DIR

/Users/akhil/code/ml_gallery/ml_py


In [2]:
class SelfAttention(nn.Module):
    """Self-attention over the spatial positions of a 2-D feature map.

    Query and key are 1x1 convolutions down to ``in_dim // 8`` channels; the
    value is a 1x1 convolution that keeps ``in_dim`` channels. The attended
    features are scaled by the learnable scalar ``gamma`` (initialised to
    zero) and added back onto the input.
    """

    def __init__(self, in_dim):
        super().__init__()
        self.chanel_in = in_dim

        reduced_dim = in_dim // 8
        self.query_conv = nn.Conv2d(in_dim, reduced_dim, kernel_size=1)
        self.key_conv = nn.Conv2d(in_dim, reduced_dim, kernel_size=1)
        self.value_conv = nn.Conv2d(in_dim, in_dim, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

        # Normalises each query position's scores over all key positions.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """
            inputs :
                x : input feature maps (B x C x W x H)
            returns :
                out : gamma * attended features + x, same shape as x
        """
        batch, channels, width, height = x.size()
        n_positions = width * height  # N

        queries = self.query_conv(x).view(batch, -1, n_positions).transpose(1, 2)  # B x N x C'
        keys = self.key_conv(x).view(batch, -1, n_positions)  # B x C' x N
        values = self.value_conv(x).view(batch, -1, n_positions)  # B x C x N

        scores = torch.bmm(queries, keys)  # B x N x N
        attention = self.softmax(scores)

        attended = torch.bmm(values, attention.transpose(1, 2))  # B x C x N
        attended = attended.view(batch, channels, width, height)

        return self.gamma * attended + x

In [3]:

def plot_img(image, landmarks=None, circles=None, circles2=None, landmarks2=None):
    """
    Show an image and overlay up to two sets of landmarks and circles.

    image: np.array of shape (c, h, w)
    landmarks, landmarks2: np.array of shape (n, 2); the two columns are
        multiplied by the image width and height before plotting
    circles, circles2: np.array of shape (n, 3), rows of (xc, yc, r) that are
        drawn exactly as given (no scaling)

    The first set is drawn with matplotlib's default colours, the second in red.
    """
    # imshow expects channels last.
    plt.imshow(np.moveaxis(np.array(image), 0, -1))

    _, height, width = image.shape

    overlays = (
        (landmarks, circles, {}),
        (landmarks2, circles2, {'color': 'red'}),
    )
    for points, rings, style in overlays:
        if points is not None and len(points) > 0:
            plt.scatter(points[:, 0] * width, points[:, 1] * height, **style)

        if rings is not None and len(rings) > 0:
            for xc, yc, r in rings:
                plt.gca().add_patch(plt.Circle((xc, yc), r, fill=False, **style))



In [4]:
# Augmentation pipeline for the training samples. Landmarks are handed over as
# 'xy' keypoints so they go through the same pipeline as the image;
# remove_invisible=False asks albumentations to keep keypoints that fall
# outside the frame, so the number of landmarks stays constant.
# There is no tensor conversion step here: the result stays a NumPy array.
transform = A.Compose(
    transforms=[
        # Half of the time: take a 250-px-high crop and resize it to 300 x 400.
        A.RandomSizedCrop(min_max_height=(250, 250), height=300, width=400, p=0.5),

        # Colour / quality perturbations.
        A.ToGray(p=0.2),
        A.ChannelDropout(channel_drop_range=(1, 2), p=0.2),
        A.ChannelShuffle(p=0.2),
        A.HueSaturationValue(p=0.2),
        A.ImageCompression(quality_lower=60, p=0.1),
        A.Posterize(p=0.2),

        # Geometric perturbations.
        A.Rotate(limit=40, p=0.5, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),

        # mean 0 / std 1 / max 255: presumably just rescales pixels to [0, 1]
        # (see the albumentations Normalize docs).
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
    ],
    keypoint_params=A.KeypointParams(format='xy', remove_invisible=False),
)

In [5]:
def normalize_inner_width(inner_widths: np.ndarray, image_width: float = 400):
    """Scale inner widths given in pixels by a quarter of the image width.

    inner_widths: scalar or np.ndarray of widths in pixels
    image_width: width of the image in pixels (defaults to 400, the value
        that used to be hard-coded)
    returns: ``inner_widths / (image_width * 0.25)``, same type/shape as the input
    """
    return inner_widths / (image_width * 0.25)

def normalize_outer_width(widths: np.ndarray, image_width: float = 400):
    """Scale outer widths given in pixels by half of the image width.

    widths: scalar or np.ndarray of widths in pixels
    image_width: width of the image in pixels (defaults to 400, the value
        that used to be hard-coded)
    returns: ``widths / (image_width * 0.50)``, same type/shape as the input
    """
    return widths / (image_width * 0.50)

def denormalize_inner_width(inner_widths: np.ndarray, image_width: float = 400):
    """Convert normalised inner widths back to pixels.

    inner_widths: scalar or np.ndarray of widths expressed as a fraction of a
        quarter of the image width
    image_width: width of the image in pixels (defaults to 400, the value
        that used to be hard-coded)
    returns: ``inner_widths * (image_width * 0.25)``, same type/shape as the input
    """
    return inner_widths * (image_width * 0.25)

def denormalize_outer_width(widths: np.ndarray, image_width: float = 400):
    """Convert normalised outer widths back to pixels.

    widths: scalar or np.ndarray of widths expressed as a fraction of half
        the image width
    image_width: width of the image in pixels (defaults to 400, the value
        that used to be hard-coded)
    returns: ``widths * (image_width * 0.50)``, same type/shape as the input
    """
    return widths * (image_width * 0.50)



In [6]:
class IrisImageDataset(Dataset):
    """Images with inner/outer landmark annotations read from a JSON file.

    Each item is ``(image, labels)``: ``image`` is the (optionally augmented)
    image with the channel axis moved to the front, and ``labels`` is a dict
    with the keys ``inner``, ``outer``, ``center``, ``inner_width``,
    ``outer_width`` and ``name``.
    """

    def __init__(self, images_dir, labels_path, transform=None):
        """
        images_dir: directory that holds the ``<name>.tiff`` files
        labels_path: JSON file mapping image name -> annotation dict
        transform: optional callable, invoked as
            ``transform(image=..., keypoints=...)`` and expected to return a
            mapping with ``'image'`` and ``'keypoints'`` entries
        """
        super().__init__()
        self.data = []
        self.images_dir = images_dir
        self.labels_path = labels_path
        self.transform = transform
        # Image size used to normalise landmark coordinates.
        self.height = 300
        self.width = 400

        with open(labels_path) as json_file:
            self.labels = json.load(json_file)

        # Sorted so that the index -> image mapping is deterministic.
        self.image_names = sorted(self.labels)

    def __len__(self):
        """Number of annotated images."""
        return len(self.image_names)

    def __getitem__(self, index):
        """Load, optionally augment and label the image at ``index``.

        returns: ``(image, labels)`` as described on the class. ``center`` is
        the (un-normalised) circle centre after augmentation, or
        ``[nan, nan]`` when the annotation has no circle.
        """
        image_name = self.image_names[index]
        label = self.labels[image_name]

        # Read the pixels inside a context manager so the file handle is
        # released immediately instead of whenever the Image gets collected.
        with Image.open(f'{self.images_dir}/{image_name}.tiff') as image_file:
            image = np.array(image_file)

        inner_landmarks = label['inner']['landmarks']
        outer_landmarks = label['outer']['landmarks']
        inner_circle = label['inner'].get('circles')
        center = [[inner_circle['xc'], inner_circle['yc']]] if inner_circle else []

        # The centre (when present) rides along as the last keypoint so that it
        # receives the same augmentation as the landmarks.
        landmarks = inner_landmarks + outer_landmarks + center

        if self.transform:
            augmentations = self.transform(image=image, keypoints=landmarks)
            image = augmentations['image']
            landmarks = augmentations['keypoints']

        n_inner = len(inner_landmarks)
        n_outer = len(outer_landmarks)

        inner = np.array(landmarks[:n_inner])
        outer = np.array(landmarks[n_inner: n_inner + n_outer])

        labels = {
            'inner': self.normalize_landmarks(inner).tolist(),
            'outer': self.normalize_landmarks(outer).tolist(),
            # Without a circle annotation the last keypoint is an outer
            # landmark, not a centre; report NaNs instead of a wrong point.
            'center': landmarks[-1] if center else [float('nan'), float('nan')],
            'inner_width': normalize_inner_width(self.get_width(inner)),
            'outer_width': normalize_outer_width(self.get_width(outer)),
            'name': image_name
        }

        # Convert from channels last to channels first
        image = np.moveaxis(image, -1, 0)

        return image, labels

    @staticmethod
    def get_width(landmarks: np.ndarray) -> float:
        """Horizontal extent (max x - min x) of an ``(n, 2)`` landmark array."""
        xs = landmarks[:, 0]
        width = float(np.max(xs) - np.min(xs))
        return width

    def normalize_landmarks(self, landmarks):
        """Divide x by ``self.width`` and y by ``self.height``."""
        return landmarks / [self.width, self.height]

class IrisWidthsDataset(IrisImageDataset):
    """Same samples as IrisImageDataset, but the target is only the width pair."""

    def __getitem__(self, index):
        """Return ``(image, (inner_width, outer_width))`` for the given index."""
        image, labels = super().__getitem__(index)
        widths = (labels['inner_width'], labels['outer_width'])
        return image, widths


In [7]:
# Training data layout: <BASE_DIR>/data/pupil/train/{images/, labels.json}
data_dir = f'{BASE_DIR}/data/pupil'
_train_dir = f'{data_dir}/train'
images_dir = f'{_train_dir}/images'
labels_path = f'{_train_dir}/labels.json'

In [8]:
# Width-regression dataset: augmented images paired with (inner, outer) widths.
dataset = IrisWidthsDataset(
    images_dir=images_dir,
    labels_path=labels_path,
    transform=transform,
)

In [9]:
# Shuffled mini-batches of 64 samples, using the default collate function.
train_loader = DataLoader(
    dataset=dataset,
    batch_size=64,
    shuffle=True,
)

In [16]:
def _conv_stage(in_channels, out_channels):
    """Two 3x3 conv + ReLU pairs; the second conv has stride 2 and halves the spatial size."""
    return [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1),
        nn.ReLU(),
    ]


# The trailing comments give (height, width) after each stage for a
# 300 x 400 input; the first Linear layer relies on the final 10 x 13.
model = nn.Sequential(
    *_conv_stage(3, 8),      # 150, 200
    *_conv_stage(8, 16),     # 75, 100
    *_conv_stage(16, 32),    # 38, 50
    *_conv_stage(32, 64),    # 19, 25
    *_conv_stage(64, 128),   # 10, 13

    SelfAttention(128),

    nn.Flatten(),

    nn.Linear(128 * 10 * 13, 512),
    nn.ReLU(),
    nn.Dropout(0.3),

    # Two regression outputs, trained against (inner_width, outer_width).
    nn.Linear(512, 2),
)

In [24]:
# Adam over every parameter of the model, with a small learning rate.
optim = torch.optim.Adam(params=model.parameters(), lr=1e-5)

In [25]:
epochs = 1

for epoch in range(epochs):
    for images, labels in train_loader:
        # `labels` arrives as a sequence of per-target tensors; stack and
        # transpose them into one (batch, 2) float tensor.
        labels = torch.stack(labels, dim=0).t().float()

        optim.zero_grad()
        yh = model(images.float())
        # loss = nn.functional.l1_loss(yh, labels)
        loss = nn.functional.mse_loss(yh, labels)
        loss.backward()
        optim.step()

        # Batch loss, then prediction vs. target for the first sample's first output.
        print(loss.item())
        print(' ', yh[0, 0].item(), labels[0, 0].item())


1930.1240234375
  35.689876556396484 20.0
1357.2366943359375
  36.61941909790039 32.0
1595.23486328125
  33.84495544433594 65.5999984741211
1948.83642578125
  40.5289192199707 21.0
1790.56787109375
  39.96446990966797 30.123605728149414
1486.63232421875
  43.38674545288086 22.0
1545.799072265625
  47.88535690307617 84.375732421875
2098.64501953125
  44.93120574951172 38.29927444458008
1742.48388671875
  39.9792594909668 67.79517364501953
1093.671875
  52.542030334472656 30.144432067871094
1909.7720947265625
  32.50450134277344 24.0
1439.74658203125
  32.930877685546875 56.0
2015.6708984375
  33.28092956542969 41.599998474121094
1285.7032470703125
  42.20025634765625 42.52890396118164
1511.5137939453125
  40.94729232788086 23.0
1744.6983642578125
  34.44169616699219 62.57959747314453
1909.86767578125
  31.84551239013672 50.464820861816406
1383.80419921875
  35.61973190307617 18.0
1517.3529052734375
  38.111427307128906 26.0
1573.6337890625
  34.9378776550293 78.38623046875
1551.58129882

In [12]:
# Placeholder list holding a single empty string.
# NOTE(review): presumably meant to collect names of problematic images - confirm.
anomalies = ['']
