In [1]:
import matplotlib.pyplot as plt

import skimage.io as io
from skimage import data, color
from skimage.transform import rescale, resize, downscale_local_mean
from skimage.filters import threshold_mean, sobel
from skimage.color import rgb2gray
import numpy as np
import pandas as pd
from typing import Tuple, List, Union
import sys
sys.path.insert(0, '..')
from utils.classic_image_utils import *
from utils.eye_dataset import *
from typing import Callable, Tuple, List
from collections import defaultdict, Counter
import torchvision.transforms as transforms
from sklearn import svm

import pywt
import scipy

# Dataset locations, all derived from base_dir (a relative path).
base_dir = "../../data"
# Image folders passed as `root` to EyeImageDataset in the cells below.
image_dir_training = f"{base_dir}/ODIR-5K/training"
image_dir_testing = f"{base_dir}/ODIR-5K/testing"
# Metadata CSV passed as `data_info_csv_file` to EyeImageDataset.
csv_file = f'{base_dir}/ODIR-5K/data.csv'


In [2]:
def print_diagnostics_from_tensor(labels, diagnostics: torch.Tensor):
    """Print the names of the active diagnostics on one comma-separated line.

    Args:
        labels: Sequence of label names, indexed like ``diagnostics``.
        diagnostics: Indexable flags; entry ``i`` equal to 1 marks
            ``labels[i]`` as active.

    A single line is always printed (empty when nothing is active).
    Joining the selected names guarantees a separator between every pair;
    the earlier flag-based approach never emitted one, so names ran together.
    """
    active = [str(labels[i]) for i in range(len(labels)) if diagnostics[i] == 1]
    print(",".join(active))
    
def print_img(title, image):
    """Show ``image`` under ``title`` using matplotlib.

    Arrays with three or more dimensions whose smallest dimension is larger
    than 1 are drawn with the default colour handling; every other array is
    drawn with the gray colormap.
    """
    plt.title(title)
    dims = image.shape
    multi_channel = len(dims) >= 3 and min(dims) > 1
    # Only the non-multi-channel case forces a colormap.
    imshow_options = {} if multi_channel else {"cmap": plt.cm.gray}
    plt.imshow(image, **imshow_options)
    plt.show()

#### Building features for SVM classification

We follow the work of taspinar, presented in the repository linked below, with the aim of using an SVM as an image-labeling tool.

https://github.com/taspinar/siml/blob/master/notebooks/WV4%20-%20Classification%20of%20ECG%20signals%20using%20the%20Discrete%20Wavelet%20Transform.ipynb

It consists of extracting feature information from each image by calculating statistics, crossings, and entropy from
the data generated by a Haar wavelet transform.

We also choose to ignore the cases in which two or more diseases were detected for the same eye (not the same patient).

Therefore, if a patient has, for example, glaucoma and cataract in the left eye, that eye is excluded from this experiment. But if the patient has glaucoma in one eye and cataract in the other, both eyes are taken into consideration.

`convert_label()` converts the 8-position Torch label tensor (position 0 is Normal, position 1 is diabetes, position 2 is glaucoma, etc.) into the same numbering used by the `TargetLabel` enum (please refer to the README and `eye_dataset.py`).

In [3]:
def feature_statistics(data):
    """Compute NaN-ignoring summary statistics of ``data``.

    Args:
        data: Array-like of numbers (lists are accepted as well as arrays).

    Returns:
        A list ``[n5, n25, n75, n95, median, mean, std, var, rms]`` where the
        first five entries are the 5th, 25th, 75th, 95th and 50th percentiles.
    """
    values = np.asarray(data, dtype=float)

    # One call computes every percentile; the order matches the return list.
    n5, n25, n75, n95, median = np.nanpercentile(values, [5, 25, 75, 95, 50])
    mean = np.nanmean(values)
    std = np.nanstd(values)
    var = np.nanvar(values)
    # Root mean square: the square root is taken AFTER averaging the squares.
    # Taking it before averaging would yield the mean absolute value instead.
    rms = np.sqrt(np.nanmean(values ** 2))

    return [n5, n25, n75, n95, median, mean, std, var, rms]

def feature_crossings(data):
    """Count how often ``data`` crosses zero and crosses its own mean.

    A crossing is a position where the boolean "above the threshold" flag
    differs between two consecutive samples. The mean ignores NaNs.

    Returns:
        ``[number_of_zero_crossings, number_of_mean_crossings]``.
    """
    samples = np.array(data)

    def _count_flips(above_threshold):
        # np.diff on a boolean array marks every change between neighbours.
        return len(np.nonzero(np.diff(above_threshold))[0])

    zero_flips = _count_flips(samples > 0)
    mean_flips = _count_flips(samples > np.nanmean(data))
    return [zero_flips, mean_flips]

def feature_entropy(data):
    """Return the entropy of the empirical value distribution of ``data``.

    Each distinct value's relative frequency is used as its probability and
    the resulting list is handed to ``scipy.stats.entropy``.
    """
    sample_count = len(data)
    occurrences = Counter(data)
    # Keep the most-common-first ordering of the frequency table.
    probabilities = [count / sample_count for _, count in occurrences.most_common()]
    return scipy.stats.entropy(probabilities)

def get_features(data):
    """Build the flat feature list for one coefficient array.

    The result is the entropy value, followed by the crossing counts,
    followed by the summary statistics, in that order.
    """
    feature_vector = [feature_entropy(data)]
    feature_vector.extend(feature_crossings(data))
    feature_vector.extend(feature_statistics(data))
    return feature_vector

def convert_label(label: torch.Tensor) -> Tuple[Union[int, None], bool]:
    """Reduce a multi-hot label vector to a single class index.

    Args:
        label: Indexable vector in which an entry equal to 1.0 marks an
            active class.

    Returns:
        A tuple ``(index, is_multilabel)``:
        * ``index`` is the position of the first active entry, or ``None``
          when no entry is active.
        * ``is_multilabel`` is ``True`` when a second active entry exists;
          the scan stops as soon as that is known.
    """
    chosen = None
    for index in range(len(label)):
        if label[index] == 1.0:
            if chosen is not None:
                # A second active entry: report the first one and flag it.
                return (chosen, True)
            chosen = index

    return (chosen, False)


def build_features(dataset: EyeImageDataset, limit_read: int  = 0):
    """Extract wavelet-based feature vectors and class labels from a dataset.

    For every sample read, the image is flattened to one dimension and
    decomposed with ``pywt.wavedec(..., 'haar')``; ``get_features`` is applied
    to each coefficient array and the results are concatenated into a single
    feature vector.

    Args:
        dataset: Indexable collection whose items are ``(image, label)``
            pairs; ``label`` is passed to ``convert_label``.
        limit_read: Maximum number of samples to read, starting at index 0.
            Values ``<= 0`` or larger than the dataset mean "read everything".

    Returns:
        ``[features, labels]`` as two NumPy arrays. Samples flagged as
        multilabel, and samples with no active label at all, are skipped.
    """
    available = len(dataset)
    # Clamp so an oversized limit cannot index past the end of the dataset.
    if limit_read <= 0 or limit_read > available:
        limit_read = available

    features = []
    labels = []

    for index in range(limit_read):
        img, label = dataset[index]

        label, multilabel = convert_label(label)
        if multilabel:
            # we'll ignore multilabel classification problems
            continue
        if label is None:
            # No active class: there is nothing to train or score against.
            continue

        # Flatten only the samples we keep; the decomposition works on 1-D data.
        signal = np.array(img).ravel()

        feature = []
        for coeff in pywt.wavedec(signal, 'haar'):
            feature += get_features(coeff)

        features.append(feature)
        labels.append(label)

    f = np.array(features)
    l = np.array(labels)

    return [f, l]


In [4]:
def evaluate(features, labels, label_names, model=None):
    """Print per-class prediction accuracy of a fitted classifier.

    Args:
        features: Sequence of feature vectors, one per sample.
        labels: Sequence of integer class indices aligned with ``features``;
            each value is used as an index into ``label_names``.
        label_names: Sequence of class names.
        model: Object exposing ``predict(features)``. When omitted, the
            notebook-level ``classifier`` variable is used, which keeps
            existing three-argument calls working.

    For each class one line is printed: either the correct/total counts with
    the percentage, or a "no samples" note when the class never occurs.
    """
    if model is None:
        # Backward-compatible fallback to the notebook-level classifier.
        model = classifier

    result = {name: {'total': 0, 'correct': 0} for name in label_names}

    # Predict once for the whole batch instead of once per sample; skip the
    # call entirely when there is nothing to predict.
    predictions = model.predict(features) if len(features) > 0 else []

    for predicted, actual in zip(predictions, labels):
        real_label = label_names[actual]
        result[real_label]['total'] += 1
        if predicted == actual:
            result[real_label]['correct'] += 1

    for j in label_names:
        correct = result[j]['correct']
        total = result[j]['total']
        if total == 0:
            print(f'class: {j}, no samples')
        else:
            print(f'class: {j}, prediction: [{correct} / {total}] {100*correct/total}%')


### The Training Dataset

We use the utility class EyeImageDataset() to load the training dataset based on the metadata CSV file and
the target image folder

The training images will be loaded as needed due to memory constraints.

We decide to resize our images to 224x224 and normalize them before we apply the feature extraction.

These settings can be changed to evaluate how the classification model responds.

In [5]:
# Target edge length (pixels) used by both the resize and the centre crop.
input_size = 224

# Preprocessing applied by the dataset to every image: resize, centre-crop,
# convert to a tensor, then normalise each of the three channels.
# NOTE(review): the mean/std values look like the commonly used ImageNet
# channel statistics - confirm they are appropriate for these images.
apply_transforms = transforms.Compose([
    transforms.Resize(size=input_size),
    transforms.CenterCrop(size=input_size),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


# Training dataset built from the training image folder and the metadata CSV.
ds = EyeImageDataset(root=image_dir_training, data_info_csv_file=csv_file, transform=apply_transforms)

# SVM with class_weight='balanced'; fitted on the features extracted from
# the whole training dataset (build_features is called without a read limit).
classifier = svm.SVC(class_weight='balanced')
features, labels = build_features(ds)
classifier.fit(features, labels)


### Results against the training set

Here we test our SVM against the training set, to evaluate its ability to detect eye problems in our images.

In [None]:
# Print per-class accuracy for the features/labels currently in scope.
evaluate(features=features, labels=labels, label_names=ds.classes)

In [None]:
# Same preprocessing as used for the training images, so the test features
# are comparable with the ones the classifier was fitted on.
input_size = 224

apply_transforms = transforms.Compose([
    transforms.Resize(size=input_size),
    transforms.CenterCrop(size=input_size),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


ds = EyeImageDataset(root=image_dir_testing, data_info_csv_file=csv_file, transform=apply_transforms)

# Only extract features here. The classifier fitted on the training set is
# deliberately reused: creating and fitting a new SVC on these images would
# mean scoring the model on the very data it was trained on, so the result
# would not reflect performance on unseen images.
features, labels = build_features(ds)

### Results against the test set

Here we test our SVM against the test set, a different set of images not used to train the model.

In [None]:
# Print per-class accuracy for the features/labels currently in scope.
evaluate(features=features, labels=labels, label_names=ds.classes)