# Stepped classifier
Since the classes occur with very different frequencies, we attempt one-vs-rest classification in steps, going through the classes in order from most common to least common.

In [1]:
import pickle as pk
from functools import partial
from pathlib import Path
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from tensorflow import keras
from keras.models import Sequential, load_model
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import RandomFlip, RandomRotation
from tensorflow.keras.layers import RandomContrast, RandomBrightness
from keras.callbacks import EarlyStopping

from cascid.configs.config import DATA_DIR

# Shape passed to the model's Input layer; presumably (height, width, channels).
IMAGE_SIZE = (256,256,3)
# Seed handed to the random augmentation layers so their randomness is repeatable.
RANDOM_STATE = 42
# Metric names; not referenced in the cells shown here — presumably used when plotting/compiling (TODO confirm).
METRICS = ['loss', 'acc', 'auc']

# Locations derived from the project data directory.
EXPERIMENT_DIR = DATA_DIR / 'experiments'
MODEL_PATH = DATA_DIR / 'dip' / 'model_resnet34_isic_noreg_aug_raw'
# Create the model directory (and missing parents); a pre-existing directory is not an error.
MODEL_PATH.mkdir(exist_ok=True, parents=True)

2022-10-20 09:12:33.347312: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-20 09:12:33.508839: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/cedip/.local/lib/python3.8/site-packages/cv2/../../lib64:/usr/lib/cuda/include:/usr/lib/cuda/lib64:
2022-10-20 09:12:33.508855: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-10-20 09:12:33.535881: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory

## Dataset

In [2]:
from cascid.datasets.isic import database

# Load the raw ISIC train/test split: image arrays plus one label per image.
x_train, x_test, y_train, y_test = database.get_train_test_images_raw()
# Distinct class labels that occur in the training targets.
categories = set(y_train.ravel().tolist())

# Disabled experiment: collapse the labels to a binary "Cancer"/"Not" target and one-hot encode it.
# OHE = OneHotEncoder(sparse=False)
# y_train=np.array(list(map(lambda x: "Cancer" if x in ['basal cell carcinoma', 'melanoma', 'squamous cell carcinoma'] else "Not", y_train))).reshape(-1,1)
# y_test=np.array(list(map(lambda x: "Cancer" if x in ['basal cell carcinoma', 'melanoma', 'squamous cell carcinoma'] else "Not", y_test))).reshape(-1,1)
# y_train = OHE.fit_transform(y_train)
# y_test = OHE.transform(y_test)

# Report the array shapes, one per line.
print(
    f"x_train shape: {x_train.shape}",
    f"x_test shape: {x_test.shape}",
    f"y_train shape: {y_train.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

x_train shape: (7265, 256, 256, 3)
x_test shape: (1817, 256, 256, 3)
y_train shape: (7265, 1)
y_test shape: (1817, 1)


In [20]:
# Build a tiny, class-balanced subset of the training data for quick experiments.
sample_size = 5  # maximum number of training examples kept per class
indexes = []  # row positions in x_train / y_train selected for the subset
categories_dict = dict()  # class label -> labels of the rows picked for that class
labels_flat = y_train[:, 0]
# Sorting makes the class order (and therefore ``indexes``) reproducible across
# kernel restarts; iterating a set of strings directly depends on hash order.
for c in sorted(categories):
    # Positions of the first ``sample_size`` rows labelled ``c``. Slicing keeps the
    # search bounded: a class with fewer rows contributes what it has instead of
    # indexing past the end of ``y_train``.
    hits = np.flatnonzero(labels_flat == c)[:sample_size].tolist()
    categories_dict[c] = [labels_flat[i] for i in hits]
    indexes.extend(hits)

In [21]:
# Pull the selected rows out of both arrays in one step so images and labels stay aligned.
x_train_small, y_train_small = x_train[indexes], y_train[indexes]

In [22]:
# Sanity-check the size of the reduced training subset.
print(
    f"x_train_small shape: {x_train_small.shape}",
    f"y_train_small shape: {y_train_small.shape}",
    sep="\n",
)

x_train_small shape: (30, 256, 256, 3)
y_train_small shape: (30, 1)


In [23]:
# ``randint`` treats its upper bound as exclusive, so pass the full length to keep the last row eligible.
y_train_small[np.random.randint(0, len(y_train_small), 10)] # 10 samples of y

array([['squamous cell carcinoma'],
       ['nevus'],
       ['nevus'],
       ['seborrheic keratosis'],
       ['nevus'],
       ['seborrheic keratosis'],
       ['seborrheic keratosis'],
       ['actinic keratosis'],
       ['basal cell carcinoma'],
       ['nevus']], dtype=object)

## Model

In [17]:
def ResNet(amt_64, amt_128, amt_256, amt_512, augmentation = False, num_classes = 2):
    """Build a ResNet-style Keras ``Sequential`` classifier.

    Architecture adapted from Aurelien Geron, Hands-On Machine Learning with
    Scikit-Learn, Keras & Tensorflow.

    Args:
        amt_64: number of residual units with 64 filters.
        amt_128: number of residual units with 128 filters.
        amt_256: number of residual units with 256 filters.
        amt_512: number of residual units with 512 filters.
        augmentation: when True, random brightness/contrast/flip/rotation
            layers are inserted right after the rescaling layer.
        num_classes: number of units in the final softmax layer. Defaults to
            2, the one-vs-rest setup this notebook uses.

    Returns:
        An uncompiled ``keras.models.Sequential`` model whose input shape is
        ``IMAGE_SIZE``.
    """
    # 3x3, stride-1, same-padded, bias-free convolution used throughout the network.
    DefaultConv2D = partial(keras.layers.Conv2D, kernel_size=3, strides=1, padding="SAME", use_bias=False )# , kernel_regularizer=keras.regularizers.l1(l1=0.001))

    class ResidualUnit(keras.layers.Layer):
        """Two conv + batch-norm stages added to a skip connection, then activated."""

        def __init__(self, filters, strides=1, activation="relu", **kwargs):
            super().__init__(**kwargs)
            self.activation = keras.activations.get(activation)
            self.main_layers = [
                DefaultConv2D(filters, strides=strides),
                keras.layers.BatchNormalization(),
                self.activation,  # plain activation function, applied like a layer in call()
                DefaultConv2D(filters),
                keras.layers.BatchNormalization(),
                #keras.layers.SpatialDropout2D(0.2)
            ]
            # Identity skip by default; when the main path downsamples (strides > 1)
            # the skip path gets a strided 1x1 convolution + batch norm as well.
            self.skip_layers = []
            if strides > 1:
                self.skip_layers = [
                    DefaultConv2D(filters, kernel_size=1, strides=strides),
                    keras.layers.BatchNormalization()
                ]

        def call(self, inputs):
            """Run the main and skip paths and return the activated sum."""
            Z = inputs
            for layer in self.main_layers:
                Z = layer(Z)
            skip_Z = inputs
            for layer in self.skip_layers:
                skip_Z = layer(skip_Z)
            return self.activation(Z + skip_Z)

    model = keras.models.Sequential()
    model.add(Input(shape=IMAGE_SIZE))
    model.add(keras.layers.Rescaling(scale=1./255))  # multiply inputs by 1/255
    if augmentation:
        model.add(RandomBrightness(factor=(-0.2, 0.2), value_range=(0.0, 1.0), seed=RANDOM_STATE)) # Randomly change brightness anywhere from -20% to +20%
        model.add(RandomContrast(factor=0.5, seed=RANDOM_STATE)) # Randomly change contrast anywhere from -50% to +50%
        model.add(RandomFlip(mode="horizontal_and_vertical", seed=RANDOM_STATE)) # Randomly flip images either horizontally, vertically or both
        model.add(RandomRotation(factor=(-0.2, 0.2), fill_mode="nearest", interpolation="bilinear", seed=RANDOM_STATE)) # Randomly rotate anywhere from -20% * 2PI to +20% * 2PI, filling gaps by using 'nearest' strategy
    # Stem: 7x7 stride-2 convolution, batch norm, ReLU, then 3x3 stride-2 max pooling.
    model.add(DefaultConv2D(64, kernel_size=7, strides=2))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation("relu"))
    model.add(keras.layers.MaxPool2D(pool_size=3, strides=2, padding="SAME"))
    prev_filters = 64
    for filters in [64] * amt_64 + [128] * amt_128 + [256] * amt_256 + [512] * amt_512:
        # Downsample (stride 2) whenever the filter count changes from the previous unit.
        strides = 1 if filters == prev_filters else 2
        model.add(ResidualUnit(filters, strides=strides))
        prev_filters = filters
    model.add(keras.layers.SpatialDropout2D(0.2))
    model.add(keras.layers.GlobalAvgPool2D())
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(num_classes, activation="softmax"))
    return model
def dump_results(model, history, path):
    """Save ``model`` to ``path`` and pickle ``history`` to ``path/history.pkl``.

    Args:
        model: object exposing ``save(path)`` (e.g. a Keras model).
        history: picklable training-history object.
        path: target directory, given as ``pathlib.Path`` or ``str``.
    """
    # Coerce so plain-string paths also work with the ``/`` operator below.
    base_dir = Path(path)
    # The history file is written inside the directory, so make sure it exists.
    base_dir.mkdir(parents=True, exist_ok=True)
    model.save(path)

    with open(base_dir / "history.pkl", "wb") as fl:
        pk.dump(history, fl)

def load_results(path):
    """Load a saved model and its pickled history from ``path``.

    Args:
        path: directory previously written by ``dump_results``, given as
            ``pathlib.Path`` or ``str``.

    Returns:
        Tuple ``(model, history)``: the result of ``load_model(path)`` and the
        object unpickled from ``path/history.pkl``.
    """
    # Coerce so plain-string paths also work with the ``/`` operator below.
    base_dir = Path(path)
    model = load_model(path)

    # SECURITY: unpickling can execute arbitrary code; only load history files
    # that were produced by a trusted run of this notebook.
    with open(base_dir / "history.pkl", "rb") as fl:
        history = pk.load(fl)

    return model, history

In [19]:
# 'resnet18': (2, 2, 2, 2)
# 'resnet34': (3, 4, 6, 3)

# model = ResNet(3,4,6,3, augmentation=True)

class SteppingModel():
    """Cascade of one-vs-rest classifiers applied in a fixed order.

    Intended flow, one step per class:
        1. predict this step's class against the rest;
        2. on a positive prediction, return that class;
        3. otherwise hand the sample on to the next step's model.
    """

    def __init__(self, steps: List[str]) -> None:
        """Remember the ordered class names, one per cascade step."""
        self.steps = steps

    def _encode(self, y_train, target: str, target_name: str = None):
        """Binarise labels: ``target`` becomes ``target_name``, everything else "Not".

        ``target_name`` falls back to ``target`` when omitted. The result is a
        column array of shape (n, 1).
        """
        positive_label = target if target_name is None else target_name

        def relabel(value):
            return positive_label if value == target else "Not"

        return pd.Series(y_train.flatten()).apply(relabel).to_numpy().reshape(-1, 1)

    def _drop(self, y_train, target: str):
        """Return the labels with every ``target`` entry removed, as a column array."""
        labels = pd.Series(y_train.flatten())
        keep = labels != target
        return labels[keep].to_numpy().reshape(-1, 1)

    def fit(self, x_train, y_train):
        """Placeholder, not implemented yet.

        Plan: build one model per step, each predicting its class against all
        others (possibly excluding classes handled by earlier steps), using the
        helpers above to trim and relabel the data. Keep the models in a
        ``collections.OrderedDict`` so the step order is preserved together
        with each model's classification goal.
        """
        return None
