
# Skin Issues

## Loading data

In [None]:
import glob
from PIL import Image
import numpy as np
import pandas
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from random import randint
from skimage.transform import resize
from sklearn.manifold import TSNE
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from sklearn.metrics import confusion_matrix , classification_report
import seaborn as sns
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import torchvision.models as models
import sys  
sys.path.insert(0, './code')
from util import AddGaussianNoise
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.tensorboard import SummaryWriter
from efficientnet_pytorch import EfficientNet
from util import AddGaussianNoise
from sklearn.utils import shuffle
from center_loss import CenterLoss
from pytorch_lightning.callbacks import ModelCheckpoint

In [None]:
# Source code credit for this function: https://gist.github.com/shaypal5/94c53d765083101efc0240d776a23823
def print_confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=14, name = "noName"):
    """Plot a confusion matrix as an annotated heatmap, show it and return the figure.

    Arguments
    ---------
    confusion_matrix: numpy.ndarray
        The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix.
        Similarly constructed ndarrays can also be used.
    class_names: list
        An ordered list of class names, in the order they index the given confusion matrix.
    figsize: tuple
        A 2-long tuple, the first value determining the horizontal size of the outputted figure,
        the second determining the vertical size. Defaults to (10,7).
    fontsize: int
        Font size for axes labels. Defaults to 14.
    name: str
        Accepted so callers can pass a model name; it is not used by the plot.

    Returns
    -------
    matplotlib.figure.Figure
        The resulting confusion matrix figure

    Raises
    ------
    ValueError
        If seaborn cannot annotate the cells with the integer format ``"d"``.
    """
    df_cm = pandas.DataFrame(
        confusion_matrix, index=class_names, columns=class_names,
    )
    fig = plt.figure(figsize=figsize)
    try:
        heatmap = sns.heatmap(df_cm, annot=True, fmt="d")
    except ValueError as err:
        # Keep the original error as the cause so the real formatting problem stays visible.
        raise ValueError("Confusion matrix values must be integers.") from err
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
    plt.ylabel('Truth')
    plt.xlabel('Prediction')
    plt.show()
    return fig

In [None]:
def testAccuracy(model):
    """Print the test accuracy of ``model`` and plot a 2-D t-SNE map of its pooled features.

    The function works on module-level state: samples come from ``datasetTest``,
    tensors are moved to ``device``, the feature vector of each sample is read
    from ``activation['avgpool']`` (so a forward hook storing that key must be
    registered on the model beforehand) and legend entries come from ``labelName``.

    Arguments
    ---------
    model:
        Callable network exposing ``to(device)`` and a ``name`` attribute.

    Returns
    -------
    None
        Progress, the final accuracy and the scatter plot are emitted as side effects.
    """
    # One matplotlib format string per class index (classes 0-7).
    classStyles = ['bo', 'go', 'ro', 'yo', 'kd', 'ch', 'm*', 'bs']

    features = []
    truth = []
    correctPred = 0
    total = len(datasetTest)
    model.to(device)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for i in range(total):
            # Fetch the sample once; every dataset access reloads and resizes the image.
            sample = datasetTest[i]
            image = sample[0]
            target = int(sample[1])

            output = model(image.unsqueeze(0).to(device))
            output = np.array(output.detach().to('cpu'))
            if target == np.argmax(output[0]):
                correctPred += 1
            features.append(np.array(activation['avgpool'].to('cpu')).reshape(-1))
            truth.append(target)
            print("{:.2f} % ({:d} su {:d}) acc = {:.2f}".format(100*i/total, i, total, 100 * correctPred / (i + 1)), end="\r")
    print("Accuracy of prediction ("+ model.name+ ") "+str(correctPred/len(datasetTest)))

    embedding = TSNE(n_components=2).fit_transform(np.array(features))
    truth = np.array(truth)

    plt.figure(figsize=(10, 7))
    # Draw each class with a single call so the legend gets exactly one entry per class.
    for classIdx, style in enumerate(classStyles):
        members = truth == classIdx
        if members.any():
            plt.plot(embedding[members, 0], embedding[members, 1], style, label=labelName[classIdx])

    # NOTE: the axes are the two t-SNE components; the historical label texts are kept.
    plt.ylabel('PC1')
    plt.xlabel('PC2')
    plt.legend(loc="upper right")
    plt.show()

In [None]:
def verifyAccuracy(model, dataloader, test = True):
    """Evaluate ``model`` on ``dataloader`` and print/plot its classification metrics.

    Prints overall accuracy, balanced accuracy (both as percentages), a
    classification report and per-class accuracy, and shows the confusion
    matrix. Uses the module-level ``device`` and the first 8 entries of
    ``labelName``.

    Arguments
    ---------
    model:
        Network exposing ``to(device)`` and ``name``. ``model.testAcc`` is set
        when ``test`` is true; otherwise ``model.maxValAcc`` is raised if the
        new balanced accuracy exceeds it.
    dataloader:
        Iterable of ``(images, labels)`` batches.
    test: bool
        Selects which attribute of ``model`` receives the balanced accuracy
        (stored as the 0-1 fraction returned by scikit-learn).

    Raises
    ------
    ValueError
        If ``dataloader`` yields no batches.
    """
    numClasses = 8
    classIds = list(range(numClasses))
    classNames = labelName[:numClasses]

    predictedBatches = []
    truthBatches = []
    with torch.no_grad():
        model.to(device)
        for images, labels in dataloader:
            outputs = model(images.to(device))
            # max returns (value, index); the index is the predicted class
            _, predicted = torch.max(outputs, 1)
            # Copy whole batches to the CPU instead of one element at a time.
            predictedBatches.append(predicted.to('cpu').numpy())
            truthBatches.append(labels.to('cpu').numpy())

    if not truthBatches:
        raise ValueError("verifyAccuracy needs at least one batch, but the dataloader is empty.")

    truth = np.concatenate(truthBatches)
    predictions = np.concatenate(predictedBatches)
    hits = predictions == truth

    acc = 100.0 * int(hits.sum()) / truth.size
    print(f'Accuracy of the network {model.name}: {acc} %')

    balAcc = balanced_accuracy_score(truth, predictions)
    # balanced_accuracy_score returns a fraction; scale it so the printed unit is right.
    print(f'Balanced accuracy of the network {model.name}: {100.0 * balAcc} %')
    if test:
        model.testAcc = balAcc
    elif model.maxValAcc < balAcc:
        model.maxValAcc = balAcc

    # Explicit labels keep the matrix/report aligned with classNames even when a
    # class is missing from both the targets and the predictions.
    cm = confusion_matrix(truth, predictions, labels=classIds)
    print_confusion_matrix(cm, classNames, name = model.name)
    print(classification_report(truth, predictions, labels=classIds, target_names=classNames))

    classTotals = np.bincount(truth, minlength=numClasses)
    classHits = np.bincount(truth[hits], minlength=numClasses)
    for i in classIds:
        if classTotals[i] == 0:
            # A class without samples has no defined accuracy.
            print(f'Accuracy of {labelName[i]}: n/a (no samples)')
            continue
        acc = 100.0 * int(classHits[i]) / int(classTotals[i])
        print(f'Accuracy of {labelName[i]}: {acc} %')

In [None]:
def reduceDataframe(dataframe, perc):
    """Keep the first ``perc`` fraction of the rows of every class.

    For each name in the module-level ``labelName`` the rows whose column equals
    1 are selected and the leading ``round(perc * count)`` of them are kept.

    Arguments
    ---------
    dataframe: pandas.DataFrame
        Table with one indicator column per entry of ``labelName``.
    perc: float
        Fraction of each class to keep.

    Returns
    -------
    pandas.DataFrame
        The kept rows with a fresh 0..n-1 index. Classes are stacked from the
        last label to the first; the column dtypes of ``dataframe`` are preserved.
    """
    parts = []
    for label in labelName:
        rows = dataframe[dataframe[label] == 1.]
        parts.append(rows.iloc[:round(perc * len(rows))])

    if not parts:
        return pandas.DataFrame(columns=labelName)

    # Concatenate once, without an empty object-dtype seed frame that could upcast columns.
    reduced = pandas.concat(parts[::-1], axis=0)
    reduced.reset_index(drop=True, inplace=True)
    return reduced

In [None]:
# Image side length constant (not referenced by the code visible in this notebook).
IMAGE_SIZE = 600
# GPU indices; passed to pl.Trainer(gpus=...) in the training cell.
gpus = [0, 3]
# 80 samples for each listed GPU.
BATCH_SIZE = 80 * len(gpus)
# Number of batches that add up to roughly 640 samples; used as accumulate_grad_batches.
batches = round(640 / BATCH_SIZE)

In [None]:
batches

In [None]:
skinDataset = []
labelName = ["MEL", "NV", "BCC", "AK", "BKL", "DF", "VASC", "SCC", "UNK"]

i = 0


def _headFractionPerLabel(frame, fraction = 0.9):
    """Return, stacked in ``labelName`` order, the first ``fraction`` of the rows of each class."""
    pieces = []
    for label in labelName:
        positives = frame[frame[label] == 1.]
        pieces.append(positives.iloc[:round(fraction * len(positives))])
    return pandas.concat(pieces)


# Read the labels and shuffle them with a fixed seed so the split is reproducible.
df = shuffle(pandas.read_csv("label.csv"), random_state = 1234).reset_index(drop=True)

# First cut: 90% of every class goes to train+validation, the remaining rows form the test set.
trainVal = _headFractionPerLabel(df).reset_index(drop=True)
dfTest = pandas.concat([df, trainVal]).drop_duplicates(keep=False)

# Second cut: 90% of every class of train+validation is training data, the rest is validation.
dfVal = trainVal.copy()
dfTrain = _headFractionPerLabel(dfVal)
dfVal = pandas.concat([dfVal, dfTrain]).drop_duplicates(keep=False)

dfTest = dfTest.reset_index(drop=True)
dfVal = dfVal.reset_index(drop=True)
dfTrain = dfTrain.reset_index(drop=True)

In [None]:
# Used to check that the dataset is well balanced
def isBalanced(df):
    """Print how many rows of ``df`` are flagged with each diagnosis label.

    A row counts for a label when that label's column equals 1. One line per
    label is printed; nothing is returned.
    """
    names = ('MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC', 'UNK')
    # Count every label before printing so a missing column fails before any output.
    counts = [len(df[df[name] == 1.]) for name in names]

    for name, count in zip(names, counts):
        print("Casi di " + name + ": " + str(count))

# Report how many rows ended up in the training and in the test split.
print(f"Le dimensioni del dataset di training sono : {dfTrain.shape[0]} , mentre le dimensioni del dataset di test sono {dfTest.shape[0]}")

In [None]:
def BalanceVector(df, lowLimit = 0):
    """Return normalised inverse-frequency weights for every class except the last.

    The classes are the entries of the module-level ``labelName`` without its
    final element. Each weight is ``1 / count``; the weights are scaled to sum
    to 1, entries below ``lowLimit`` are raised to ``lowLimit`` and the vector
    is scaled to sum to 1 again.

    Arguments
    ---------
    df: pandas.DataFrame
        Table with one indicator column per class.
    lowLimit: float
        Floor applied to the normalised weights. Defaults to 0.

    Returns
    -------
    numpy.ndarray
        One weight per considered class.
    """
    counts = np.array([len(df[df[name] == 1.]) for name in labelName[:-1]])

    weights = 1 / counts
    weights = weights / weights.sum()

    # Clamp from below, then renormalise so the weights sum to 1 again.
    weights = np.where(weights < lowLimit, lowLimit, weights)
    return weights / weights.sum()

In [None]:
def BalanceVectorCB(df, beta = 0):
    """Return normalised weights ``(1 - beta) / (1 - beta ** count)`` per class.

    The classes are the entries of the module-level ``labelName`` without its
    final element; ``count`` is the number of rows of ``df`` whose class column
    equals 1. The weights are scaled to sum to 1.

    Arguments
    ---------
    df: pandas.DataFrame
        Table with one indicator column per class.
    beta: float
        Base of the exponential term. Defaults to 0, which gives every class
        with at least one sample the same weight.

    Returns
    -------
    numpy.ndarray
        One weight per considered class.
    """
    counts = np.array([float(len(df[df[name] == 1.])) for name in labelName[:-1]])

    # Iterate over the array so each count is a numpy float, exactly as when indexing it.
    weights = np.array([(1 - beta) / (1 - pow(beta, count)) for count in counts])

    return weights / weights.sum()

In [None]:
# Print the per-class sample counts of the training split.
isBalanced(dfTrain)
# Class weights for the training split from BalanceVectorCB with beta = 0.99.
w = BalanceVectorCB(dfTrain, beta = 0.99)

In [None]:
print(w)

In [None]:
pow(0.99, 194)

In [None]:
def showImage(image, isTensor = False):
    """Display ``image`` with matplotlib.

    When ``isTensor`` is true the axes are reordered with ``permute(1, 2, 0)``
    before plotting (presumably channel-first to channel-last; confirm with the
    dataset); otherwise ``image`` is passed to ``imshow`` unchanged.
    """
    pixels = image.permute(1, 2, 0) if isTensor else image
    plt.imshow(pixels, interpolation='nearest', aspect='equal')
    plt.show()
    
def showLabel(label, prediction = False):
    """Print the class name stored at index ``label`` of the module-level ``labelName``.

    With ``prediction`` set, the message is prefixed to mark it as network output.
    """
    prefix = "(Output della rete) La malattia è: " if prediction else "La malattia è: "
    print(prefix + labelName[label])
    
def showExample(example, isTensor = False):
    """Print the label of an ``(image, label)`` example and then display its image."""
    classIndex = example[1]
    showLabel(classIndex)
    picture = example[0]
    showImage(picture, isTensor)
    
def showLatent(label):
    """Print the ``size`` attribute of ``label``."""
    size = label.size
    print(size)

# Create Dataloader

In [None]:
# Deterministic preprocessing: convert the loaded image to a tensor.
imageTransform = transforms.Compose([
        transforms.ToTensor()
    ])


#transforms.GaussianBlur(5, sigma=(0.1, 1.0))

# Random augmentation pipeline: colour jitter, horizontal and vertical flips
# (probability 0.5 each) and a random affine warp (rotation in [-180, 180],
# scale in [0.7, 1.7], shear in [-30, 30], empty areas filled with 0).
# The Gaussian noise and blur steps are currently disabled.
randomTransform = transforms.Compose([
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.05, hue=0.05),
        #AddGaussianNoise(0., .08),
        #transforms.RandomApply(torch.nn.ModuleList([
        #    transforms.GaussianBlur(7, sigma=(0.2, 2.0))
        #]), p=0.8),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomAffine((-180, 180), fill=0, scale = (0.7, 1.7), shear=(-30, 30))
    ])


In [None]:
class TotalDataset(Dataset):
    """Dataset yielding ``(image tensor, class index)`` pairs described by a label table.

    ``label`` is a DataFrame with an ``image`` column holding the file name
    (without extension). ``__getitem__`` reads the row values from position 1
    up to, but excluding, the last one and uses their argmax as the class
    index. Rows are addressed by index label, so the frame is expected to
    carry a 0..n-1 index.
    """

    def __init__(self, label, imgSize = 224, aug = False):
        # NOTE(review): ``aug`` is never read; ``self.aug`` always holds the resize
        # transform and no random augmentation is applied here.
        self.label = label
        self.lenght = label.shape[0]
        self.aug = transforms.Resize((imgSize, imgSize))

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(resized image tensor, class index tensor)``."""
        imageName = self.label['image'][index]
        row = self.label.loc[index]
        # Skip the first value (the image name) and drop the last class column.
        classScores = np.array(row[1:], dtype = 'float32')[:-1]
        label = np.argmax(classScores)
        picture = Image.open('ISIC_2019_Training_Input/' + imageName + '.jpg')
        image = self.aug(imageTransform(picture))
        return (image, torch.tensor(label))

    def __len__(self):
        """Return the number of rows of the label table."""
        return self.lenght

In [None]:
# Datasets for the three splits at the default image size (224).
datasetTrain = TotalDataset(dfTrain)
datasetVal = TotalDataset(dfVal)
datasetTest = TotalDataset(dfTest)

# Only the training loader shuffles; every loader uses BATCH_SIZE and 8 workers.
dataloaderTrain = DataLoader(dataset=datasetTrain, batch_size=BATCH_SIZE , shuffle=True, num_workers=8 )
dataloaderTest = DataLoader(dataset=datasetTest, batch_size=BATCH_SIZE , num_workers=8 )
dataloaderVal = DataLoader(dataset=datasetVal, batch_size=BATCH_SIZE, num_workers=8 )

In [None]:
# Show the first test image, then the same image after the random augmentation pipeline.
showImage(datasetTest[0][0], isTensor= True)
showImage(randomTransform(datasetTest[0][0]), isTensor= True)

In [None]:
# Show ten randomly chosen training examples.
for i in range(10):
    # randint includes both endpoints, so the upper bound must be the last valid index.
    example = randint(0, len(datasetTrain) - 1)
    showExample(datasetTrain[example], True)

## Models PyTorch Lightning

In [None]:
def makePrediction(image, Model, label = False, latent = False, o = True):
    """Run ``Model`` on a single image on the CPU and optionally report the result.

    Arguments
    ---------
    image:
        Tensor of one sample; a leading batch axis is added before the forward pass.
    Model:
        Network to evaluate; it is moved to the CPU first.
    label:
        Unused.
    latent: bool
        When true, the size of the output array is printed via ``showLatent``.
    o: bool
        When true, the predicted class is printed via ``showLabel``.
    """
    Model.to('cpu')
    # Predict with the NN on a batch holding just this image.
    batch = image.unsqueeze(0)
    scores = np.array(Model(batch).detach())
    if o:
        showLabel(np.argmax(scores), prediction = True)
    if latent:
        showLatent(scores)

# BYOL

In [None]:
from PLModel import PLModel
from BYOL import BYOL

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Candidate networks; the following cells append one PLModel per architecture.
Models = []

In [None]:
# EfficientNet-B0 with pretrained weights and an 8-class output, wrapped in PLModel.
model = PLModel('EfficientNetB0', EfficientNet.from_pretrained('efficientnet-b0', num_classes=8))
Models.append(model)

In [None]:
# EfficientNet-B1 with pretrained weights and an 8-class output, wrapped in PLModel.
model = PLModel('EfficientNetB1', EfficientNet.from_pretrained('efficientnet-b1', num_classes=8))
Models.append(model)

In [None]:
# EfficientNet-B2 with pretrained weights and an 8-class output, wrapped in PLModel.
model = PLModel('EfficientNetB2', EfficientNet.from_pretrained('efficientnet-b2', num_classes=8))
Models.append(model)

In [None]:
# EfficientNet-B3 with pretrained weights and an 8-class output, wrapped in PLModel.
model = PLModel('EfficientNetB3', EfficientNet.from_pretrained('efficientnet-b3', num_classes=8))
Models.append(model)

In [None]:
# EfficientNet-B5 with pretrained weights and an 8-class output, wrapped in PLModel.
model = PLModel('EfficientNetB5', EfficientNet.from_pretrained('efficientnet-b5', num_classes=8))
Models.append(model)

In [None]:
# EfficientNet-B6 with pretrained weights and an 8-class output, wrapped in PLModel.
model = PLModel('EfficientNetB6', EfficientNet.from_pretrained('efficientnet-b6', num_classes=8))
Models.append(model)

In [None]:
# EfficientNet-B7 with pretrained weights and an 8-class output, wrapped in PLModel.
model = PLModel('EfficientNetB7', EfficientNet.from_pretrained('efficientnet-b7', num_classes=8))
Models.append(model)

In [None]:
# Pretrained ResNeXt-50 (32x4d) from the torchvision v0.9.0 hub entry.
resnext = torch.hub.load('pytorch/vision:v0.9.0', 'resnext50_32x4d', pretrained=True)
# Replace the final fully connected layer with an 8-output one.
num_f = resnext.fc.in_features
resnext.fc = nn.Linear(num_f, 8)
Models.append(PLModel('Resnext50', resnext))

In [None]:
# Pretrained ResNet-152 from the torchvision v0.9.0 hub entry. The hub entry
# loaded here is 'resnet152' (not a ResNeXt), so the variable and the model
# name say so; the model name is what appears in logs and checkpoint paths.
resnet152 = torch.hub.load('pytorch/vision:v0.9.0', 'resnet152', pretrained=True)
# Replace the final fully connected layer with an 8-output one.
num_f = resnet152.fc.in_features
resnet152.fc = nn.Linear(num_f, 8)
Models.append(PLModel('Resnet152', resnet152))

## Downstream Task

In [None]:

# Trained models; the training loop appends one entry per network.
classificationModel = []

In [None]:
# Optimiser name handed to PLModel via its optimName argument.
optimName = 'MADGRAD'
# Upper bound on training epochs (pl.Trainer max_epochs).
num_epochs = 150

In [None]:

# Inverse-frequency class weights for the weighted cross-entropy loss.
w = BalanceVector(dfTrain)
loss = nn.CrossEntropyLoss(weight = torch.tensor(w, dtype=torch.float))
lr_monitor = LearningRateMonitor(logging_interval='step')

# Input resolution for each entry of Models, in the same order.
resolutions = [224, 240, 260, 300, 456, 528, 600, 600, 600]

# Pair each model with its resolution only: no ``layers`` sequence is defined
# in this notebook and the loop body never used a per-model layer.
for model, resolution in zip(Models, resolutions):
    print("Train to downstream task network " + model.name+" at resolution "+ str(resolution))
    lr = 0.00025

    # Rebuild the three splits at this model's resolution.
    datasetTrain = TotalDataset(dfTrain, imgSize = resolution )
    datasetVal = TotalDataset(dfVal, imgSize = resolution)
    datasetTest = TotalDataset(dfTest, imgSize = resolution)

    # Small per-step batch; gradients are accumulated over ``batches`` steps.
    BATCH_SIZE = 50
    batches = round(640 / BATCH_SIZE)

    dataloaderTrain = DataLoader(dataset=datasetTrain, batch_size=BATCH_SIZE , shuffle=True, num_workers=8 )
    dataloaderTest = DataLoader(dataset=datasetTest, batch_size=BATCH_SIZE , num_workers=8 )
    dataloaderVal = DataLoader(dataset=datasetVal, batch_size=BATCH_SIZE, num_workers=8 )

    newModel = PLModel(model.name, model.model, datasetTrain, datasetVal,batch_size = BATCH_SIZE,
                           loss = loss, lr = lr, optimName = optimName)

    # Keep the three checkpoints with the highest 'balValAcc'.
    # NOTE(review): the filename prefix says '380pixel' for every resolution; it is
    # left unchanged because recoverBestModel() may depend on the file names - confirm.
    checkpoint_callback = ModelCheckpoint(
        monitor='balValAcc',
        dirpath='skin/supervised/'+newModel.writer+'/',
        filename= '380pixel'+'-{epoch:02d}-{balValAcc:.2f}',
        save_top_k=3,
        mode='max',
    )
    tb_logger = pl_loggers.TensorBoardLogger('logs/', name =newModel.writer)

    trainer = pl.Trainer(gpus=gpus, accelerator='dp', max_epochs=num_epochs,
                            accumulate_grad_batches = batches,
                             logger=tb_logger, callbacks=[lr_monitor, checkpoint_callback]
                            )
    trainer.fit(newModel)
    newModel.recoverBestModel()
    newModel.eval()
    # Validation first (updates maxValAcc), then the held-out test split (sets testAcc).
    verifyAccuracy(newModel, dataloaderVal, test=False)
    verifyAccuracy(newModel, dataloaderTest)
    classificationModel.append(newModel)

In [None]:
# Most recent output captured by each forward hook, keyed by the name given to get_activation.
activation = {}


def get_activation(name):
    """Return a forward hook that stores the detached layer output in ``activation[name]``."""
    def _store(module, inputs, output):
        activation[name] = output.detach()

    return _store

# Hook the pooling layer of every trained model so its output lands in activation['avgpool'].
for model in classificationModel:
    backbone = model.model
    # The EfficientNet models expose the layer as ``_avg_pooling``, the others as ``avgpool``.
    pooling = backbone._avg_pooling if 'EfficientNet' in model.name else backbone.avgpool
    print(pooling)
    pooling.register_forward_hook(get_activation('avgpool'))

In [None]:
# t-SNE: for every trained model, save its weights, rebuild the test set at the
# model's resolution and plot the embedding of its pooled features.
for model, resolution in zip(classificationModel, resolutions):
    torch.save(model.state_dict(), "LastEfficientNetFamily" + model.name + ".cpkt")
    # testAccuracy reads the module-level datasetTest, so it is rebound before the call.
    datasetTest = TotalDataset(dfTest, imgSize = resolution)
    dataloaderTest = DataLoader(dataset=datasetTest, batch_size=BATCH_SIZE , num_workers=8 )
    testAccuracy(model)