In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from datetime import datetime
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
import efficientnet_pytorch
import gc

In [None]:
# Load the CSVs: competition train/test metadata.
# NOTE(review): these are absolute, user-specific paths; a configurable data
# directory would make the notebook portable.
df_train = pd.read_csv('/home/malmason/datasets/siim-isic-melanoma-classification/train.csv')
df_test = pd.read_csv('/home/malmason/datasets/siim-isic-melanoma-classification/test.csv')

# Extra rows that are appended to df_train in the next cell.
# presumably additional melanoma samples from HAM10000 / ISIC (going by the file name) — TODO confirm
df_train_ham = pd.read_csv('/home/malmason/datasets/siim-isic-melanoma-classification/ham_10000_mel_isic_add.csv')

In [None]:
# Append the extra rows to the competition training frame.
# DataFrame.append was removed in pandas 2.0; pd.concat with default arguments
# gives the same result (original row indices are kept).
df_train = pd.concat([df_train, df_train_ham])

In [None]:
# Oversample the positive class: each pass appends a copy of every row with
# target == 1, so after two passes every original positive row is present
# four times. pd.concat replaces DataFrame.append (removed in pandas 2.0).
# NOTE(review): the duplication happens before the fold split further down, so
# copies of one image can end up in both the training and validation folds.
for _ in range(2):
    df_train = pd.concat([df_train, df_train.loc[df_train['target'] == 1]])

In [None]:
# Replace missing metadata with sentinel values, identically for train and test:
# 'na' for the categorical columns and 0 for the approximate age.
_fill_values = (
    ('sex', 'na'),
    ('age_approx', 0),
    ('anatom_site_general_challenge', 'na'),
)

for _frame in (df_train, df_test):
    for _column, _sentinel in _fill_values:
        _frame[_column] = _frame[_column].fillna(_sentinel)

In [None]:
# Sanity check after oversampling: total rows, negatives, positives.
_is_negative = df_train['target'] == 0
_is_positive = df_train['target'] == 1
print(len(df_train), len(df_train.loc[_is_negative]), len(df_train.loc[_is_positive]))

In [None]:
# Shuffle all rows and renumber them 0..n-1. The fold boundaries computed later
# are positional, so the shuffle decides fold membership; a fixed seed keeps
# the folds (and therefore the reported metrics) reproducible between runs.
df_train = df_train.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Image identifiers, in shuffled row order; used below to build the file names
# of the training images.
X_img = df_train['image_name']

In [None]:
# Tabular columns (everything except the label and the image name) as an array.
# In the visible notebook X_csv is only printed; the image model does not use it.
X_csv = df_train.drop(['target', 'image_name'], axis=1).values
# Labels, in the same row order as X_img.
y_csv = df_train['target'].values

In [None]:
# Sanity check: both arrays should have the same number of rows.
print(X_csv.shape, y_csv.shape)

In [None]:
class CNN(nn.Module):
    """EfficientNet-B4 backbone with a small two-class head.

    The backbone's final fully connected layer is replaced by a 1792 -> 32
    projection; a 32 -> 2 linear layer and a softmax over dim 1 follow, so
    ``forward`` returns per-class probabilities.

    NOTE(review): the notebook trains this model with ``nn.CrossEntropyLoss``,
    which applies log-softmax itself, so the loss sees softmax outputs rather
    than raw logits. The inference cells read column 1 as a probability, so
    the output contract is deliberately left unchanged here.
    """

    def __init__(self):
        super().__init__()

        backbone = efficientnet_pytorch.EfficientNet.from_pretrained('efficientnet-b4')
        # Swap the pretrained classifier for a 32-unit projection.
        backbone._fc = nn.Linear(1792, 32)

        self.base_model = backbone
        self.last_layer = nn.Linear(32, 2)
        self.soft = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax class probabilities with 2 columns for image batch ``x``."""
        features = self.base_model(x)
        logits = self.last_layer(features)
        return self.soft(logits)

In [None]:
# Instantiate the network; CNN.__init__ loads the pretrained 'efficientnet-b4'
# weights via EfficientNet.from_pretrained.
model = CNN()

In [None]:
# Make every backbone parameter trainable (full fine-tuning, nothing frozen).
for param in model.base_model.parameters(): param.requires_grad = True

In [None]:
# Redundant with the previous cell: _fc is part of base_model, so its weight
# was already covered by the loop over base_model.parameters().
model.base_model._fc.weight.requires_grad = True

In [None]:
# Use the first CUDA GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Move the model's parameters to the chosen device (as the last expression of
# the cell this also displays the module).
model.to(device)

In [None]:
# Read every training image into memory as an RGB array, in X_img order.
# NOTE(review): `img_train_folder` is not defined in any visible cell; it must
# be set (with a trailing path separator) before this cell can run on a fresh
# kernel.
X_image = []
for image_get in X_img:
    img_path = img_train_folder + '{}.jpg'.format(image_get)
    img = cv2.imread(img_path)
    # cv2.imread signals a missing or unreadable file by returning None instead
    # of raising; fail here with the offending path rather than inside cvtColor.
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    # OpenCV loads images as BGR; convert to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    X_image.append(img)

In [None]:
# Label array passed to batch_gd(); same order as X_image.
Y = np.array(y_csv)

In [None]:
# Augmentation pipeline applied to every image in transform_images() (both the
# training and the validation split go through it).
# Steps: array -> PIL, random vertical flip, random rotation of up to 90 degrees,
# random resized crop to 380x380, tensor conversion, per-channel normalisation,
# random erasing, and finally conversion back to a PIL image.
# presumably the mean/std values are the ImageNet channel statistics — TODO confirm
# NOTE(review): the last step turns the *normalised* float tensor back into a
# PIL image; that conversion looks lossy for values outside [0, 1] — confirm
# this is intended. transform_images() relies on the PIL output, so the two
# must be changed together.
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(90),
    transforms.RandomResizedCrop(380, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)),
    transforms.RandomErasing(p=0.2, scale=(0.02, 0.33)),
    transforms.ToPILImage()

])

In [None]:
# Training hyper-parameters. batch_size is read as a global by transform_images().
batch_size = 4
n_epochs = 20
# Placeholders only: both names are rebound to the arrays returned by batch_gd().
train_losses = []
val_losses = []

In [None]:
def _build_loader(images, labels, shuffle):
    """Augment ``images`` with ``preprocess`` and wrap them, with ``labels``, in a DataLoader."""
    gc.collect()

    # preprocess() ends with ToPILImage, so each result converts to a
    # channels-last array; stack them into one float32 batch.
    augmented = np.array([np.array(preprocess(image)) for image in images]).astype(np.float32)

    # Move the channel axis from last to second position, as the model expects.
    inputs = torch.from_numpy(np.transpose(augmented, (0, 3, 1, 2)))
    # One label per row, shape (N, 1), as int64 class indices.
    targets = torch.from_numpy(labels).reshape(-1, 1).long()

    dataset = torch.utils.data.TensorDataset(inputs, targets)
    return torch.utils.data.DataLoader(dataset=dataset,
                                       batch_size=batch_size,
                                       num_workers=4,
                                       shuffle=shuffle)


def transform_images(X_train_image, X_val_image, Y_train, Y_val):
    """Build the training and validation DataLoaders for one fold.

    Every image of both splits is passed through the module-level ``preprocess``
    pipeline (so a fresh random augmentation is drawn on each call) and the
    whole split is materialised in memory as a single float32 tensor.

    Args:
        X_train_image: sequence of training images accepted by ``preprocess``.
        X_val_image: sequence of validation images accepted by ``preprocess``.
        Y_train: numpy array of training labels, one per training image.
        Y_val: numpy array of validation labels, one per validation image.

    Returns:
        ``(train_loader, val_loader)``; batches are ``(inputs, targets)`` with
        targets of shape (batch, 1). Only the training loader shuffles. Both
        use the module-level ``batch_size``.
    """
    # Training split first, then validation, so the augmentation RNG is
    # consumed in a fixed order.
    train_loader = _build_loader(X_train_image, Y_train, shuffle=True)
    val_loader = _build_loader(X_val_image, Y_val, shuffle=False)

    return train_loader, val_loader

In [None]:
# Per-class sample counts (index 0 = negatives, index 1 = positives), measured
# on the oversampled training frame.
nSamples = [len(df_train.loc[df_train['target'] == label]) for label in (0, 1)]
# Weight of a class = 1 - its share of the data, so the rarer class weighs more.
_n_total = sum(nSamples)
normedWeights = [1 - (n_class / _n_total) for n_class in nSamples]

In [None]:
# Class weights as a float tensor on the same device as the model.
# `.to(device)` instead of `.cuda()`: `device` falls back to the CPU when no
# GPU is available, and a hard-coded .cuda() would crash in that case.
class_weights = torch.FloatTensor(normedWeights).to(device)

In [None]:
# Class-weighted cross-entropy loss.
# NOTE(review): CNN.forward already applies a softmax, while CrossEntropyLoss
# expects unnormalised logits — confirm this combination is intended.
criterion = nn.CrossEntropyLoss(weight = class_weights)
# Adam over all model parameters (backbone and head).
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Multiply the learning rate by 0.5 on every scheduler.step() call; batch_gd()
# calls it once per epoch and reads `scheduler` as a global.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)

In [None]:
def binary_acc(output_pred, target):
    """Return the fraction of samples whose arg-max class equals ``target``.

    Args:
        output_pred: tensor of per-class scores, shape (N, C).
        target: tensor holding N integer class labels. Shapes (N,), (N, 1) and
            a 0-dim scalar (N == 1) are all accepted.

    Returns:
        0-dim float tensor with the accuracy.
    """
    # Flatten the labels: an (N, 1) target would otherwise broadcast against
    # the (N,) predictions into an N x N comparison, and a 0-dim target (a
    # squeezed batch of one) has no shape[0] to divide by.
    target = target.reshape(-1)

    predicted = torch.argmax(output_pred, dim=1)
    correct_results_sum = (predicted == target).sum().float()

    return correct_results_sum / target.numel()

In [None]:
# Positional boundaries of six contiguous, near-equal folds over the shuffled
# data: fold k covers rows XF[k] .. XF[k+1]-1.
X_split = X_img.shape[0] / 6
XF = [round(X_split * k) for k in range(7)]

In [None]:
def _fold_split(X_image, Y, XF, fold):
    """Split ``X_image``/``Y`` into training data and the held-out fold ``fold``.

    ``XF`` holds the fold boundaries. The first fold always starts at row 0 and
    the last fold always runs to the end of the data.

    Returns:
        ``(X_train_image, Y_train, X_val_image, Y_val)``.
    """
    n_folds = len(XF) - 1
    lo = XF[fold] if fold > 0 else 0
    hi = XF[fold + 1] if fold < n_folds - 1 else len(X_image)

    X_train_image = X_image[:lo] + X_image[hi:]
    Y_train = np.concatenate((Y[:lo], Y[hi:]), axis=0)
    return X_train_image, Y_train, X_image[lo:hi], Y[lo:hi]


def batch_gd(model, criterion, optimizer, X_image, Y, n_epochs, XF):
    """Train ``model`` for ``n_epochs``, rotating the held-out fold within each epoch.

    In every epoch each fold in turn (last fold first) is held out for
    validation while the model is trained on the remaining folds. The same
    model instance is trained throughout, so over one epoch it sees every fold
    as training data.

    NOTE(review): because of that rotation the validation fold of one pass was
    training data in the previous pass, so the validation metrics are not an
    unbiased hold-out estimate.

    Relies on the module-level ``device``, ``scheduler``, ``transform_images``
    and ``binary_acc``. A checkpoint is written after every epoch; the target
    directory must already exist.

    Args:
        model: network to train; must already live on ``device``.
        criterion: loss called as ``criterion(outputs, targets)``.
        optimizer: optimizer over ``model``'s parameters.
        X_image: list of images (list slicing and ``+`` are used on it).
        Y: numpy array of integer labels aligned with ``X_image``.
        n_epochs: number of epochs.
        XF: fold boundaries; ``len(XF) - 1`` folds are used.

    Returns:
        ``(train_losses, val_losses, train_accuracy, val_accuracy)``: arrays of
        length ``n_epochs`` holding each epoch's metric averaged over the folds.
    """
    n_folds = len(XF) - 1

    train_losses = np.zeros(n_epochs)
    val_losses = np.zeros(n_epochs)
    train_accuracy = np.zeros(n_epochs)
    val_accuracy = np.zeros(n_epochs)

    for it in range(n_epochs):
        t0 = datetime.now()

        # Per-fold metrics of this epoch. These live outside the fold loop so
        # the epoch summary covers every fold, not just the last one.
        fold_train_loss, fold_train_acc = [], []
        fold_val_loss, fold_val_acc = [], []
        auc_val_fold = []

        # Hold out the last fold first, then work towards the first fold.
        for fold in reversed(range(n_folds)):
            X_train_image, Y_train, X_val_image, Y_val = _fold_split(X_image, Y, XF, fold)
            train_loader, val_loader = transform_images(X_train_image, X_val_image, Y_train, Y_val)

            # ---- training pass ----
            model.train()
            batch_loss, batch_acc = [], []
            for inputs, targets in tqdm(train_loader):
                inputs = inputs.to(device)
                # (B, 1) -> (B,). reshape keeps the batch axis even for B == 1,
                # where squeeze_() would produce a 0-dim tensor the loss rejects.
                targets = targets.to(device).reshape(-1)

                optimizer.zero_grad()
                outputs = model(inputs)

                loss = criterion(outputs, targets)
                acc = binary_acc(outputs, targets)

                loss.backward()
                optimizer.step()

                batch_loss.append(loss.item())
                batch_acc.append(acc.item())

            fold_train_loss.append(np.mean(batch_loss))
            fold_train_acc.append(np.mean(batch_acc))

            # ---- validation pass ----
            # eval() switches dropout / batch-norm to inference behaviour and
            # no_grad() avoids building autograd graphs that are never used.
            model.eval()
            batch_loss, batch_acc = [], []
            targets_auc, outputs_auc = [], []
            with torch.no_grad():
                for inputs, targets in tqdm(val_loader):
                    inputs = inputs.to(device)
                    targets = targets.to(device).reshape(-1)
                    outputs = model(inputs)

                    loss = criterion(outputs, targets)
                    acc = binary_acc(outputs, targets)

                    targets_auc.append(targets.cpu().numpy())
                    # Column 1 is the positive-class score.
                    outputs_auc.append(outputs[:, 1].cpu().numpy())

                    batch_loss.append(loss.item())
                    batch_acc.append(acc.item())

            fold_val_loss.append(np.mean(batch_loss))
            fold_val_acc.append(np.mean(batch_acc))
            auc_val_fold.append(
                roc_auc_score(np.concatenate(targets_auc), np.concatenate(outputs_auc)))

        train_loss = np.mean(fold_train_loss)
        train_acc = np.mean(fold_train_acc)
        val_loss = np.mean(fold_val_loss)
        val_acc = np.mean(fold_val_acc)
        auc_val = np.mean(auc_val_fold)

        train_losses[it] = train_loss
        val_losses[it] = val_loss
        train_accuracy[it] = train_acc
        val_accuracy[it] = val_acc

        scheduler.step()
        dt = datetime.now() -t0

        torch.save(model.state_dict(), '/home/malmason/datasets/siim-isic-melanoma-classification/lr0-001 gamma0-05 300 rgb B4-1/skin_train_concat_rgb_eff_net_b4_train_all_model_all_preproc' + str(it) + '.pt')

        print(f'Epoch {it+1}/{n_epochs}, Time: {dt}, Train Loss: {train_loss:.4f}, Train_acc: {train_acc}, Val Loss: {val_loss:.4f}, Val acc: {val_acc}, Val AUC: {auc_val}')

    return train_losses, val_losses, train_accuracy, val_accuracy

In [None]:
# Run the full training; returns one value per epoch for each of the four metrics.
train_losses, val_losses, train_accuracy, val_accuracy = batch_gd(
    model, criterion, optimizer, X_image, Y, n_epochs, XF)

In [None]:
# Training vs. validation loss per epoch (array index = epoch index).
fig, ax = plt.subplots()
ax.plot(train_losses, label='train loss')
ax.plot(val_losses, label='val loss')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch (array index = epoch index).
fig, ax = plt.subplots()
ax.plot(train_accuracy, label='train accuracy')
ax.plot(val_accuracy, label='val accuracy')
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy')
ax.legend()
plt.show()

## Test upload

In [None]:
# Alias, not a copy: df_test_csv and df_test refer to the same DataFrame.
df_test_csv = df_test
# Test image identifiers, used to build the test image file names.
X_test_img = df_test_csv['image_name']

In [None]:
# Submission frame; one prediction column per augmentation pass is added later.
test_submission = pd.DataFrame({'image_name':X_test_img})

In [None]:
# Restore the weights saved after epoch index 16.
# map_location=device lets a checkpoint written on a GPU be loaded on whatever
# `device` resolved to, including a CPU-only machine.
# NOTE(review): this reads from the '... rgb B4' directory, whereas batch_gd()
# writes its checkpoints to '... rgb B4-1' — confirm which run is intended.
model.load_state_dict(torch.load('/home/malmason/datasets/siim-isic-melanoma-classification/lr0-001 gamma0-05 300 rgb B4/skin_train_concat_rgb_eff_net_b4_train_all_model_all_preproc16.pt', map_location=device))

In [None]:
# Same device selection as in the training section, repeated here.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Make sure the model with the restored weights lives on that device.
model.to(device)

In [None]:
# Read every test image into memory as an RGB array, in X_test_img order.
# NOTE(review): `img_test_folder` is not defined in any visible cell; it must
# be set (with a trailing path separator) before this cell can run on a fresh
# kernel.
X_test_image = []
for image_get in X_test_img:
    img_test_path = img_test_folder + '{}.jpg'.format(image_get)
    img_test = cv2.imread(img_test_path)
    # cv2.imread signals a missing or unreadable file by returning None instead
    # of raising; fail here with the offending path rather than inside cvtColor.
    if img_test is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_test_path))
    # OpenCV loads images as BGR; convert to RGB.
    img_test = cv2.cvtColor(img_test, cv2.COLOR_BGR2RGB)
    X_test_image.append(img_test)

In [None]:
# Pipeline used for the test images. It is the training pipeline without
# RandomErasing and still contains random flip / rotation / crop, so every pass
# over the test set sees a different augmentation (the inference cell averages
# several such passes).
# NOTE(review): as in `preprocess`, the normalised tensor is converted back to
# a PIL image as the last step — confirm this is intended.
preprocess_val = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(90),
    transforms.RandomResizedCrop(380, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)),
    transforms.ToPILImage()

])

In [None]:
# Test-time augmentation: run six independently augmented passes over the test
# set and store each pass's positive-class output as column `count` (0..5) of
# test_submission.
model.eval()
# Placeholder labels: TensorDataset needs a target tensor, but the values are
# never used for scoring.
Y_dummy = np.zeros(len(X_test_image)).astype(np.float32)
print(Y_dummy.shape)
Y_dummy = torch.from_numpy(Y_dummy).reshape(-1,1).long()

for count in range(6):
    # These initial values of outputs_auc / outputs_auc_temp are re-initialised
    # further down before they are used.
    outputs_auc = []
    test_images = []
    outputs_auc_temp = []

    # Draw a fresh random augmentation of every test image for this pass.
    for image_get in tqdm(X_test_image):
        image_trans = preprocess_val(image_get)
        image_trans = np.array(image_trans)
        test_images.append(image_trans)

    # The whole test set is held in memory as one float32 array.
    test_images = np.asarray(test_images).astype(np.float32)

    # Move the channel axis from last to second position.
    X_test_image_t = np.transpose(test_images, (0,3,1,2))

    input_test = torch.from_numpy(X_test_image_t)

    # shuffle=False keeps predictions aligned with the rows of test_submission.
    test_set = torch.utils.data.TensorDataset(input_test, Y_dummy)
    test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=4,num_workers=0,shuffle=False)

    outputs_auc = []
    outputs_auc_temp = []

    with torch.no_grad():    
        for inputs, targets in tqdm(test_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            outputs_auc_temp = outputs.detach().cpu().numpy()
            # Keep only column 1, the positive-class output of CNN.forward.
            outputs_auc_temp = np.squeeze(outputs_auc_temp[:,1:2])
            outputs_auc.append(outputs_auc_temp)

    # Flatten the per-batch arrays into one prediction per test image.
    outputs_auc = np.array(outputs_auc, dtype = object)
    outputs_auc = np.hstack(outputs_auc)

    test_submission[count] = outputs_auc

In [None]:
# Preview: image_name plus the six per-pass prediction columns 0..5.
test_submission.head()

In [None]:
# Average the six augmentation passes (columns 0..5) into the image-based
# prediction for each test row.
test_submission['target'] = test_submission.apply(
    lambda row: sum(row[k] for k in range(6)) / 6,
    axis=1,
)

In [None]:
# The per-pass columns are no longer needed once they are averaged into 'target'.
for _pass_column in range(6):
    del test_submission[_pass_column]

In [None]:
# Inspect the test rows whose averaged prediction exceeds 0.5.
test_submission[test_submission['target'] > 0.5]

In [None]:
# Persist the image-only predictions (the DataFrame index is written as the
# first column, since index=False is not passed).
test_submission.to_csv('/home/malmason/datasets/siim-isic-melanoma-classification/tests/B4_preds.csv')

In [None]:
# Ground truth vs. averaged prediction for the AUC computed in the next cell.
# NOTE(review): assumes the test CSV contains a 'target' column — TODO confirm.
targets_auc = df_test_csv['target'].values
outputs_auc = test_submission['target'].values

In [None]:
# ROC AUC of the image-only predictions on the test set.
roc_auc_score(targets_auc, outputs_auc)

## Merge using probabilities

In [None]:
# Re-read the raw CSVs. This rebinds df_train / df_test, discarding the
# oversampling, shuffling and fillna applied to the earlier frames.
df_train = pd.read_csv('/home/malmason/datasets/siim-isic-melanoma-classification/train.csv')
df_test = pd.read_csv('/home/malmason/datasets/siim-isic-melanoma-classification/test.csv')

In [None]:
# Target-encode the metadata: for every (sex, age, site) combination present in
# the training data, estimate the positive rate, shrunk towards the global
# positive rate M.
feat = ['sex','age_approx','anatom_site_general_challenge']
M = df_train.target.mean()
te = df_train.groupby(feat)['target'].agg(['mean','count']).reset_index()
# Additive smoothing with one pseudo-observation at the global mean:
#     (positives + M) / (count + 1)
# The pseudo-observation has to be counted in the denominator as well; dividing
# by `count` alone gives mean + M/count, which is biased upwards and can
# exceed 1.
te['ll'] = ((te['mean']*te['count'])+(M))/(te['count'] + 1)
del te['mean'], te['count']

# Left merge keeps every test row; combinations not seen in training get no
# match and fall back to the global rate M.
df_test = df_test.merge( te, on=feat, how='left' )
df_test['ll'] = df_test['ll'].fillna(M)

In [None]:
# Alias, not a copy: changes made through pred_csv also affect df_test.
pred_csv = df_test

In [None]:
# Drop the metadata columns; this also removes them from df_test, which is the
# same object as pred_csv.
del pred_csv['patient_id'], pred_csv['sex'], pred_csv['age_approx'], pred_csv['anatom_site_general_challenge']

In [None]:
# Metadata-based prediction ('ll') as a Series.
test_submission_csv = pred_csv['ll']

In [None]:
# Add 'll' next to the image-based prediction; join aligns the rows on the index.
# NOTE(review): assumes both frames share the same index order — TODO confirm.
test_submission = test_submission.join(test_submission_csv)

In [None]:
# Free the name 'target' for the blended prediction computed below.
test_submission.rename(columns = {'target':'image_pred'}, inplace = True)

In [None]:
# Alias, not a copy: test_submission_comb and test_submission are the same frame.
test_submission_comb = test_submission

In [None]:
# Final prediction: 90% image model, 10% metadata target encoding.
test_submission_comb['target'] = (test_submission_comb['image_pred'] *0.9) + (test_submission_comb['ll'] *0.1)

In [None]:
# Keep only the blended 'target'; the two input columns are dropped.
del test_submission_comb['image_pred'], test_submission_comb['ll']

In [None]:
# NOTE(review): 'image_pred' was deleted in the previous cell, so this rename
# matches no column and leaves the frame unchanged — candidate for removal.
test_submission_comb.rename(columns = {'image_pred':'target'}, inplace = True)

In [None]:
# Inspect rows whose blended prediction exceeds 0.5 (test_submission is the
# same object as test_submission_comb, so the mask uses the blended column).
test_submission_comb[test_submission['target'] > 0.5]

In [None]:
# Write the blended submission (the DataFrame index is written as the first
# column, since index=False is not passed).
test_submission_comb.to_csv('/home/malmason/datasets/siim-isic-melanoma-classification/lr0-001 gamma0-05 300 rgb B4-1/test_submission_comb.csv')