In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from tqdm import tqdm
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
import efficientnet_pytorch
import gc

In [None]:
# Load the training and test metadata tables from their CSV files.
# (The "_pp" suffix presumably stands for "pre-processed" — TODO confirm.)
df_train = pd.read_csv('/home/malmason/datasets/siim-isic-melanoma-classification/cleaned_csv/train_pp.csv')
df_test = pd.read_csv('/home/malmason/datasets/siim-isic-melanoma-classification/cleaned_csv/test_pp.csv')

In [None]:
# Folder prefixes for the train / test JPEG images.  They end with a slash
# because file names are appended to them by plain string concatenation.
img_train_folder = '/home/malmason/datasets/siim-isic-melanoma-classification/rgb300/train/'
img_test_folder = '/home/malmason/datasets/siim-isic-melanoma-classification/rgb300/test/'

In [None]:
# Shuffle the rows once so that the positional train/validation slices taken
# later are random.  The seed is fixed so the shuffle — and therefore the
# split — is identical after a kernel restart; otherwise a checkpoint reloaded
# in a fresh session would be evaluated on rows it was trained on.
df_train = df_train.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Display (n_rows, n_columns) of the shuffled training table.
df_train.shape

In [None]:
# Image identifiers, kept in the same row order as the tabular features so
# that images loaded by iterating over this column line up with them.
X_img = df_train['image_name']

In [None]:
# Tabular inputs: every column except the label and the image identifier.
X_csv = df_train.drop(columns=['target', 'image_name']).values
# Labels, one per row, taken from the 'target' column.
y_csv = df_train['target'].values

In [None]:
# Sanity check: both arrays must have the same number of rows.
print(X_csv.shape, y_csv.shape)

In [None]:
class CNN(nn.Module):
    """Image branch: a pretrained EfficientNet-B3 whose final layer outputs 16 values."""

    def __init__(self):
        super().__init__()

        # Load the pretrained backbone, then swap its final fully connected
        # layer for a fresh 1536 -> 16 projection.
        backbone = efficientnet_pytorch.EfficientNet.from_pretrained('efficientnet-b3')
        backbone._fc = nn.Linear(1536, 16)
        self.base_model = backbone

    def forward(self, x):
        """Pass the image batch ``x`` through the backbone and return its output."""
        return self.base_model(x)

In [None]:
class ANN(nn.Module):
    """Tabular branch: a two-layer MLP mapping 15 input features to 16 outputs.

    Architecture: Linear(15, 128) -> LeakyReLU -> Dropout(p=0.3)
    -> Linear(128, 16) -> LeakyReLU.
    """

    def __init__(self):
        super(ANN, self).__init__()

        self.data1 = nn.Linear(15, 128)
        self.data2 = nn.Linear(128, 16)

    def forward(self, xd):
        """Return the 16-dimensional embedding for a batch ``xd`` of shape (batch, 15)."""
        xd = F.leaky_relu(self.data1(xd))
        # The functional dropout defaults to training=True, which would keep
        # randomly zeroing activations even after model.eval().  Tie it to the
        # module's mode so inference is deterministic.
        xd = F.dropout(xd, p=0.3, training=self.training)
        xd = F.leaky_relu(self.data2(xd))

        return xd

In [None]:
# Instantiate the two branches: the image network and the tabular network.
image_model = CNN()
data_model = ANN()

In [None]:
class CONCAT(nn.Module):
    """Joint model: concatenates the image and tabular branch outputs.

    The two branch outputs are joined along the feature axis and mapped to a
    single raw score by a Linear(32, 1) layer (no activation is applied here).
    """

    def __init__(self, image_model, data_model):
        super().__init__()

        # The branches are stored by reference, not copied.
        self.data = data_model
        self.images = image_model

        self.concat = nn.Linear(32, 1)

    def forward(self, x, xd):
        """Return one raw score per sample for images ``x`` and tabular rows ``xd``."""
        tabular_features = self.data(xd)
        image_features = self.images(x)

        # Image features first, tabular features second.
        fused = torch.cat((image_features, tabular_features), dim=1)
        return self.concat(fused)

In [None]:
# Build the joint model.  It holds references to the very same image_model /
# data_model objects created above, so their parameters are shared with it.
model = CONCAT(image_model, data_model)

In [None]:
# Freeze every parameter of the image backbone so the optimiser skips them.
for backbone_param in image_model.base_model.parameters():
    backbone_param.requires_grad = False

In [None]:
# Re-enable training for the backbone's final fully connected layer.  Both its
# weight and its bias are unfrozen: enabling only the weight would leave the
# bias excluded from every gradient update.
for head_param in image_model.base_model._fc.parameters():
    head_param.requires_grad = True

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Move the branches and the joint model to the selected device.  The last
# expression also makes the notebook display the model's structure.
image_model.to(device)
data_model.to(device)
model.to(device)

In [None]:
# Read every training image into memory, in the same order as X_img so the
# list lines up row-for-row with the tabular features and the targets.
X_image = []
for image_get in X_img:
    img_path = img_train_folder + '{}.jpg'.format(image_get)
    img = cv2.imread(img_path)

    # cv2.imread reports a missing or unreadable file by returning None rather
    # than raising; stop here with the offending path instead of failing much
    # later inside the transform pipeline.
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_path))

    # NOTE(review): cv2.imread yields channels in BGR order, while the
    # Normalize statistics used later look like the RGB ImageNet values —
    # confirm whether a BGR->RGB conversion is intended.
    X_image.append(img)

In [None]:
# Cast the tabular features to float32 before they are turned into tensors.
X_csv = X_csv.astype(np.float32)

In [None]:
# Labels as a NumPy array, in the same row order as X_csv and X_image.
Y = np.array(y_csv)

In [None]:
# Augmentation pipeline applied to each raw image array: random horizontal and
# vertical flips, tensor conversion + per-channel normalisation, conversion
# back to a PIL image, and finally a random rotation of up to 25 degrees.
# NOTE(review): the second ToPILImage() comes after Normalize().  Converting a
# normalised float tensor back to an 8-bit image presumably clips/wraps the
# normalised values, so the network may not actually receive normalised
# inputs — confirm.  A fix would do the geometric transforms first and
# ToTensor()/Normalize() last, but the consumers (which expect HWC images) and
# the validation/test pipelines would have to change together.
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)),
    transforms.ToPILImage(),
    transforms.RandomRotation(25)
])

In [None]:
# Training hyper-parameters.
batch_size = 64   # samples per mini-batch in the data loaders
n_epochs = 20     # number of epochs passed to batch_gd
# Placeholders; both names are reassigned with the arrays returned by batch_gd.
train_losses = []
val_losses = []

In [None]:
def _augment_to_tensor(images):
    """Run ``preprocess`` over ``images`` and return them as one float32 tensor.

    Each transformed image is converted to a NumPy array, the arrays are
    stacked along a new leading axis, and the last axis is moved to position 1
    (assumes ``preprocess`` yields height x width x channel images — TODO
    confirm), giving a tensor indexed as (sample, channel, height, width).
    """
    # Release memory left over from a previous pass before allocating again.
    gc.collect()

    transformed = [np.array(preprocess(image)) for image in tqdm(images)]
    # Convert straight to float32 to avoid holding an extra full-size copy.
    stacked = np.asarray(transformed, dtype=np.float32)

    return torch.from_numpy(np.transpose(stacked, (0, 3, 1, 2)))


def transform_images(X_train_image, X_val_image, X_train_csv, X_val_csv, Y_train, Y_val):
    """Build the training and validation data loaders for one pass.

    Args:
        X_train_image, X_val_image: sequences of raw image arrays.
        X_train_csv, X_val_csv: NumPy arrays of tabular features.
        Y_train, Y_val: NumPy arrays of targets; reshaped to a float column.

    Returns:
        ``(train_loader, val_loader)``.  Both use the global ``batch_size``;
        only the training loader shuffles.

    NOTE(review): the validation images go through the same randomised
    ``preprocess`` pipeline as the training images, so validation metrics are
    computed on augmented data — confirm this is intended.
    """
    print('Train images')
    input_train = _augment_to_tensor(X_train_image)
    input_train_data = torch.from_numpy(X_train_csv)
    target_train = torch.from_numpy(Y_train).reshape(-1, 1).float()

    print('Val images')
    input_val = _augment_to_tensor(X_val_image)
    input_val_data = torch.from_numpy(X_val_csv)
    target_val = torch.from_numpy(Y_val).reshape(-1, 1).float()

    training_set = torch.utils.data.TensorDataset(input_train, input_train_data, target_train)
    train_loader = torch.utils.data.DataLoader(dataset=training_set,
                                               batch_size=batch_size,
                                               num_workers=4,
                                               shuffle=True)
    val_set = torch.utils.data.TensorDataset(input_val, input_val_data, target_val)
    val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                             batch_size=batch_size,
                                             num_workers=4,
                                             shuffle=False)

    return train_loader, val_loader

In [None]:
# Loss: binary cross-entropy computed directly on the model's raw scores.
criterion = nn.BCEWithLogitsLoss()

# One Adam optimiser with a separate parameter group per sub-network; only
# parameters that still require gradients are handed to it.
# NOTE(review): weight_decay=0.9 on the final layer is unusually large —
# confirm it is intentional.
optimizer = torch.optim.Adam([
    {"params": [p for p in data_model.parameters() if p.requires_grad],
     'lr': 0.00005},
    {"params": [p for p in image_model.parameters() if p.requires_grad],
     'lr': 0.00001},
    {"params": [p for p in model.concat.parameters() if p.requires_grad],
     'lr': 0.00005, "weight_decay": 0.9},
])



In [None]:
def binary_acc(output_pred, target):
    """Return the batch accuracy as a whole-number percentage (0-dim tensor).

    ``output_pred`` holds raw scores; they are squashed with a sigmoid and
    rounded to 0/1 before being compared element-wise with ``target``.
    """
    predicted_labels = torch.sigmoid(output_pred).round()
    n_correct = predicted_labels.eq(target).sum().float()

    # Fraction correct -> percent, rounded to the nearest integer.
    return torch.round(n_correct / target.shape[0] * 100)

In [None]:
def batch_gd(model, criterion, optimizer, X_image, X_csv, Y, n_epochs):
    """Train ``model`` for ``n_epochs`` epochs, checkpointing after each one.

    Every epoch runs two train/validate passes with different positional
    splits of the data:

    * pass 0 trains on rows ``[:27126]`` and validates on rows ``[27126:]``;
    * pass 1 trains on rows ``[6000:]`` and validates on rows ``[:6000]``.

    Only the metrics of the second pass are recorded for the epoch.

    NOTE(review): the two splits overlap — each pass trains on rows the other
    pass validates on — so the validation figures are not from unseen data
    after the first pass.  Confirm this is intended.

    Args:
        model: joint network called as ``model(images, tabular)``.
        criterion: loss applied to ``(outputs, targets)``.
        optimizer: optimiser stepped once per training batch.
        X_image, X_csv, Y: images, tabular features and targets, row-aligned.
        n_epochs: number of epochs to run.

    Returns:
        ``(train_losses, val_losses, train_accuracy, val_accuracy)``, each a
        NumPy array of length ``n_epochs``.

    Side effects:
        After every epoch the state dicts of the global ``image_model`` and
        ``data_model`` and of ``model`` are saved to disk.  The model is left
        in evaluation mode after the last validation pass.
    """
    train_losses = np.zeros(n_epochs)
    val_losses = np.zeros(n_epochs)
    train_accuracy = np.zeros(n_epochs)
    val_accuracy = np.zeros(n_epochs)

    for it in range(n_epochs):
        t0 = datetime.now()

        for count in range(2):

            if count == 0:
                X_train_image = X_image[:27126]
                X_train_csv = X_csv[:27126]
                Y_train = Y[:27126]
                X_val_image = X_image[27126:]
                X_val_csv = X_csv[27126:]
                Y_val = Y[27126:]
            else:
                X_train_image = X_image[6000:]
                X_train_csv = X_csv[6000:]
                Y_train = Y[6000:]
                X_val_image = X_image[:6000]
                X_val_csv = X_csv[:6000]
                Y_val = Y[:6000]

            # Loaders are rebuilt for every pass, which re-applies the random
            # image transforms.
            train_loader, val_loader = transform_images(X_train_image, X_val_image, X_train_csv, X_val_csv, Y_train, Y_val)

            print('Training')

            # Training mode: the previous validation pass switched to eval().
            model.train()
            batch_train_loss = []
            batch_train_acc = []

            for inputs, inputs_data, targets in tqdm(train_loader):
                inputs, inputs_data, targets = inputs.to(device), inputs_data.to(device), targets.to(device)

                optimizer.zero_grad()
                outputs = model(inputs, inputs_data)

                loss = criterion(outputs, targets)
                acc = binary_acc(outputs, targets)

                loss.backward()
                optimizer.step()

                batch_train_loss.append(loss.item())
                batch_train_acc.append(acc.item())

            train_loss = np.mean(batch_train_loss)
            train_acc = np.mean(batch_train_acc)

            # Validation: eval mode disables dropout and makes normalisation
            # layers use their running statistics; no_grad avoids building
            # autograd graphs that are never used.
            model.eval()
            batch_val_loss = []
            batch_val_acc = []

            with torch.no_grad():
                for inputs, inputs_data, targets in tqdm(val_loader):
                    inputs, inputs_data, targets = inputs.to(device), inputs_data.to(device), targets.to(device)
                    outputs = model(inputs, inputs_data)

                    loss = criterion(outputs, targets)
                    acc = binary_acc(outputs, targets)

                    batch_val_loss.append(loss.item())
                    batch_val_acc.append(acc.item())

            val_loss = np.mean(batch_val_loss)
            val_acc = np.mean(batch_val_acc)

        # Record the metrics of the last pass of this epoch.
        train_losses[it] = train_loss
        val_losses[it] = val_loss
        train_accuracy[it] = train_acc
        val_accuracy[it] = val_acc

        dt = datetime.now() -t0

        # Checkpoint both branches and the joint model; the epoch index is the
        # file-name suffix.
        torch.save(image_model.state_dict(), '/home/malmason/datasets/siim-isic-melanoma-classification/skin_train_concat_rgb_eff_net_b3_image' + str(it) + '.pt')
        torch.save(data_model.state_dict(), '/home/malmason/datasets/siim-isic-melanoma-classification/skin_train_concat_rgb_eff_net_b3_data' + str(it) + '.pt')
        torch.save(model.state_dict(), '/home/malmason/datasets/siim-isic-melanoma-classification/skin_train_concat_rgb_eff_net_b3_model' + str(it) + '.pt')

        print(f'Epoch {it+1}/{n_epochs}, Time: {dt}, Train Loss: {train_loss:.4f}, Train_acc: {train_acc}, Val Loss: {val_loss:.4f}, Val acc: {val_acc}')

    return train_losses, val_losses, train_accuracy, val_accuracy

In [None]:
# Run the full training loop and keep the per-epoch loss / accuracy curves.
train_losses, val_losses, train_accuracy, val_accuracy = batch_gd(
    model, criterion, optimizer, X_image, X_csv, Y, n_epochs)

In [None]:
# Per-epoch training vs. validation loss (x axis: epoch index).
plt.plot(train_losses, label='train loss')
plt.plot(val_losses, label='val loss')
plt.legend()
plt.show()

In [None]:
# Per-epoch training vs. validation accuracy, in percent (x axis: epoch index).
plt.plot(train_accuracy, label='train accuracy')
plt.plot(val_accuracy, label='val accuracy')
plt.legend()
plt.show()

## Test Model

In [None]:
# Select the device again and make sure all three modules live on it before
# the saved weights are loaded and evaluated.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

image_model.to(device)
data_model.to(device)
model.to(device)

In [None]:
# Restore the checkpoints whose file names carry the suffix 7.
# map_location=device lets checkpoints written on a GPU be loaded on a
# CPU-only machine (and vice versa) instead of failing inside torch.load.
# image_model and data_model are also sub-modules of model, so the last load
# sets their weights once more from the joint checkpoint.
image_model.load_state_dict(torch.load('/home/malmason/datasets/siim-isic-melanoma-classification/skin_train_concat_rgb_eff_net_b3_image7.pt', map_location=device))
data_model.load_state_dict(torch.load('/home/malmason/datasets/siim-isic-melanoma-classification/skin_train_concat_rgb_eff_net_b3_data7.pt', map_location=device))
model.load_state_dict(torch.load('/home/malmason/datasets/siim-isic-melanoma-classification/skin_train_concat_rgb_eff_net_b3_model7.pt', map_location=device))

In [None]:
# Deterministic pipeline for validation images: tensor conversion,
# per-channel normalisation, then conversion back to a PIL image.
# NOTE(review): ToPILImage() after Normalize() presumably clips/wraps the
# normalised values when producing an 8-bit image — confirm.  Any fix must be
# applied to the training and test pipelines at the same time so that all
# three stay consistent.
preprocess_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)),
    transforms.ToPILImage()
])

In [None]:
def get_images(X_val_image, X_val_csv, Y_val):
    """Return a non-shuffled DataLoader over validation images, features and targets.

    Each image goes through ``preprocess_val``; the results are stacked into a
    float32 array whose last axis is moved to position 1 before it becomes a
    tensor.  Targets are reshaped to a float column.  Batches hold 64 samples.

    NOTE(review): a later cell defines another ``get_images`` with a
    two-argument signature, which replaces this one once that cell has run.
    """
    processed = [np.array(preprocess_val(raw_image)) for raw_image in X_val_image]
    image_batch = np.array(processed).astype(np.float32)

    input_val = torch.from_numpy(np.transpose(image_batch, (0, 3, 1, 2)))
    input_val_data = torch.from_numpy(X_val_csv)
    target_val = torch.from_numpy(Y_val).reshape(-1, 1).float()

    val_set = torch.utils.data.TensorDataset(input_val, input_val_data, target_val)
    return torch.utils.data.DataLoader(dataset=val_set,
                                       batch_size=64,
                                       num_workers=2,
                                       shuffle=False)

In [None]:
# Evaluate the restored model on the rows from index 27126 onward.
# Inference must run in eval mode, and no gradients are needed.
model.eval()

X_val_image = X_image[27126:]
X_val_csv = X_csv[27126:]
Y_val = Y[27126:]

val_loader = get_images(X_val_image, X_val_csv, Y_val)

val_predictions_all = []        # per-batch hard 0/1 predictions
val_predictions_all_value = []  # per-batch predicted probabilities
val_targets_all = []            # per-batch ground-truth labels

with torch.no_grad():
    for inputs, inputs_data, targets in val_loader:

        val_targets_all.append(targets.numpy())

        inputs, inputs_data = inputs.to(device), inputs_data.to(device)
        val_outputs = torch.sigmoid(model(inputs, inputs_data))

        # The model emits a single column, so an arg-max over dim 1 would
        # always give class 0.  Round the probability instead, which is the
        # same rule binary_acc applies during training.
        val_predictions = torch.round(val_outputs).long()
        val_predictions_all.append(val_predictions.cpu().numpy())
        val_predictions_all_value.append(val_outputs.cpu().numpy())

# Flatten the per-batch (batch, 1) arrays into 1-D arrays of equal length.
val_predictions_np_out = np.concatenate(val_predictions_all).ravel()
val_predictions_np_out_value = np.concatenate(val_predictions_all_value).ravel()
val_targets_np_out = np.concatenate(val_targets_all).ravel()

val_count = len(val_predictions_np_out)

# Built-in int dtype: the np.int alias has been removed from NumPy.
n_correct_val = float((val_predictions_np_out == val_targets_np_out.astype(int)).sum())

val_acc = n_correct_val / val_count

print(f"Val acc: {val_acc:.4f}")

In [None]:
# ROC curve of the predicted probabilities against the true labels, and the
# area under that curve.
fpr_val, tpr_val, _ = roc_curve(val_targets_np_out, val_predictions_np_out_value)
auc_pred_val = auc(fpr_val, tpr_val) 

In [None]:
# Plot the ROC curve; the legend entry shows the AUC value.
plt.plot(fpr_val, tpr_val, label = auc_pred_val)
plt.legend()

## Test upload

In [None]:
# df_test_csv is another name for the same DataFrame object (not a copy).
df_test_csv = df_test
# Image identifiers of the test rows, and the tabular features without them.
X_test_img = df_test_csv['image_name']
X_test_csv = df_test_csv.drop(['image_name'], axis=1).values

In [None]:
# Read every test image into memory, in the same order as X_test_img so the
# list lines up row-for-row with the tabular test features.
X_test_image = []
for image_get in X_test_img:
    img_test_path = img_test_folder + '{}.jpg'.format(image_get)
    img_test = cv2.imread(img_test_path)

    # cv2.imread returns None for a missing or unreadable file; fail here with
    # the offending path instead of later inside the transform pipeline.
    if img_test is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_test_path))

    X_test_image.append(img_test)

In [None]:
# Cast the tabular test features to float32 before they become tensors.
X_test_csv = X_test_csv.astype(np.float32)

In [None]:
# Deterministic pipeline for test images: tensor conversion, per-channel
# normalisation, then conversion back to a PIL image.
# NOTE(review): ToPILImage() after Normalize() presumably clips/wraps the
# normalised values when producing an 8-bit image — confirm.  Any fix must be
# applied to the training and validation pipelines at the same time so that
# all three stay consistent.
preprocess_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)),
    transforms.ToPILImage()
])

In [None]:
def get_images(X_test_image, X_test_csv):
    """Return a non-shuffled DataLoader over test images and tabular features.

    Each image goes through ``preprocess_test``; the results are stacked into
    a float32 array whose last axis is moved to position 1 before it becomes a
    tensor.  Batches use the global ``batch_size``.

    NOTE(review): this reuses the name of the earlier three-argument
    ``get_images`` and replaces it once this cell has run.
    """
    processed = [np.array(preprocess_test(raw_image)) for raw_image in X_test_image]
    image_batch = np.array(processed).astype(np.float32)

    input_test = torch.from_numpy(np.transpose(image_batch, (0, 3, 1, 2)))
    input_test_data = torch.from_numpy(X_test_csv)

    test_set = torch.utils.data.TensorDataset(input_test, input_test_data)
    return torch.utils.data.DataLoader(dataset=test_set,
                                       batch_size=batch_size,
                                       num_workers=2,
                                       shuffle=False)

In [None]:
# Predict a probability for every test sample, batch by batch.
predicted_outputs = []
test_loader = get_images(X_test_image, X_test_csv)

# Eval mode so dropout is off and normalisation layers use running statistics;
# no_grad because no gradients are needed for inference.
model.eval()
with torch.no_grad():
    for inputs, inputs_data in test_loader:

        inputs, inputs_data = inputs.to(device), inputs_data.to(device)
        test_outputs = torch.sigmoid(model(inputs, inputs_data))
        outputs_sigmoid_numpy = test_outputs.detach().cpu().numpy()
        predicted_outputs.append(outputs_sigmoid_numpy)

In [None]:
# Join the per-batch probability arrays into one (n_samples, 1) array.
# np.array() on the list fails on current NumPy whenever the last batch is
# smaller than the others (ragged input); concatenation has no such limit.
# Reshaping each entry to a column also keeps this cell safe to re-run.
predicted_outputs = np.concatenate(
    [np.reshape(batch_outputs, (-1, 1)) for batch_outputs in predicted_outputs],
    axis=0)

In [None]:
# Flatten one level: gather every element of every entry of predicted_outputs
# into a single array, preserving order.
predicted_outputs_total = np.array(
    [test for batch_outputs in predicted_outputs for test in batch_outputs]
)

In [None]:
# Check the element type of the collected predictions.
predicted_outputs_total.dtype

In [None]:
# Test image identifiers as a NumPy array (used below for a shape comparison).
X_test_image_submission = np.array(X_test_img)

In [None]:
# Number of test identifiers.
X_test_image_submission.shape

In [None]:
# Reshape the predictions to a 1-D array with one entry per test sample.
predicted_outputs_total_submission = predicted_outputs_total.reshape(len(predicted_outputs_total))

In [None]:
# Sanity check: predictions and identifiers must have the same length.
print(predicted_outputs_total_submission.shape, X_test_image_submission.shape)

In [None]:
# Submission table: one row per test image with its predicted probability.
test_submission = pd.DataFrame({'image_name':X_test_img, 'target':predicted_outputs_total_submission})

In [None]:
# Preview the first rows of the submission table.
test_submission.head()

In [None]:
# Write the submission file.  index=False keeps the DataFrame's row index out
# of the CSV so it contains exactly the two columns image_name and target.
test_submission.to_csv('/home/malmason/datasets/siim-isic-melanoma-classification/predictions_2f.csv', index=False)