In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import LabelEncoder

import random
import torch
import os
import cv2

import gc
import time
import datetime

from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import transforms

from albumentations import (Compose, RandomResizedCrop, HorizontalFlip, 
                            VerticalFlip, HueSaturationValue,RandomBrightnessContrast,
                            Resize, Normalize )
from albumentations.pytorch import ToTensorV2, ToTensor

from torchvision.models import resnet34, resnet50
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.model_selection import GroupKFold,StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

In [None]:
def set_seed(seed = 1234):
    '''Seed every random number generator the notebook relies on.

    Args:
        seed: Integer seed applied to NumPy, Python's ``random`` module and
            PyTorch (CPU generator and all CUDA devices).
    '''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; this is a silent
    # no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    # cuDNN: request deterministic kernels and switch off the auto-tuner,
    # whose benchmark-driven algorithm selection can change between runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Exported for child processes; the hash seed of the already running
    # interpreter is fixed at start-up and is not affected by this line.
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed()

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is a notebook-wide global read by the model and inference cells.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device available now:', device)

In [None]:
# Competition metadata and the external data set (whose folder name suggests
# images pre-resized to 256 px -- TODO confirm).
baseDir ='../input/siim-isic-melanoma-classification'
extDir ='../input/melanoma-external-malignant-256'
train_df = pd.read_csv(baseDir + '/train.csv')
test_df = pd.read_csv(baseDir + '/test.csv')
train_ext = pd.read_csv(extDir + '/train_concat.csv')
train_df.drop(['diagnosis','benign_malignant'], inplace=True, axis=1)

# Keep only the external rows whose image is not already part of the
# competition training set. `.copy()` makes `new_data` an independent frame so
# the column assignment below does not write into a view of `train_ext`
# (pandas SettingWithCopyWarning).
images_ids = train_df['image_name'].unique()
new_data = train_ext[~train_ext['image_name'].isin(images_ids)].copy()

# Alternative: read the full-size competition JPEGs instead of the resized copies.
# train_df['image_path'] = baseDir+'/jpeg/train/'+ train_df['image_name']+'.jpg'
# test_df['image_path'] = baseDir+'/jpeg/test/'+ test_df['image_name']+'.jpg'
# new_data['image_path'] = extDir+'/train/train/'+ new_data['image_name']+'.jpg'

train_df['image_path'] = extDir+'/train/train/'+ train_df['image_name']+'.jpg'
test_df['image_path'] = extDir+'/test/test/'+ test_df['image_name']+'.jpg'
new_data['image_path'] = extDir+'/train/train/'+ new_data['image_name']+'.jpg'

# ignore_index=True gives `train` a clean 0..n-1 index. Without it the two
# source indexes overlap, so index labels are neither unique nor equal to row
# positions -- and later cells pass `.index` values to code that uses `.iloc`.
train = pd.concat([train_df,new_data], ignore_index=True)

In [None]:
# Fill missing metadata with fixed default values (presumably the most
# frequent value of each column -- TODO confirm against the data).
train['patient_id'] =train['patient_id'].fillna("0")
train['age_approx'] =train['age_approx'].fillna(45)
train['sex'] =train['sex'].fillna('male')
train['anatom_site_general_challenge'] =train['anatom_site_general_challenge'].fillna('torso')

# Imputing for test as well (only the anatomy column is filled here;
# NOTE(review): a missing `sex` in test_df would make le.transform below fail).
test_df['anatom_site_general_challenge'] =test_df['anatom_site_general_challenge'].fillna('torso')

# Integer-encode the categorical columns. The encoder is fitted on the
# training frame only and then reused for the test frame, so both share the
# same category -> integer mapping.
cat_cols =['sex','anatom_site_general_challenge']
for col in cat_cols:
    le = LabelEncoder()
    train[col] = le.fit_transform(train[col])
    test_df[col] = le.transform(test_df[col])
# Shorter column names; these are the names MelanomaDataset reads.
# NOTE: the in-place rename makes this cell non-idempotent -- re-running it
# without re-running the loading cell fails on the old column names.
train.rename(columns={"anatom_site_general_challenge":"anatomy","age_approx":"age" },inplace=True)
test_df.rename(columns={"anatom_site_general_challenge":"anatomy","age_approx":"age" },inplace=True)

In [None]:
# Preview the colour augmentation on the first training image.
img_path = train_df.iloc[0]['image_path']
org_img = cv2.imread(img_path)
# cv2.imread signals an unreadable/missing file by returning None.
if org_img is None:
    raise FileNotFoundError(f'Could not read image: {img_path}')
# OpenCV decodes to BGR while matplotlib displays RGB; convert so the preview
# shows the true colours instead of swapped red/blue channels.
org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB)
# img= ToPILImage()(org_img)
img= HueSaturationValue(sat_shift_limit=[0.7, 1.3],hue_shift_limit=[-0.1, 0.1])(image=org_img)['image']
fig,axes =plt.subplots(1,2)
axes[0].imshow(org_img)
axes[0].set_title('Original')
axes[1].imshow(img)
axes[1].set_title('HueSaturationValue')
plt.show()

In [None]:
class MelanomaDataset(Dataset):
    '''Dataset yielding an image tensor together with its tabular metadata.

    Args:
        df: DataFrame with an ``image_path`` column, the ``sex``, ``age`` and
            ``anatomy`` metadata columns and, unless ``isTest`` is set, a
            ``target`` column. Rows are addressed by position.
        vert_flip: Probability of a vertical flip (used with ``full_transform``).
        horz_flip: Probability of a horizontal flip (used with ``full_transform``).
        full_transform: When True apply the random training augmentations;
            otherwise only normalise and convert to a tensor.
        isTest: When True ``__getitem__`` returns no label.
    '''
    def __init__(self, df,vert_flip, horz_flip, full_transform = True, isTest= False):
        self.df = df
        self.full_transform = full_transform
        self.isTest = isTest
        if full_transform:
            self.transforms = Compose([
                RandomResizedCrop(height= 224,width= 224, scale= (0.4,1)),
                HorizontalFlip(p=horz_flip),
                VerticalFlip(p = vert_flip),
                HueSaturationValue(sat_shift_limit=[0.7, 1.3],hue_shift_limit=[-0.1, 0.1]),
                RandomBrightnessContrast(brightness_limit=[0.7, 1.3],contrast_limit= [0.7, 1.3]),
                Normalize(),
                ToTensor()
            ])
        else:
            self.transforms = Compose([Normalize(),ToTensor()])
#             self.transforms = Compose([Resize(height= 1024,width= 1024),Normalize(),ToTensor()])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        '''Return ``(image, csv_data)`` for test data, else ``((image, csv_data), target)``.'''
        # Positional lookup on this dataset's own frame. The metadata used to
        # be read from the global `train` frame, which handed validation and
        # test samples the features of unrelated training rows.
        row = self.df.iloc[idx]
        img_path = row['image_path']
        img_arr = cv2.imread(img_path)
        # cv2.imread returns None instead of raising when the file is unreadable.
        if img_arr is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # NOTE(review): the array stays in OpenCV's BGR channel order; left
        # unchanged so previously trained checkpoints keep seeing the same input.
        image = self.transforms(image = img_arr)['image']
        csv_data = np.array(row[['sex', 'age', 'anatomy']].values, dtype = np.float32)
        if self.isTest:
            return image,csv_data
        else:
            return (image,csv_data ) , row['target']
        

In [None]:
# dataset = MelanomaDataset(train[0:100],0.5, 0.5, full_transform = False)
# dataloader = DataLoader(dataset,batch_size=3, shuffle=False)

In [None]:
# sample_csv =None
# sample_images = None
# sample_labels = None
# for (images, csv_data), labels in dataloader:
#     sample_csv = csv_data 
#     sample_images = images
#     sample_labels = torch.tensor(labels, dtype=torch.float32)
#     print('images',images.shape)
#     print('csv_data',csv_data.shape)
#     print('label',labels)

  

In [None]:
class Resnet50Netwrok(nn.Module):
    '''ResNet-50 image branch combined with a small MLP over the CSV features.

    The 1000 outputs of the pretrained ResNet-50 head are concatenated with a
    500-dimensional embedding of the tabular features and mapped to a single
    logit (to be used with ``BCEWithLogitsLoss`` / ``sigmoid``).
    '''
    def __init__(self):
        super().__init__()
        self.model = resnet50(pretrained = True)
        # Final output size - one logit: melanoma or not
        self.output_size = 1
        # Tabular inputs supplied by MelanomaDataset: sex, age, anatomy
        self.csv_cols = 3
        self.csv_network = nn.Sequential(nn.Linear(self.csv_cols, 500),
                                         nn.BatchNorm1d(500),
                                         nn.ReLU(),
                                         nn.Dropout(p=0.2))
        # 1000 (ResNet-50 outputs) + 500 (CSV embedding)
        self.final_layer = nn.Linear(1500,self.output_size)

    def forward(self, image, csv_data):
        '''Return the raw logit, shape ``(batch, 1)``, for images and CSV features.'''
        image = self.model(image)
        csv_nn_data = self.csv_network(csv_data)

        # Concatenate both branches along the feature axis and classify.
        # (A debug print of the concatenated shape used to run on every
        # forward pass here and flooded the training output.)
        data = torch.cat((image,csv_nn_data), dim=1)
        out = self.final_layer(data)
        return out
        

In [None]:
# sample_resenet = Resnet50Netwrok()
# sample_out = sample_resenet(sample_images,sample_csv )

In [None]:
# # Criterion example
# criterion_example = nn.BCEWithLogitsLoss()
# # Unsqueeze(1) from shape=[3] to shape=[3, 1]
# loss = criterion_example(sample_out, sample_labels.unsqueeze(1))   
# print('Loss:', loss)

In [None]:
!pip install -q efficientnet_pytorch             
from efficientnet_pytorch import EfficientNet

In [None]:
class EfficientNetwork(nn.Module):
    '''EfficientNet feature extractor combined with an MLP over the CSV features.

    Args:
        modeltype: EfficientNet variant suffix such as ``"b2"``, ``"b4"`` or
            ``"b6"``; the backbone loaded is ``efficientnet-<modeltype>``.
    '''
    def __init__(self, modeltype):
        super().__init__()
        modelName = 'efficientnet-'+ modeltype
        self.model = EfficientNet.from_pretrained(modelName)
        # Final output size - one logit: melanoma or not
        self.output_size = 1
        # Tabular inputs supplied by MelanomaDataset: sex, age, anatomy
        self.csv_cols = 3
        self.csv_network = nn.Sequential(nn.Linear(self.csv_cols, 250),
                                         nn.BatchNorm1d(250),
                                         nn.ReLU(),
                                         nn.Dropout(p=0.2),

                                         nn.Linear(250, 250),
                                         nn.BatchNorm1d(250),
                                         nn.ReLU(),
                                         nn.Dropout(p=0.2))
        # Channel count produced by extract_features(): read it from the
        # backbone's own classifier input instead of a hand-maintained table.
        # The table only listed b2/b4/b6 (1408/1792/2304) and fell back to
        # 2560 for everything else, which is wrong for b0, b1, b3 and b5.
        nodel_node_count = self.model._fc.in_features

        final_layer_nodes = nodel_node_count+ 250
        self.final_layer = nn.Sequential(nn.Linear(final_layer_nodes,self.output_size ))

    def forward(self, images, csv_data):
        '''Return the raw logit, shape ``(batch, 1)``, for images and CSV features.'''
        # Convolutional feature map: (batch, channels, height, width)
        images_feature = self.model.extract_features(images)
        (m,n,fil1, fil2) = images_feature.shape

        # Global average pooling over the whole spatial extent -> (batch, channels)
        pooled_imgs = F.avg_pool2d(images_feature, (fil1, fil2)).reshape(m,n)

        # Embed the tabular features
        csv_nn_data = self.csv_network(csv_data)

        # Concatenate both branches along the feature axis and classify
        data = torch.cat((pooled_imgs,csv_nn_data), dim=1)
        out = self.final_layer(data)
        return out

In [None]:
# sample_effnet = EfficientNetwork("b2")
# sample_eff_out = sample_effnet(sample_images,sample_csv )

In [None]:
# # Criterion example
# criterion_example = nn.BCEWithLogitsLoss()
# # Unsqueeze(1) from shape=[3] to shape=[3, 1]
# loss = criterion_example(sample_eff_out, sample_labels.unsqueeze(1))   
# print('Loss:', loss)

In [None]:
# epochs = 15
# patience = 3
# TTA = 3
# num_workers = 8
# learning_rate = 0.0005
# weight_decay = 0.0
# lr_patience = 1            # 1 model not improving until lr is decreasing
# lr_factor = 0.4            # by how much the lr is decreasing

# batch_size1 = 32
# batch_size2 = 16

In [None]:
def getDataLoaoders(train_index,test_index):
    '''Create the DataLoaders for a single train/validation split.

    Both arguments are positional row numbers into the notebook-level ``train``
    frame. Training rows get the full augmentation pipeline and are shuffled
    (batch size 256); validation rows are only normalised and keep their order
    (batch size 64).

    Returns:
        ``(valid_dl, train_dl)`` -- the validation loader comes first.
    '''
    fit_rows = train.iloc[train_index].reset_index(drop= True)
    holdout_rows = train.iloc[test_index].reset_index(drop= True)

    fit_dataset = MelanomaDataset(fit_rows, 0.5, 0.5, full_transform=True)
    holdout_dataset = MelanomaDataset(holdout_rows, 0.5, 0.5, full_transform=False)

    # Earlier, smaller configuration: batch sizes 32 (train) / 16 (valid).
    fit_loader = DataLoader(
        fit_dataset,
        batch_size=256,
        shuffle=True,
        num_workers=8,
    )
    holdout_loader = DataLoader(
        holdout_dataset,
        batch_size=64,
        shuffle=False,
        num_workers=8,
    )
    return holdout_loader, fit_loader



def getBatchValues(batch, model):
    '''Run ``model`` on one labelled batch.

    Args:
        batch: ``((images, csv_data), labels)`` as produced by the DataLoader.
        model: Network called as ``model(images, csv_data)``.

    Returns:
        ``(out_labels, actual_labels, out_probs)``: the 0/1 predictions, the
        labels as a float column ``(batch, 1)``, and the raw model outputs
        (logits, despite the name).
    '''
    inputs, targets = batch
    raw_images, raw_csv = inputs

    # Move everything to the notebook-wide device as float32.
    image_tensor = torch.as_tensor(raw_images, device=device, dtype=torch.float32)
    csv_tensor = torch.as_tensor(raw_csv, device=device, dtype=torch.float32)
    target_tensor = torch.as_tensor(targets, device=device, dtype=torch.float32)

    logits = model(image_tensor, csv_tensor)
    # Shape [batch] -> [batch, 1] so the targets line up with the logits.
    target_column = target_tensor.unsqueeze(1).float()
    # Sigmoid turns logits into probabilities; rounding thresholds them at 0.5.
    hard_predictions = torch.sigmoid(logits).round()
    return hard_predictions, target_column, logits
    

In [None]:
def trainData(folds_list, model_name):
    '''Train one model per cross-validation fold with ROC-AUC early stopping.

    Args:
        folds_list: Iterable of ``(train_index, test_index)`` pairs holding
            positional row numbers into the notebook-level ``train`` frame.
        model_name: ``"res"`` for ``Resnet50Netwrok``; any other value is
            passed to ``EfficientNetwork`` as the variant (e.g. ``"b4"``).

    For every fold a fresh model is trained for at most 15 epochs. A
    checkpoint is written whenever the validation ROC-AUC improves, and
    training stops after 3 epochs without improvement.
    '''
    for fold, (train_index, test_index) in enumerate(folds_list):
        print(f'Fold-{fold +1} ----------------------------------------')
        valid_dl,train_dl = getDataLoaoders(train_index, test_index)
        if model_name == "res":
            model = Resnet50Netwrok().to(device)
        else:
            model = EfficientNetwork(model_name).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr =0.001, weight_decay= 0.0)

        # Reduce the learning rate (x0.4) once the validation ROC-AUC has not
        # improved for more than `patience` epochs.
        sceduler = ReduceLROnPlateau(optimizer=optimizer, mode="max", patience=1, verbose=True, factor=0.4)
        criterion = nn.BCEWithLogitsLoss()

        # Best ROC-AUC seen in this fold
        best_roc = None
        # Remaining epochs without improvement before training stops
        roc_patience = 3

        for epoch in range(15):
            # ---- training pass ----
            model.train()
            train_loss = 0
            correct_labels = 0
            for batch in train_dl:
                # Clear the gradients left over from the previous step
                optimizer.zero_grad()
                out_labels, labels, out_probs = getBatchValues(batch, model)

                loss = criterion(out_probs,labels)
                loss.backward()
                optimizer.step()
                train_loss = train_loss + loss.item()
                correct_labels = correct_labels + (out_labels == labels).sum().item()

            accuracy = correct_labels/len(train_index)*100

            # ---- validation pass ----
            model.eval()
            # Per-batch results are moved to the CPU and concatenated once,
            # rather than growing GPU tensors with repeated torch.cat calls.
            pred_batches, target_batches, logit_batches = [], [], []
            with torch.no_grad():
                for batch in valid_dl:
                    out_labels, labels, out_probs = getBatchValues(batch, model)
                    pred_batches.append(out_labels.cpu())
                    target_batches.append(labels.cpu())
                    logit_batches.append(out_probs.cpu())
            valid_pred = torch.cat(pred_batches, dim=0)
            valid_target = torch.cat(target_batches, dim=0)
            out_probs_auc = torch.sigmoid(torch.cat(logit_batches, dim=0))
            valid_acc = accuracy_score(valid_target,valid_pred)
            valid_roc = roc_auc_score(valid_target,out_probs_auc)

            # `is None` rather than truthiness: an AUC of exactly 0.0 is a
            # legitimate (if terrible) score and must not count as "unset".
            if best_roc is None or valid_roc > best_roc:
                best_roc = valid_roc
                # Reset patience because the model has improved
                roc_patience = 3
                torch.save(model.state_dict(), f"{model_name}_fo_{fold+1}_ep{epoch+1}_acc_{valid_acc:.3f}_roc_{valid_roc:.3f}.pth")
            else:
                # No improvement: one fewer epoch left before giving up
                roc_patience = roc_patience -1
            print(f'Epoch-{epoch+1}| Loss-{train_loss}| Train_acc-{accuracy}| valid_acc-{valid_acc}| ROC: {valid_roc}')
            if roc_patience == 0:
                print(f'No preformance improving. Early stopping at epoch: {epoch+1},Best ROC: {best_roc}')
                break
            sceduler.step(valid_roc)
                
            

In [None]:
# 6-fold stratified cross-validation of EfficientNet-b4.
# X is only a placeholder of the right length: the splitter needs the row
# count and the labels, not real features.
X = np.zeros(len(train))
y = train['target']
# No random_state is passed, so the shuffle draws from NumPy's global RNG.
# NOTE(review): stratification ignores patient_id, so images of one patient
# can land in both the training and the validation part of a fold.
strat_kfold = StratifiedKFold(n_splits=6,shuffle= True)
folds = strat_kfold.split(X, y)
# Materialise the generator so the folds can be iterated more than once.
fold_list = list(folds)
trainData(fold_list,"b4")

# a = train_df[1:1000]
# sample_groups = a['patient_id']
# X = np.zeros(len(a))
# y= a['target']
# sample_group_kfold = StratifiedKFold(n_splits=6,shuffle= True)
# sample_folds = sample_group_kfold.split(X, y, sample_groups)
# # To have multiple iterations
# sample_fold_list = list(sample_folds)

# # sample_model = EfficientNetwork("b2").to(device)
# trainData(sample_fold_list,"b2")

In [None]:
# 6-fold cross-validation of the ResNet-50 model, grouped by patient so that
# all images of one patient stay on the same side of every split.
# Rows whose patient_id was missing were filled with "0" in the imputation
# cell and therefore form one single group here.
groups = train['patient_id']
# Placeholder features: the splitter only needs the row count.
X = np.zeros(len(train))
y = train['target']
group_kfold = GroupKFold(n_splits=6)
folds = group_kfold.split(X, y, groups)
# To have multiple iterations
fold_list = list(folds)
trainData(fold_list,"res")


# a = train[0:1000]
# sample_groups = a['patient_id']
# X = np.zeros(len(a))
# y= a['target']
# sample_group_kfold = GroupKFold(n_splits=5)
# sample_folds = sample_group_kfold.split(X, y, sample_groups)
# # To have multiple iterations
# sample_fold_list = list(sample_folds)

# # sample_model = Resnet50Netwrok().to(device)
# trainData(sample_fold_list,"res")

In [None]:
# import os
# for f in os.listdir():
#     os.remove(f)


In [None]:
# EfficientNet-b2 inference with test-time augmentation (TTA): the test set is
# scored several times under random augmentation and the probabilities averaged.
model_path ="../input/melonomamodel/b2.pth"
b2_model_best = EfficientNetwork("b2").to(device)
# map_location lets a checkpoint saved on a GPU load on whatever device is in use.
b2_model_best.load_state_dict(torch.load(model_path, map_location=device))
b2_model_best.eval()
test_ds= MelanomaDataset(test_df,0.5, 0.5, full_transform = True, isTest= True)
test_dl = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=8)
submission = torch.zeros(size = (len(test_df), 1), dtype=torch.float32, device=device)
tta_rounds = 3
with torch.no_grad():
    for i in range(tta_rounds):
        # Running row offset into `submission`. The old `k*m` arithmetic used
        # the size of the *current* batch, which misplaces a short final
        # batch, and the accumulation read from `i*m` (TTA round) instead of
        # the batch's own rows.
        offset = 0
        for images, csv_data in test_dl:
            images = torch.as_tensor(images, device= device, dtype = torch.float32)
            csv_data = torch.as_tensor(csv_data, device= device, dtype = torch.float32)
            out_probs = b2_model_best(images, csv_data)
            m = images.shape[0]
            submission[offset: offset+m] += torch.sigmoid(out_probs)
            offset += m
submission_df = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/sample_submission.csv')
# Average over the TTA rounds and flatten (n, 1) -> (n,) for the column.
test_target = submission/tta_rounds
test_target = test_target.squeeze(1).cpu().numpy()
submission_df['target'] = test_target
submission_df.to_csv('b2_sub.csv', index=False)

In [None]:
# ResNet-50 inference on the test set (no augmentation).
model_path ="../input/melres2/res-2.pth"
res_model_best = Resnet50Netwrok().to(device)
# map_location lets a checkpoint saved on a GPU load on whatever device is in use.
res_model_best.load_state_dict(torch.load(model_path, map_location=device))
res_model_best.eval()
test_ds= MelanomaDataset(test_df,0.5, 0.5, full_transform = False, isTest= True)
test_dl = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=8)
submission = torch.zeros(size = (len(test_df), 1), dtype=torch.float32, device=device)
with torch.no_grad():
    # Running row offset: the old `k*m` arithmetic used the size of the
    # current batch, so a short final batch overwrote rows in the middle of
    # `submission` and left the last rows at zero.
    offset = 0
    for images, csv_data in test_dl:
        images = torch.as_tensor(images, device= device, dtype = torch.float32)
        csv_data = torch.as_tensor(csv_data, device= device, dtype = torch.float32)
        out_probs = res_model_best(images, csv_data)
        m = images.shape[0]
        submission[offset: offset+m] = torch.sigmoid(out_probs)
        offset += m
submission_df = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/sample_submission.csv')
# Flatten (n, 1) -> (n,) before storing the probabilities as a column.
test_target = submission.squeeze(1).cpu().numpy()
submission_df['target'] = test_target
submission_df.to_csv('res2_sub.csv', index=False)

In [None]:
# EfficientNet-b4 inference on the test set (no augmentation).
model_path ="../input/melonomab4/b4.pth"
b4_model_best = EfficientNetwork("b4").to(device)
# map_location lets a checkpoint saved on a GPU load on whatever device is in use.
b4_model_best.load_state_dict(torch.load(model_path, map_location=device))
b4_model_best.eval()
test_ds= MelanomaDataset(test_df,0.5, 0.5, full_transform = False, isTest= True)
test_dl = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=8)
submission = torch.zeros(size = (len(test_df), 1), dtype=torch.float32, device=device)
with torch.no_grad():
    # Running row offset: the old `k*m` arithmetic used the size of the
    # current batch, so a short final batch overwrote rows in the middle of
    # `submission` and left the last rows at zero.
    offset = 0
    for images, csv_data in test_dl:
        images = torch.as_tensor(images, device= device, dtype = torch.float32)
        csv_data = torch.as_tensor(csv_data, device= device, dtype = torch.float32)
        out_probs = b4_model_best(images, csv_data)
        m = images.shape[0]
        submission[offset: offset+m] = torch.sigmoid(out_probs)
        offset += m
submission_df = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/sample_submission.csv')
# Flatten (n, 1) -> (n,) before storing the probabilities as a column.
test_target = submission.squeeze(1).cpu().numpy()
submission_df['target'] = test_target
submission_df.to_csv('b4_sub.csv', index=False)

In [None]:
def trainSplitData(model_name,train_index, test_index):
    '''Train a single model on one fixed train/validation split.

    Args:
        model_name: ``"res"`` for ``Resnet50Netwrok``; any other value is
            passed to ``EfficientNetwork`` as the variant (e.g. ``"b6"``).
        train_index: Positional row numbers of the training rows in the
            notebook-level ``train`` frame.
        test_index: Positional row numbers of the validation rows.

    Trains for at most 15 epochs, writes a checkpoint after every epoch, and
    stops once the validation ROC-AUC has not improved for 8 epochs.
    '''
    valid_dl,train_dl = getDataLoaoders(train_index, test_index)
    if model_name == "res":
        model = Resnet50Netwrok().to(device)
    else:
        model = EfficientNetwork(model_name).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr =0.001, weight_decay= 0.0)
    # Reduce the learning rate (x0.4) once the validation ROC-AUC has not
    # improved for more than `patience` epochs.
    sceduler = ReduceLROnPlateau(optimizer=optimizer, mode="max", patience=1, verbose=True, factor=0.4)
    criterion = nn.BCEWithLogitsLoss()
    best_roc = None
    roc_patience = 8
    for epoch in range(15):
        # ---- training pass ----
        model.train()
        train_loss = 0
        correct_labels = 0
        for batch in train_dl:
            # Clear the gradients left over from the previous step
            optimizer.zero_grad()
            out_labels, labels, out_probs = getBatchValues(batch, model)
            loss = criterion(out_probs,labels)
            loss.backward()
            optimizer.step()
            train_loss = train_loss + loss.item()
            correct_labels = correct_labels + (out_labels == labels).sum().item()

        accuracy = correct_labels/len(train_index)*100

        # ---- validation pass ----
        model.eval()
        # Per-batch results are moved to the CPU and concatenated once,
        # rather than growing GPU tensors with repeated torch.cat calls.
        pred_batches, target_batches, logit_batches = [], [], []
        with torch.no_grad():
            for batch in valid_dl:
                out_labels, labels, out_probs = getBatchValues(batch, model)
                pred_batches.append(out_labels.cpu())
                target_batches.append(labels.cpu())
                logit_batches.append(out_probs.cpu())
        valid_pred = torch.cat(pred_batches, dim=0)
        valid_target = torch.cat(target_batches, dim=0)
        out_probs_auc = torch.sigmoid(torch.cat(logit_batches, dim=0))
        valid_acc = accuracy_score(valid_target,valid_pred)
        valid_roc = roc_auc_score(valid_target,out_probs_auc)

        # `is None` rather than truthiness: an AUC of exactly 0.0 is a
        # legitimate score and must not count as "unset".
        if best_roc is None or valid_roc > best_roc:
            best_roc = valid_roc
            # Reset patience because the model has improved
            roc_patience = 8
        else:
            # No improvement: one fewer epoch left before giving up
            roc_patience = roc_patience -1
        # A checkpoint is written after every epoch, improved or not.
        torch.save(model.state_dict(), f"{model_name}_ep{epoch+1}_acc_{valid_acc:.3f}_roc_{valid_roc:.3f}.pth")
        print(f'Epoch-{epoch+1}| Loss-{train_loss}| Train_acc-{accuracy}| valid_acc-{valid_acc}| ROC: {valid_roc}')
        if roc_patience == 0:
            print(f'No preformance improving. Early stopping at epoch: {epoch+1},Best ROC: {best_roc}')
            break
        sceduler.step(valid_roc)

In [None]:
# Single stratified 84/16 hold-out split, used to train EfficientNet-b6.
# trainSplitData selects rows with `.iloc`, i.e. by position, while the split
# hands back index *labels*. Resetting to a 0..n-1 index first makes labels
# and positions coincide, so the right rows are selected even when `train`
# carries the overlapping indexes of its concatenated source frames.
X = train.drop(columns=['target']).reset_index(drop=True)
y = train['target'].reset_index(drop=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.16, random_state=42, shuffle = True, stratify = y)
trainSplitData('b6',y_train.index, y_test.index)

In [None]:
# Quick run of EfficientNet-b4 on a small subset: the first 50 positive and
# the first 100 negative rows of `train`.
# The subset keeps the index labels of the frame it is taken from, and
# trainSplitData selects rows of `train` by position. Taking the subset from
# a positionally indexed copy makes those labels valid row positions.
train_positional = train.reset_index(drop=True)
c = train_positional[train_positional['target']== 1][0:50]
b = train_positional[train_positional['target']== 0][0:100]
a= pd.concat([c,b])
X = a.drop(columns=['target'])
y = a['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.16, random_state=42, shuffle = True, stratify = y)
trainSplitData('b4',y_train.index, y_test.index)

In [None]:
# EfficientNet-b4 inference from a checkpoint in the working directory.
# NOTE(review): the file name embeds the epoch, accuracy and ROC-AUC of one
# particular training run; a fresh run will presumably produce a different
# name, so this path has to be updated by hand.
model_path ="./b4_ep14_acc_0.973_roc_0.983.pth"
b4_model = EfficientNetwork("b4").to(device)
# map_location lets a checkpoint saved on a GPU load on whatever device is in use.
b4_model.load_state_dict(torch.load(model_path, map_location=device))
b4_model.eval()
test_ds= MelanomaDataset(test_df,0.5, 0.5, full_transform = False, isTest= True)
test_dl = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=8)
submission = torch.zeros(size = (len(test_df), 1), dtype=torch.float32, device=device)
with torch.no_grad():
    # Running row offset: the old `k*m` arithmetic used the size of the
    # current batch, so a short final batch overwrote rows in the middle of
    # `submission` and left the last rows at zero.
    offset = 0
    for images, csv_data in test_dl:
        images = torch.as_tensor(images, device= device, dtype = torch.float32)
        csv_data = torch.as_tensor(csv_data, device= device, dtype = torch.float32)
        out_probs = b4_model(images, csv_data)
        m = images.shape[0]
        submission[offset: offset+m] = torch.sigmoid(out_probs)
        offset += m
submission_df = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/sample_submission.csv')
# Flatten (n, 1) -> (n,) before storing the probabilities as a column.
test_target = submission.squeeze(1).cpu().numpy()
submission_df['target'] = test_target
submission_df.to_csv('b4_str_fold_14.csv', index=False)