# PADUFES20



Libraries

In [1]:
import pandas as pd
import numpy as np
import pickle
import os
import sys
import time
import gc
import warnings
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor
import copy
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [15, 7]

from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

from efficientnet_pytorch import EfficientNet
import torchextractor as tx

from itertools import chain, combinations

def get_combs(l):
    """Return every non-empty combination of the items in ``l``.

    Combinations are emitted grouped by size (all 1-tuples first, then all
    2-tuples, ... up to the single ``len(l)``-tuple). Each combination is a
    tuple that keeps the relative order of the items in ``l``.
    """
    combos = []
    for size in range(1, len(l) + 1):
        combos.extend(combinations(l, size))
    return combos

In [2]:
sys.path.append('..')

from utils.train import train
from utils.metrics import get_scores, get_metrics
from utils.dataset import get_data_loader
from utils.models import get_model, BaseMetaModel, MetaModel

# Dataset

In [4]:
# Load the per-image table (image id, diagnostic label and encoded metadata
# columns) and show it as the cell output.
df = pd.read_csv(filepath_or_buffer='train_meta.csv')
df

Unnamed: 0,img_id,patient_id,lesion_id,biopsed,diagnostic,diagnostic_number,age,smoke_False,smoke_True,drink_False,...,hurt_UNK,changed_False,changed_True,changed_UNK,bleed_False,bleed_True,bleed_UNK,elevation_False,elevation_True,elevation_UNK
0,PAT_1516_1765_530.png,PAT_1516,1765,False,NEV,3,8,0,0,0,...,0,1,0,0,1,0,0,1,0,0
1,PAT_46_881_939.png,PAT_46,881,True,BCC,1,55,1,0,1,...,0,0,1,0,0,1,0,0,1,0
2,PAT_1545_1867_547.png,PAT_1545,1867,False,ACK,0,77,0,0,0,...,0,1,0,0,1,0,0,1,0,0
3,PAT_1989_4061_934.png,PAT_1989,4061,False,ACK,0,75,0,0,0,...,0,1,0,0,1,0,0,1,0,0
4,PAT_1549_1882_230.png,PAT_1549,1882,False,SEK,5,53,0,0,0,...,0,1,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1910,PAT_273_421_905.png,PAT_273,421,True,BCC,1,41,1,0,1,...,0,0,0,1,0,1,0,0,1,0
1911,PAT_491_934_46.png,PAT_491,934,True,SCC,4,43,1,0,1,...,0,0,0,1,0,1,0,0,1,0
1912,PAT_1708_3156_175.png,PAT_1708,3156,False,ACK,0,73,0,0,0,...,0,1,0,0,1,0,0,1,0,0
1913,PAT_46_880_140.png,PAT_46,880,True,BCC,1,55,1,0,1,...,0,0,1,0,1,0,0,1,0,0


In [5]:
# Load the pre-computed split indices from pickle files in the working directory.
# `with` guarantees each file handle is closed even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code while loading - only load
# index files that were produced by this project.
with open('train_idcs', "rb") as open_file:
    train_folds = pickle.load(open_file)  # presumably one index collection per CV fold - TODO confirm

with open('val_idcs', "rb") as open_file:
    val_folds = pickle.load(open_file)    # presumably one index collection per CV fold - TODO confirm

with open('test_idcs', "rb") as open_file:
    test_idcs = pickle.load(open_file)    # row labels of `df` that form the held-out test set

# Testing

In [8]:
# Backbone identifiers to evaluate, grouped by architecture family.
# The order matters: it is the order in which the evaluation loop visits them.
model_names = (
    ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
    + ['effnetb0', 'effnetb1', 'effnetb2', 'effnetb3', 'effnetb4', 'effnetb5']
    + ['resnext50', 'resnext101']
    + ['vgg11', 'vgg13', 'vgg16']
    + ['vit_b_32', 'vit_l_32']
)
len(model_names)

18

In [None]:
# Ways of combining image features with patient metadata that are evaluated.
fusion_methods = [
    'no_meta',    # image-only baseline (uses the base-model checkpoints)
    'concat',     # passed to MetaModel as `fusion_method`
    'metanet',    # passed to MetaModel as `fusion_method`
    'metablock',  # passed to MetaModel as `fusion_method`
]

In [17]:
# Input pipeline settings.
input_size    = 224      # side length (pixels) of the square image handed to the network
batch_size    = 32
num_workers   = 16
data_dir      = 'imgs'   # folder that holds the image files named in df['img_id']
metadata_cols = df.columns[6:]   # every column from position 6 onward is used as model metadata

# Training-time preprocessing: random crop + horizontal flip augmentation,
# then tensor conversion and per-channel normalisation.
# NOTE(review): the mean/std values look like the usual ImageNet statistics - confirm.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation/test preprocessing: deterministic resize, no augmentation,
# same tensor conversion and normalisation as above.
val_transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [18]:
# Which cross-validation fold's checkpoints to evaluate, and model sizes.
fold, n_classes, n_reducer_block = 0, 6, 256

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

# Folders used for checkpoints and scores of the fusion models and of the
# image-only base models.
saved_models_folder = 'saved_models'
saved_scores_folder = 'saved_scores'
saved_base_models_folder = 'saved_basemodels'
saved_base_scores_folder = 'saved_basescores'

cuda


In [20]:
# Show the fusion methods currently defined in the kernel (sanity check before evaluation).
fusion_methods

['no_meta', 'concat', 'metanet', 'metablock']

In [21]:
# Dataloaders: image paths, labels and metadata for the held-out test rows.
test_imgs   = df.loc[test_idcs, 'img_id'].values
test_paths  = [os.path.join(data_dir, img) for img in test_imgs]
test_labels = df.loc[test_idcs, 'diagnostic_number'].values

test_metadata   = df.loc[test_idcs, metadata_cols].values
test_dataloader = get_data_loader(test_paths, test_labels, metadata=test_metadata, transform=val_transform, batch_size=batch_size, num_workers=num_workers)

# Evaluation (nothing is trained here: saved checkpoints are loaded and scored).
# `n_classes`, `fold`, `device` and the checkpoint folders come from the
# configuration cell; they are deliberately not redefined here so the two
# cells cannot drift apart.
n_metadata = test_metadata.shape[1]

# np.save does not create missing directories, so make sure the target exists.
test_scores_folder = 'test_scores'
os.makedirs(test_scores_folder, exist_ok=True)


def _load_checkpoint_model(model_name, fusion_method):
    """Build the network for one (backbone, fusion method) pair and load its weights.

    A fresh backbone is created on every call so that only one network lives
    on the device at a time (the previous version kept a shared backbone
    resident next to the model under test).

    Parameters
    ----------
    model_name : str
        Backbone identifier understood by ``get_model``.
    fusion_method : str
        ``'no_meta'`` loads the image-only checkpoint from
        ``saved_base_models_folder``; any other value is forwarded to
        ``MetaModel`` and loaded from ``saved_models_folder``.

    Returns
    -------
    The model on ``device``, switched to evaluation mode.
    """
    backbone = BaseMetaModel(get_model(model_name, n_classes=n_classes, pretrained=True)).to(device)
    if fusion_method == 'no_meta':
        model = backbone
        checkpoint = os.path.join(saved_base_models_folder, f'best_base_{model_name}_w_{fold}')
    else:
        model = MetaModel(backbone, n_classes, n_metadata=n_metadata, fusion_method=fusion_method, n_reducer_block=n_reducer_block).to(device)
        checkpoint = os.path.join(saved_models_folder, f'best_{model_name}_{fusion_method}_{fold}')

    # map_location lets checkpoints saved on a GPU be loaded on a CPU-only machine.
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    # Inference must not run with dropout / batch-norm in training mode.
    # NOTE(review): get_scores may already call eval(); doing it here is harmless.
    return model.eval()


all_metrics_dict = dict()
for model_name in model_names:
    model_dict = dict()

    for fusion_method in fusion_methods:
        print(f'{"*"*79}\n{model_name.upper()} {fusion_method.upper()}\n{"*"*79}\n')

        model = _load_checkpoint_model(model_name, fusion_method)
        try:
            # No gradients are needed for scoring; skipping them lowers peak GPU memory.
            with torch.no_grad():
                y_true, y_prob, y_pred = get_scores(model, test_dataloader, batch_size, device)
        finally:
            # Release the network even when scoring fails, so an error on one
            # model does not leave its weights occupying the GPU.
            del model
            gc.collect()
            torch.cuda.empty_cache()

        # Persist the raw scores (np.save appends the '.npy' extension).
        score_tag = f'{model_name}_{fusion_method}_{fold}'
        np.save(os.path.join(test_scores_folder, f'y_true_{score_tag}'), y_true)
        np.save(os.path.join(test_scores_folder, f'y_prob_{score_tag}'), y_prob)
        np.save(os.path.join(test_scores_folder, f'y_pred_{score_tag}'), y_pred)

        model_dict[fusion_method] = get_metrics(y_true, y_prob, y_pred)
    all_metrics_dict[model_name] = model_dict

*******************************************************************************
RESNET18 NO_META
*******************************************************************************

*******************************************************************************
RESNET18 CONCAT
*******************************************************************************

*******************************************************************************
RESNET18 METANET
*******************************************************************************

*******************************************************************************
RESNET18 METABLOCK
*******************************************************************************

*******************************************************************************
RESNET34 NO_META
*******************************************************************************

*******************************************************************************
RESNET34 CONCAT
*************

*******************************************************************************
EFFNETB5 METABLOCK
*******************************************************************************

*******************************************************************************
RESNEXT50 NO_META
*******************************************************************************

*******************************************************************************
RESNEXT50 CONCAT
*******************************************************************************

*******************************************************************************
RESNEXT50 METANET
*******************************************************************************

*******************************************************************************
RESNEXT50 METABLOCK
*******************************************************************************

*******************************************************************************
RESNEXT101 NO_META
****

OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 23.70 GiB total capacity; 2.06 GiB already allocated; 16.56 MiB free; 2.12 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [22]:
import shutil

# Zip the contents of the test_scores/ folder into test_scores.zip; the
# returned archive path is shown as the cell output.
shutil.make_archive(base_name='test_scores', format='zip', root_dir='test_scores')

'/home/gabriel/skin/PADUFES20/test_scores.zip'

In [12]:
import json

# Persist the nested {model name: {fusion method: metrics}} results as JSON,
# then echo the dictionary as the cell output.
with open('metrics_fusion_final.json', 'w') as outfile:
    outfile.write(json.dumps(all_metrics_dict))

all_metrics_dict

{'resnet18': {'no_meta': {'precision': 0.6649992479288678,
   'recall': 0.6910755148741419,
   'f1-score': 0.6701864433408058,
   'support': 437,
   'accuracy': 0.6910755148741419,
   'balanced_accuracy': 0.5775953132011243,
   'auc': 0.8841834685514142},
  'concat': {'precision': 0.6789797326932954,
   'recall': 0.7048054919908466,
   'f1-score': 0.6842040721485715,
   'support': 437,
   'accuracy': 0.7048054919908466,
   'balanced_accuracy': 0.5996484636639171,
   'auc': 0.9121503830932218},
  'metanet': {'precision': 0.6886314908787297,
   'recall': 0.7070938215102975,
   'f1-score': 0.6865155994767621,
   'support': 437,
   'accuracy': 0.7070938215102975,
   'balanced_accuracy': 0.5920763946520492,
   'auc': 0.8931159273145495},
  'metablock': {'precision': 0.7643597001637408,
   'recall': 0.7665903890160183,
   'f1-score': 0.7522280753158839,
   'support': 437,
   'accuracy': 0.7665903890160183,
   'balanced_accuracy': 0.6252500210405826,
   'auc': 0.927541194567614}},
 'resnet34'

In [20]:
# Tabulate the ViT-B/32 metrics: one column per fusion method, one row per metric.
# NOTE(review): the evaluation loop output above stops with a CUDA OOM before
# 'vit_b_32' is reached, so this entry presumably comes from an earlier run -
# confirm it is reproducible after Restart & Run All.
pd.DataFrame.from_dict(all_metrics_dict['vit_b_32'], orient='columns')

Unnamed: 0,no_meta,concat,metanet,metablock
precision,0.708831,0.710504,0.705732,0.728804
recall,0.700229,0.73913,0.736842,0.741419
f1-score,0.674825,0.718911,0.709441,0.718831
support,437.0,437.0,437.0,437.0
accuracy,0.700229,0.73913,0.736842,0.741419
balanced_accuracy,0.555576,0.594288,0.604353,0.568102
auc,0.901544,0.919212,0.912594,0.918096
