In [1]:
# !/usr/bin/env python
# coding: utf-8

# ---- Library import ----

import pickle
from time import gmtime, strftime

import albumentations
import math
import torch.nn as nn
from torch.utils.data import DataLoader
import torchy

# ---- My utils ----
from utils.train_arguments import *
from utils.utils_data import *
from utils.utils_training import *

# Scale the image and crop resolutions by the resolution coefficient first
# (relevant when the EfficientNet scaling coefficients are in use).
# NOTE(review): args is updated in place, so re-running this cell scales the
# sizes again.
resolution_scale = args.resolution_coefficient
args.crop_size = math.ceil(args.crop_size * resolution_scale)
args.img_size = math.ceil(args.img_size * resolution_scale)


def _build_augmentation(crop_transform):
    """Compose the pad -> resize -> crop pipeline shared by train and validation.

    Args:
        crop_transform: albumentations crop class, instantiated with
            ``p=1`` and ``height``/``width`` equal to ``args.crop_size``.

    Returns:
        An ``albumentations.Compose`` that pads to at least the crop size,
        resizes to ``args.img_size`` x ``args.img_size`` and then crops.
    """
    return albumentations.Compose([
        albumentations.PadIfNeeded(p=1, min_height=args.crop_size, min_width=args.crop_size),
        albumentations.Resize(args.img_size, args.img_size),
        crop_transform(p=1, height=args.crop_size, width=args.crop_size),
    ])


# Training crops at a random position; validation crops at the center.
train_aug = _build_augmentation(albumentations.RandomCrop)
val_aug = _build_augmentation(albumentations.CenterCrop)

# Extra data augmentation is only announced here; nothing is added to the pipelines.
if args.data_augmentation:
    print("Data Augmentation to be implemented...")

train_dataset = ISIC2019_FromFolders(data_partition="train", albumentation=train_aug)

usage: ipykernel_launcher.py [-h] [--verbose] [--epochs EPOCHS]
                             [--batch_size BATCH_SIZE]
                             [--model_name {efficientnet}]
                             [--optimizer {adam,sgd,rmsprop}]
                             [--balanced_sampler BALANCED_SAMPLER]
                             [--depth_coefficient DEPTH_COEFFICIENT]
                             [--width_coefficient WIDTH_COEFFICIENT]
                             [--resolution_coefficient RESOLUTION_COEFFICIENT]
                             [--compound_coefficient COMPOUND_COEFFICIENT]
                             [--learning_rate LEARNING_RATE]
                             [--data_augmentation] [--img_size IMG_SIZE]
                             [--crop_size CROP_SIZE] [--output_dir OUTPUT_DIR]
                             [--path_extension PATH_EXTENSION]
ipykernel_launcher.py: error: unrecognized arguments: -f /run/user/1003/jupyter/kernel-e6eb7877-43a5-4759-a78a-47dce05711d6.j

Working with Jupyter notebook! (Default Arguments)


In [22]:
# Read the list of training images: one entry per line of train.txt.
TRAIN_FILE = ISIC_PATH + "train.txt"
# The context manager closes the file handle as soon as the content is read
# (a bare open(...).read() leaves the handle open until garbage collection).
with open(TRAIN_FILE) as train_file:
    # Splitting on '\n' keeps empty entries (e.g. the one after a trailing newline).
    TRAIN_IMGS = train_file.read().split('\n')

In [24]:
# Number of entries obtained by splitting train.txt on newlines (empty entries included).
len(TRAIN_IMGS)

21532

In [None]:
# Rewrite every '.jpg' entry as a path under ISIC_PATH + "Train/": the first
# component of the listed path is dropped and the remaining components are kept.
# Entries without '.jpg' are left unchanged, and empty entries are discarded
# (sanity check: no empty lines/items in the list).
TRAIN_IMGS = [
    ISIC_PATH + "Train/" + "/".join(entry.split("/")[1:]) if '.jpg' in entry else entry
    for entry in TRAIN_IMGS
    if entry
]

In [6]:
# Preview the first rows of ISIC_TRAIN_DF_TRUTH.
# NOTE(review): ISIC_TRAIN_DF_TRUTH is not defined in the visible cells; presumably it
# comes from one of the `utils` star imports — confirm.
ISIC_TRAIN_DF_TRUTH.head()

Unnamed: 0.1,Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK,target
0,0,ISIC_0000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,1,ISIC_0000001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,2,ISIC_0000002,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,3,ISIC_0000003,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,4,ISIC_0000004,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [21]:
# Number of rows in ISIC_TRAIN_DF_TRUTH.
len(ISIC_TRAIN_DF_TRUTH)

25331

In [20]:
# Number of training image entries once empty entries have been filtered out.
len(TRAIN_IMGS)

21531

In [14]:
# Map every training image path to the index label of its row in
# ISIC_TRAIN_DF_TRUTH, matching on the 'image' column.
#
# The image -> index table is built once, so each path costs one dictionary
# lookup instead of a comparison against the whole 'image' column.
index_by_image = {}
for df_index, image_name in zip(ISIC_TRAIN_DF_TRUTH.index, ISIC_TRAIN_DF_TRUTH['image']):
    # setdefault keeps the first row when an image name is duplicated
    # (same result as taking element [0] of the matching rows).
    index_by_image.setdefault(image_name, int(df_index))

TRAIN_REAL_INDEXES = []
for train_img in TRAIN_IMGS:
    name_start = train_img.find("ISIC_")
    name_end = train_img.find(".jpg")
    if name_start == -1 or name_end == -1:
        # str.find returns -1 on a miss, which would silently slice the wrong substring.
        raise ValueError("Cannot extract an image name from: {!r}".format(train_img))
    # Image name = text from the first "ISIC_" up to (not including) the first ".jpg".
    img = train_img[name_start:name_end]
    if img not in index_by_image:
        raise KeyError("Image {!r} not found in ISIC_TRAIN_DF_TRUTH".format(img))
    TRAIN_REAL_INDEXES.append(index_by_image[img])

In [28]:
# Select the sampler weights at the positions listed in TRAIN_REAL_INDEXES.
# NOTE(review): sampler_weights is only assigned in a later cell of this notebook, so
# this cell relies on kernel state from out-of-order execution — confirm on a fresh kernel.
# NOTE(review): positional indexing with TRAIN_REAL_INDEXES presumes sampler_weights has
# one entry per row of ISIC_TRAIN_DF_TRUTH — verify against how the weights were created.
np.array(sampler_weights)[TRAIN_REAL_INDEXES]

array([1.97, 9.65, 1.97, ..., 1.97, 1.97, 9.65])

In [34]:
# Build the weighted sampler and the training DataLoader.
# The sampler weights are cached on disk and only generated when the cache file is missing.
# NOTE(review): an existing cache is reused as-is, even if it was generated from a
# different set of rows — delete the file when the training subset changes.
weights_cache = "weights_sampler.pickle"
if not os.path.exists(weights_cache):
    train_truth = ISIC_TRAIN_DF_TRUTH.loc[TRAIN_REAL_INDEXES]
    torchy.utils.create_sampler_weights(train_truth, "target", weights_cache)

# NOTE(review): pickle.load can execute arbitrary code; only load cache files created locally.
with open(weights_cache, 'rb') as cache_file:
    sampler_weights = pickle.load(cache_file)

# Draw as many samples per epoch as there are items in the training dataset.
samples_per_epoch = len(train_dataset)
sampler = torch.utils.data.sampler.WeightedRandomSampler(sampler_weights, samples_per_epoch)

# shuffle stays False because the sampler decides the sampling order.
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    sampler=sampler,
    shuffle=False,
    pin_memory=True,
)

In [41]:
# Pull a single batch from the training loader and report the dataset size and
# the value range of the batch inputs.
# The built-in next() is used because Python 3 iterators expose __next__ and are
# not required to provide a .next() method.
x_batch, y_batch = next(iter(train_loader))
print("Training set: {} samples - Max value: {} - Min value: {}".format(
    len(train_loader.dataset), x_batch.max(), x_batch.min()))

Training set: 21531 samples - Max value: 255 - Min value: 0


In [42]:
# Display the second element of the batch drawn from train_loader
# (presumably the class labels of the batch — confirm against the dataset class).
y_batch

tensor([7, 6, 5, 0, 0, 3, 1, 7])