In [81]:
import os
import sys
import cv2
import pandas as pd
from glob import glob
from PIL import Image
from enum import Enum
from typing import Optional, Tuple, List
from sklearn.model_selection import GroupShuffleSplit, train_test_split

import torch
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

ROOT_DIR = 'D:/Projects/Skin-Disease-Detection-Pytorch'
sys.path.append(ROOT_DIR)
from src.utils.config import *

In [46]:
DERMNET_DATA_DIR = 'D:/Datasets/Skin-Disease-Detection/dataset/Dermnet'
DERMNET_LABEL_NAME = os.listdir(f'{DERMNET_DATA_DIR}/train')

In [97]:
class DermnetDataset(Dataset):
    def __init__(
            self, 
            data: Tuple[str, int], 
            transform: Optional[transforms.Compose] = None
    ) -> None:
        self.data = data
        self.transform = transform

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        image_path, label = self.data[index]
        image = Image.open(image_path)
        label = torch.tensor(label)
        label = F.one_hot(label, num_classes=len(DERMNET_LABEL_NAME)).float()
        

        if self.transform:
            image = self.transform(image)
        return image, label, image_path



def _transforms(resize_size, crop_size):
    train_transform = transforms.Compose(
            [
                # transforms.ToPILImage(),
                transforms.Resize(resize_size[:-1]),
                transforms.CenterCrop(crop_size[:-1]),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ]
        )
    val_transform = transforms.Compose(
            [
                # transforms.ToPILImage(),
                transforms.Resize(resize_size[:-1]),
                transforms.CenterCrop(crop_size[:-1]),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ]
        )
    
    test_transform= transforms.Compose(
            [
                # transforms.ToPILImage(),
                transforms.Resize(resize_size[:-1]),
                transforms.CenterCrop(crop_size[:-1]),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ]
        )
    
    return train_transform, val_transform, test_transform
    



def prepare_dermnet_data(
        resize_size: Tuple[int, int, int], 
        crop_size: Tuple[int, int, int], 
        batch_size: int, 
        num_workers: int
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    
    DERMNET_DATA_DIR = 'D:/Datasets/Skin-Disease-Detection/dataset/Dermnet'
    label_names = os.listdir(f'{DERMNET_DATA_DIR}/train')
    train_data = []
    val_data = []
    for label in label_names:
        file_paths = glob(f'{DERMNET_DATA_DIR}/train/{label}/*')
        train_paths, val_paths = train_test_split(file_paths, test_size=0.2, random_state=42)

        sparse_label = label_names.index(label)
        train_data += [(path, sparse_label) for path in train_paths]
        val_data += [(path, sparse_label) for path in val_paths]

    test_data = []
    for label in label_names:
        file_paths = glob(f'{DERMNET_DATA_DIR}/test/{label}/*')
        sparse_label = label_names.index(label)
        test_data += [(path, sparse_label) for path in file_paths]


    train_transform, val_transform, test_transform = _transforms(resize_size[:-1], crop_size[:-1])

    train_ds = DermnetDataset(data=train_data, transform=train_transform)
    val_ds = DermnetDataset(data=val_data, transform=val_transform)
    test_ds = DermnetDataset(data=test_data, transform=test_transform)

    train_dataloader = DataLoader(
        train_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True,
        persistent_workers=True,
        pin_memory=True
    )

    val_dataloader = DataLoader(
        val_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True,
        persistent_workers=True,
        pin_memory=True
    )

    test_dataloader = DataLoader(
        test_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True,
        persistent_workers=True,
        pin_memory=True
    )

    return train_dataloader, val_dataloader, test_dataloader

In [100]:
DERMNET_DATA_DIR = 'D:/Datasets/Skin-Disease-Detection/dataset/Dermnet'
label_names = os.listdir(f'{DERMNET_DATA_DIR}/train')
train_data = []
val_data = []
for label in label_names:
    file_paths = glob(f'{DERMNET_DATA_DIR}/train/{label}/*')
    train_paths, val_paths = train_test_split(file_paths, test_size=0.2, random_state=42)

    sparse_label = label_names.index(label)
    train_data += [(path, sparse_label) for path in train_paths]
    val_data += [(path, sparse_label) for path in val_paths]

# test_data = []
# for label in label_names:
#     file_paths = glob(f'{DERMNET_DATA_DIR}/test/{label}/*')
#     sparse_label = label_names.index(label)
#     test_data += [(path, sparse_label) for path in file_paths]


train_transform, val_transform, test_transform = _transforms((256, 256), (224, 224))

train_ds = DermnetDataset(data=train_data, transform=train_transform)
# val_ds = DermnetDataset(data=val_data, transform=val_transform)
# test_ds = DermnetDataset(data=test_data, transform=test_transform)

img, label, image_path = train_ds.__getitem__(3200)
print(img.shape, label, image_path)

tensor(5)
torch.Size([3, 224, 224]) tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.]) D:/Datasets/Skin-Disease-Detection/dataset/Dermnet/train/Eczema Photos\lichen-simplex-chronicus-114.jpg


In [91]:
import torch
import torch.nn.functional as F
import numpy as np

# Your continuous label tensor
label = torch.from_numpy(np.arange(5, 10))

# Convert to integer indices
label_indices = (label.float() - label.min().float()).long()
label_indices
# Apply F.one_hot
# one_hot_label = F.one_hot(label_indices, num_classes=7)

# print(one_hot_label)


tensor([0, 1, 2, 3, 4])

In [94]:
one_hot_label = F.one_hot(torch.Tensor(4), num_classes=7)

RuntimeError: one_hot is only applicable to index tensor.

In [90]:
import numpy as np
label = torch.from_numpy(np.arange(3, 10))
F.one_hot(label.long(), num_classes=7)





RuntimeError: Class values must be smaller than num_classes.