In [None]:
import numpy as np 
import pandas as pd 
import os
from PIL import Image, ImageFilter
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
import shutil

# pytorch packages
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torchvision.models import resnet50, ResNet50_Weights, resnet101, ResNet101_Weights, densenet121, efficientnet_b0
from torchinfo import summary

# TODO: options for improvement - add more images from another database / remove watermarks

# Dermatological Conditions Classification
### Run a classification CNN model on different types of dermatological conditions 

In [None]:
# Create the working folder that will hold the selected classes.
# exist_ok=True keeps the cell re-runnable (plain makedirs raises FileExistsError on a second run).
os.makedirs('data', exist_ok=True)

## Copy Selected Data to a New Data Folder:

In [None]:
# Source dataset (read-only Kaggle input) and the writable working copy.
data_path = "/kaggle/input/skin-diseases-image-dataset/IMG_CLASSES/"
dest_path = '/kaggle/working/data/'
data_all = os.listdir(data_path)
print(data_all)


# Class folders (named exactly as in the input dataset) that are copied for training.
skin_targets = ["3. Atopic Dermatitis - 1.25k", #'seborrheic-dermatitis-79.jpg', 'Psoriasis-Guttate-71.jpg'
                "7. Psoriasis pictures Lichen Planus and related diseases - 2k", #'05AtopicWristq.jpg', '05keratosisPilaris080706.jpg'
                "1. Eczema 1677",
                "9. Tinea Ringworm Candidiasis and other Fungal Infections - 1.7k"
               ]
#0. Warts Molluscum and other Viral Infections - 2103
#9. Tinea Ringworm Candidiasis and other Fungal Infections - 1.7k

for target in skin_targets:
    target_path = os.path.join(data_path, target)
    target_dest_path = os.path.join(dest_path, target)
    print(f"\nTarget Name: {target}")
    print(f"Train Len: {len(os.listdir(target_path))}")
    # dirs_exist_ok=True lets the cell be re-run (or resumed after an
    # interrupted copy) instead of failing because the destination exists.
    shutil.copytree(target_path, target_dest_path, dirs_exist_ok=True)


In [None]:
!pip install split_folders
import splitfolders

In [None]:
# import shutil
# shutil.rmtree('/kaggle/working/data')

## Change Targets' Names:

In [None]:
path = "/kaggle/working/data/"

dir_list = os.listdir(path)
print(len(dir_list))

# Raw dataset folder name -> short class label used by the rest of the notebook.
class_renames = {
    "1. Eczema 1677": "Eczema",
    "7. Psoriasis pictures Lichen Planus and related diseases - 2k": "Psoriasis + Lichen Planus",
    "3. Atopic Dermatitis - 1.25k": "Atopic Dermatitis",
    "9. Tinea Ringworm Candidiasis and other Fungal Infections - 1.7k": "Fungal Infections",
}

for raw_name, short_name in class_renames.items():
    renamed_dir = os.path.join(path, short_name)
    if os.path.exists(renamed_dir):
        # Already renamed by a previous run of this cell; nothing to do.
        continue
    # Still raises FileNotFoundError when the raw folder is missing as well,
    # so a genuinely absent class is not silently ignored.
    os.rename(os.path.join(path, raw_name), renamed_dir)

In [None]:
path = "/kaggle/working/data"

# Class labels are the folder names. os.listdir returns entries in arbitrary
# order, so sort them to keep the label -> index mapping stable across runs,
# and keep only directories so a stray file cannot become a "class".
skin_targets_renamed = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
skin_targets_renamed

## Split Data to Train, Test and Validation Folders

In [None]:
# Output folders for the split. exist_ok=True keeps the cell re-runnable
# (plain makedirs raises FileExistsError when the folder is already there).
for split_dir in ('split_data', 'split_data/train', 'split_data/val', 'split_data/test'):
    os.makedirs(split_dir, exist_ok=True)

loc = "/kaggle/working/data/"

# 80% train / 10% validation / 10% test, split per class folder.
splitfolders.ratio(loc, output ="split_data", ratio = (0.80,.1,.1))
# splitfolders.ratio(loc, output ="split_data", ratio = (0.70,.15,.15))

In [None]:
# Sanity check: number of files in one training class folder after the split.
path = "/kaggle/working/split_data/train/Atopic Dermatitis"
dir_list = os.listdir(path)
n_files = len(dir_list)
print(n_files)
# dir_list

# TODO: verify that non-JPEG images (e.g. PNG) are handled correctly by the augmentation step below.

In [None]:
# Offline augmentation of the training split: for every original image, save
# a horizontally flipped copy and a brightness/contrast-jittered copy next to it.
path = "/kaggle/working/split_data/train/"

# Suffixes of the files this cell writes; used to recognise them on a re-run.
augmented_suffixes = ("_horizontal_flipped", "_color_jitter")

horizontal_flip = transforms.RandomHorizontalFlip(p=1)  # p=1 -> always flips
color_jitter = transforms.ColorJitter(brightness=0.2, contrast=0.2)

for target in skin_targets_renamed:
    dir_path = path + target
    dir_list = os.listdir(dir_path)
    for image_name in dir_list:
        # splitext handles any extension (.jpg, .jpeg, .png, upper-case, ...),
        # whereas replacing the literal '.jpg' only worked for that spelling.
        stem, _extension = os.path.splitext(image_name)
        if stem.endswith(augmented_suffixes):
            # Output of a previous run: augmenting it again would multiply the
            # training set every time the cell is executed.
            continue
        image_path = dir_path + "/" + image_name
        output_stem = dir_path + "/" + stem
        # Close the file handle promptly and force RGB: Pillow cannot save
        # RGBA / palette images as JPEG.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
        horizontal_flip(image).save(f"{output_stem}_horizontal_flipped.jpg")
        color_jitter(image).save(f"{output_stem}_color_jitter.jpg")

In [None]:
from glob import glob

# Pick one training image and list it together with every file that shares
# its name prefix (i.e. the augmented copies saved next to it).
image_name = "t-neurotic-excoriations-12" #"t-Dyshidrosis-17"
eczema_pattern = "/kaggle/working/split_data/train/Eczema/" + image_name + "*"
glob(eczema_pattern)

In [None]:
# 2x2 preview of one image and its saved augmentations.
eczema_train_dir = "/kaggle/working/split_data/train/Eczema/"

# (row, col) -> (file-name suffix, caption). None marks the panel that is
# intentionally left blank (the rotated variant is currently disabled).
panels = {
    (0, 0): ("", "Original Image"),
    (0, 1): None,
    (1, 0): ("_horizontal_flipped", "Horizontal Flipped Image"),
    (1, 1): ("_color_jitter", "Color Jitter Image"),
}

fig, axes = plt.subplots(2, 2)
for (row, col), panel in panels.items():
    ax = axes[row][col]
    if panel is not None:
        suffix, caption = panel
        ax.imshow(Image.open(eczema_train_dir + image_name + suffix + ".jpg"))
        ax.set_title(f"{image_name} \n{caption}", fontsize = 10)
    ax.axis('off')
plt.show()

## Create pytorch dataset

In [None]:
train_path = "/kaggle/working/split_data/train/"
test_path = "/kaggle/working/split_data/test/"

# implement pytorch dataset:
class SkinDataset(Dataset):
    """Image-folder dataset over the split skin-condition images.

    Each class listed in ``skin_targets_renamed`` is expected to be a
    sub-folder of the split directory; the position of a class name in that
    list is its integer label.

    Args:
        mode: ``'train'`` reads from ``train_path``; any other value reads
            from ``test_path``.
        transform: callable applied to the PIL image in ``__getitem__``.

    Attributes set by ``__init__``:
        target_dict: maps label index -> class name.
        img_paths / img_targets: parallel lists of file paths and label indices.
    """

    def __init__(self, mode = 'train', transform = transforms.ToTensor()):
        self.mode = mode
        self.target_dict = {}
        self.skin_targets = skin_targets_renamed
        self.transform = transform
        self.img_paths, self.img_targets = self.get_data()

    def __len__(self): # == len(dataset)
        return len(self.img_paths)

    def __getitem__(self, index): # == dataset[index]
        """Return ``{"img": transformed image, "target": label index}``."""
        # Force 3-channel RGB: grayscale / RGBA / palette files would otherwise
        # produce tensors whose channel count does not match the 3-channel
        # Normalize and cannot be stacked into one batch. The context manager
        # releases the file handle as soon as the pixels are loaded.
        with Image.open(self.img_paths[index]) as raw_img:
            img = raw_img.convert("RGB")
        # img = img.filter(ImageFilter.GaussianBlur(radius=2)) #reduce noises
        img = self.transform(img)
        return {"img": img, "target": self.img_targets[index]}

    def get_data(self):
        """Collect every file path under the split folder with its label index.

        Also fills ``self.target_dict`` (index -> class name) as a side effect.

        Returns:
            (image_paths, image_target): two lists of equal length.
        """
        dir_path = train_path if self.mode == 'train' else test_path
        image_paths = []
        image_target = []
        # Iterate over this dataset's own class list (not the global list of
        # raw, pre-rename folder names), so labels and folders always agree.
        for i, target_name in enumerate(self.skin_targets):
            target_path = dir_path + target_name
            self.target_dict[i] = target_name
            print(f"target_path: {target_path}")
            # sorted(): os.listdir order is arbitrary; sorting makes the
            # index -> file mapping stable across runs.
            for file_name in sorted(os.listdir(target_path)):
                image_paths.append(target_path + "/" + file_name)
                image_target.append(i) # target index
        return image_paths, image_target


# train_transform = transforms.Compose([
#     transforms.RandomHorizontalFlip(p=0.5),  # Flip images horizontally
#     transforms.RandomRotation(degrees=30),  # Rotate slightly
#     transforms.ColorJitter(brightness=0.2, contrast=0.2),  # Change brightness & contrast
#     transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Crop randomly
#     transforms.RandomAffine(degrees=0, shear=10),  # Apply small shearing
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
# ])

# Channel statistics shared by the train and test pipelines
# (the values commonly used with ImageNet-pretrained models).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline, step 1: augmentations applied to the PIL image.
pil_augmentations = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=20),
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
    transforms.RandomAffine(degrees=0, shear=20),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
    transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
    transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 2.0)),
]

# Training pipeline, step 2: tensor conversion, normalization, then random
# erasing (cutout), which operates on the tensor.
tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    transforms.RandomErasing(p=0.5),
]

train_transform = transforms.Compose(pil_augmentations + tensor_steps)


# Evaluation pipeline: fixed resize only, no augmentation.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])


train = SkinDataset(mode = 'train', transform = train_transform)
print(f"train len: {len(train)}")

test = SkinDataset(mode = 'test', transform = test_transform)
print(f"test len: {len(test)}")

In [None]:
# Draw up to 12 random training samples (fewer if the dataset is smaller, so
# random.sample cannot raise on a tiny dataset).
example_list = [train[i] for i in random.sample(range(len(train)), k=min(12, len(train)))]

# The dataset returns *normalized* tensors. Converting those directly to PIL
# scales by 255 and casts to bytes, so negative values wrap around and the
# colors come out garbled. Undo the normalization (same mean/std as the
# training transform) and clamp to [0, 1] before display.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

fig, axes = plt.subplots(3, 4, figsize=(10, 8))
for ax, img_item in zip(axes.ravel(), example_list):
    displayable = (img_item['img'] * norm_std + norm_mean).clamp(0, 1)
    img = transforms.functional.to_pil_image(displayable)
    ax.imshow(img)
    ax.axis('off')
    ax.set_title(train.target_dict[img_item['target']], fontsize=10, color="blue")

plt.show()

In [None]:
# Pick the GPU when one is visible, otherwise fall back to the CPU.
c_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
pin_memory = (c_device.type == 'cuda')  # pinned host memory is only requested on CUDA
print(f"device: {c_device}")

# Both loaders share batch size and pinning; only the training loader shuffles.
loader_options = dict(batch_size=64, pin_memory=pin_memory)
train_loader = DataLoader(train, shuffle=True, **loader_options)
test_loader = DataLoader(test, **loader_options)

# Pull one batch and print the image and label tensor shapes as a sanity check.
batch = next(iter(train_loader))
for field in ('img', 'target'):
    print(batch[field].shape)

In [None]:
def train_loop(dataloader, model, criterion, optimizer, device):
    """Train ``model`` for one epoch and report per-sample loss and accuracy.

    Args:
        dataloader: yields dict batches with keys ``'img'`` and ``'target'``;
            ``dataloader.dataset`` must support ``len()``.
        model: network to optimize (put into train mode here).
        criterion: loss function; assumed to return the *mean* loss over the
            batch (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: optimizer stepping ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        (epoch_loss, epoch_accuracy): mean loss per sample and accuracy in
        percent over the whole dataset.
    """
    size = len(dataloader.dataset)
    model.train()
    running_loss, running_corrects = 0.0, 0
    # iterate through all batches
    for batch in dataloader:
        X = batch['img'].to(device=device)
        y = batch['target'].to(device=device)
        pred = model(X)
        loss = criterion(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight it by the batch size so
        # that dividing by the dataset size below yields a true per-sample
        # average (otherwise the loss is ~batch_size times too small and the
        # last, smaller batch is over-weighted).
        running_loss += loss.item() * y.size(0)
        running_corrects += (pred.argmax(1) == y).sum().item()
    epoch_loss = running_loss / size
    epoch_accuracy = 100 * running_corrects / size
    return epoch_loss, epoch_accuracy


def inference_loop(dataloader, model, criterion, device):
    """Evaluate ``model`` on a dataset without updating its weights.

    Args:
        dataloader: yields dict batches with keys ``'img'`` and ``'target'``;
            ``dataloader.dataset`` must support ``len()``.
        model: network to evaluate (put into eval mode here).
        criterion: loss function; assumed to return the *mean* loss over the
            batch (the default reduction of ``nn.CrossEntropyLoss``).
        device: device the batches are moved to.

    Returns:
        (epoch_loss, epoch_accuracy): mean loss per sample and accuracy in
        percent over the whole dataset.
    """
    size = len(dataloader.dataset)
    model.eval()
    running_loss, running_corrects = 0.0, 0
    # disregard gradients when not training
    with torch.no_grad():
        # iterate through all batches
        for batch in dataloader:
            X = batch['img'].to(device=device)
            y = batch['target'].to(device=device)
            pred = model(X)
            # Batch-mean loss weighted by batch size, so dividing by the
            # dataset size below gives a true per-sample average.
            running_loss += criterion(pred, y).item() * y.size(0)
            running_corrects += (pred.argmax(1) == y).sum().item()
    epoch_loss = running_loss / size
    epoch_accuracy = 100 * running_corrects / size
    return epoch_loss, epoch_accuracy

## Add dropout layer to the network
#### Randomly "shutting down" (zeroing) units during training to reduce overfitting

In [None]:
# Load the pre-trained ResNet-50 model
# model = resnet50(weights=None)
# model = resnet101(weights=None)
# model = resnet50(weights="IMAGENET1K_V1")
# model = densenet121(weights=None)

# summary(model, input_size=(64, 3, 224, 224))  # Batch size 1, 3 channels, 224x224 image


# class CustomDenseNet(nn.Module):
#     def __init__(self, num_classes):
#         super(CustomDenseNet, self).__init__()
#         self.base_model = densenet121(weights=None)
        
#         # Correct classifier replacement
#         self.base_model.classifier = nn.Sequential(
#             nn.Dropout(p=0.5),  # Dropout for regularization, change to 0.6??
#             nn.Linear(1024, num_classes) 
#         )

#     def forward(self, x):
#         x = self.base_model(x)  # DenseNet already includes feature extraction
#         return x

# model = CustomDenseNet(num_classes)

In [None]:
# model = densenet121(weights=None)

# Derive the number of output classes from the dataset instead of hard-coding
# it, so the head always matches the class folders actually loaded.
num_classes = len(train.target_dict)

# EfficientNet-B0 with pre-trained weights; replace the classification head
# with dropout + a linear layer sized for our classes.
model = efficientnet_b0(weights="IMAGENET1K_V1")
head_in_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(0.6),
    nn.Linear(head_in_features, num_classes)  # Adjust output layer
)

# Transfer it to device
model = model.to(device=c_device)


# Define the loss function
criterion = nn.CrossEntropyLoss()

# define optimizer & schedulers
# Previously tried: SGD(lr=0.01, momentum=0.9); Adam(lr=0.001, weight_decay=1e-4); Adam(lr=1e-4)
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)  # Halve the LR every 20 epochs

epochs = 40
log_every = 1  # print metrics every N epochs (1 = every epoch)
history = []   # one dict of metrics per epoch, e.g. for loss / accuracy curves

for epoch in range(1, epochs + 1):
    train_loss, train_acc = train_loop(train_loader, model, criterion, optimizer, c_device)
    # NOTE(review): the test split is evaluated every epoch; the 'val' split
    # created earlier is not used anywhere in this notebook.
    test_loss, test_acc = inference_loop(test_loader, model, criterion, c_device)
    history.append({
        "epoch": epoch,
        "train_loss": train_loss, "train_acc": train_acc,
        "test_loss": test_loss, "test_acc": test_acc,
    })
    if epoch % log_every == 0:
        print('Train', f'Epoch: {epoch:03d} / {epochs:03d}',  f'Loss: {train_loss:7.4g}', f'Accuracy: {train_acc:.3f}',  sep='   ')
        print(' Test', f'Epoch: {epoch:03d} / {epochs:03d}',  f'Loss: {test_loss:7.4g}', f'Accuracy: {test_acc:.3f}', sep='   ')
    scheduler.step()


# 20 epochs, step_size=10, 20: train: 0.75, test: 0.55
# densenet121 model: 90 epochs, step_size=20, Adam, lr=0.001, normalization, train: 97.920, test: 62.948
# CustomDenseNet model without augmentation: 30-40 epochs, step_size=20, Adam, lr=0.001, normalization, train: 98.9, test: 73-75
# when using augmentation, the learning rate should be small
# CustomDenseNet model with saved augmentation: 34 epochs, step_size=20, Adam, lr=0.001, normalization, train: 91, test: 64 - similar results with weaker augmentation: train: 99, test: 66
# efficientnet_b0 pretrained model - much better!! 14 epochs, train: 97, test: 76
# efficientnet_b0 pretrained model with on-the-fly augmentation - best!! 11 epochs, train: 94, test: 78
# with 30-degree rotation: Train   Epoch: 026 / 040   Accuracy: 96.190, Test Accuracy: 78-80.677
# with random erasing: Train   Epoch: 025 / 040  Accuracy: 86.591, Test Accuracy: 75.498 - less overfitting but not very good performance

# with the fungal class: Epoch: 025 / 040  Accuracy: 84, Test Accuracy: 71-73

### TODO: add a confusion matrix (actual vs. predicted) and loss and accuracy graphs
https://www.nature.com/articles/s41598-024-80013-0