In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
import random
from imutils import paths
import os
import numpy as np
from PIL import Image

  check_for_updates()


In [2]:
class SkinCancer(Dataset):
    def __init__(self, image_paths, labels=None, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image=np.array(image))['image']
        
        label = self.labels[idx]
        return image, label

In [3]:
train_path = list(paths.list_images('/kaggle/input/skincancer-isic2019/archive/archive'))
random.shuffle(train_path)
label_train = [p.split(os.path.sep)[-2] for p in train_path]


In [4]:
le = LabelEncoder()
train_encode_labels = le.fit_transform(label_train)

# One-hot encode labels
lb = LabelBinarizer()
train_onehot_labels = lb.fit_transform(train_encode_labels)

image_size = 380
train_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Transpose(p=0.5),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CLAHE(clip_limit=4.0, p=0.7),
    A.GaussNoise(var_limit=(5.0, 30.0), p=0.7),
    A.CoarseDropout(max_holes=1, max_height=int(image_size * 0.375), max_width=int(image_size * 0.375), p=0.7),
    A.Normalize(),
    ToTensorV2()
])

valid_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize(),
    ToTensorV2()
])

In [5]:
X_train, X_valid, y_train, y_valid = train_test_split(train_path, train_onehot_labels, test_size=0.2, random_state=42)
train_dataset = SkinCancer(X_train, y_train, transform=train_transform)
valid_dataset = SkinCancer(X_valid, y_valid, transform=valid_transform)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(valid_dataset, batch_size=8, shuffle=False)

In [6]:
class CustomEfficientNet(nn.Module):
    def __init__(self, num_classes=8):
        super(CustomEfficientNet, self).__init__()
        self.base_model = models.efficientnet_b4(pretrained=True)
        self.base_model.classifier = nn.Sequential(
            nn.Linear(1792, 500),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(500, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        return self.base_model(x)

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CustomEfficientNet(num_classes=8).to(device)
optimizer = optim.Adam(model.parameters(), lr=3e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)
criterion = nn.CrossEntropyLoss()

Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_rwightman-23ab8bcd.pth
100%|██████████| 74.5M/74.5M [00:00<00:00, 172MB/s] 


In [8]:
def train_and_val_model(model, criterion, optimizer, scheduler, train_loader, val_loader, num_epochs=15):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.argmax(dim=1).to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        scheduler.step()
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.double() / len(train_loader.dataset)
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

        model.eval()
        running_corrects = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.argmax(dim=1).to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                running_corrects += torch.sum(preds == labels.data)
        val_acc = running_corrects.double() / len(val_loader.dataset)
        print(f'Validation Accuracy: {val_acc:.4f}')


In [9]:
train_and_val_model(model, criterion, optimizer, scheduler, train_loader, val_loader, num_epochs=10)

Epoch 0/9, Loss: 1.2379, Accuracy: 0.5657
Validation Accuracy: 0.6414
Epoch 1/9, Loss: 0.9004, Accuracy: 0.6815
Validation Accuracy: 0.7178
Epoch 2/9, Loss: 0.7647, Accuracy: 0.7253
Validation Accuracy: 0.7462
Epoch 3/9, Loss: 0.6803, Accuracy: 0.7528
Validation Accuracy: 0.7636
Epoch 4/9, Loss: 0.6226, Accuracy: 0.7763
Validation Accuracy: 0.7774
Epoch 5/9, Loss: 0.5676, Accuracy: 0.7958
Validation Accuracy: 0.7839
Epoch 6/9, Loss: 0.5360, Accuracy: 0.8048
Validation Accuracy: 0.7967
Epoch 7/9, Loss: 0.5016, Accuracy: 0.8176
Validation Accuracy: 0.8017
Epoch 8/9, Loss: 0.4691, Accuracy: 0.8281
Validation Accuracy: 0.8017
Epoch 9/9, Loss: 0.4425, Accuracy: 0.8394
Validation Accuracy: 0.8078


In [10]:
torch.save(model.state_dict(), 'efficientnet_skin_cancer1.pth')

# TEST

In [14]:
import pandas as pd
test_data = pd.read_csv('/kaggle/input/model-datatest-skincancer/test_data_remove_unk.csv')
test_data.head()

Unnamed: 0,image,age_approx,anatom_site_general,sex,path_jpg
0,ISIC_0034321,60.0,7,0,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...
1,ISIC_0034322,70.0,0,1,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...
2,ISIC_0034323,70.0,3,1,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...
3,ISIC_0034324,70.0,3,1,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...
4,ISIC_0034325,30.0,8,0,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...


In [15]:
test1 = test_data.drop(['age_approx', 'anatom_site_general', 'sex'], axis=1)


In [16]:
test1.head()

Unnamed: 0,image,path_jpg
0,ISIC_0034321,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...
1,ISIC_0034322,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...
2,ISIC_0034323,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...
3,ISIC_0034324,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...
4,ISIC_0034325,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...


In [11]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = CustomEfficientNet(num_classes=9).to(device)
# model.load_state_dict(torch.load("/kaggle/input/model-datatest-skincancer/efficientnet_skin_cancer.pth", map_location=device))
model.eval()

CustomEfficientNet(
  (base_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
              (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
          

In [17]:
image_size = 380
test_transform = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize(),
    ToTensorV2()
])
test_images = test1["path_jpg"].values

In [18]:
def predict_image(image_path, model, transform, device):
    image = Image.open(image_path).convert("RGB")
    image = transform(image=np.array(image))["image"]
    image = image.unsqueeze(0).to(device)  

    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1) 
    return predicted.item()

predictions = [predict_image(img, model, test_transform, device) for img in test_images]

In [19]:
test1["prediction"] = predictions

In [20]:
test1["disease_name"] = le.inverse_transform(test1["prediction"])

In [16]:
test1

Unnamed: 0,image,path_jpg,prediction,disease_name
0,ISIC_0034321,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,5,NV
1,ISIC_0034322,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,5,NV
2,ISIC_0034323,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,1,BCC
3,ISIC_0034324,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,5,NV
4,ISIC_0034325,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,5,NV
...,...,...,...,...
6186,ISIC_0073226,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,1,BCC
6187,ISIC_0073234,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,5,NV
6188,ISIC_0073236,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,1,BCC
6189,ISIC_0073243,/kaggle/input/isic-2019-challenge/ISIC_2019_Te...,1,BCC


In [21]:
test_gt = pd.read_csv('/kaggle/input/model-datatest-skincancer/test_gt_remove_unk.csv')
test_gt.head()

Unnamed: 0,image,diagnosis
0,ISIC_0034321,NV
1,ISIC_0034322,NV
2,ISIC_0034323,BCC
3,ISIC_0034324,NV
4,ISIC_0034325,NV


In [22]:
accuracy = (test1['disease_name'] == test_gt['diagnosis']).mean()
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.7086
