In [1]:
import os
import pandas as pd
from PIL import Image
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch

In [2]:
images_path_1 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1'
images_path_2 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2'

metadata_path = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv'

In [3]:
metadata_df = pd.read_csv(metadata_path)

In [4]:
metadata_df.tail(5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face
10014,HAM_0003521,ISIC_0032258,mel,histo,70.0,female,back


In [5]:
def get_image_path(image_id):
    image_file = image_id + '.jpg'
    if os.path.exists(os.path.join(images_path_1, image_file)):
        return os.path.join(images_path_1, image_file)
    elif os.path.exists(os.path.join(images_path_2, image_file)):
        return os.path.join(images_path_2, image_file)
    else:
        return None

In [6]:
metadata_df['image_path'] = metadata_df['image_id'].apply(get_image_path)

In [7]:
metadata_df.tail(5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_path
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
10014,HAM_0003521,ISIC_0032258,mel,histo,70.0,female,back,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...


In [8]:
print(metadata_df[metadata_df['image_path'].isnull()])

Empty DataFrame
Columns: [lesion_id, image_id, dx, dx_type, age, sex, localization, image_path]
Index: []


In [9]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to the size 224x224 which ViTMAEModel required
    transforms.ToTensor(),
])

In [10]:
label_encoder = LabelEncoder()
metadata_df['label'] = label_encoder.fit_transform(metadata_df['dx'])

- bkl: Benign keratosis-like lesions (tổn thương lành tính giống như sừng hóa)
- nv: Melanocytic nevi (nốt ruồi hắc tố lành tính)
- mel: Melanoma (ung thư da)
- akiec: Actinic keratoses (tổn thương tiền ung thư da)
- bcc: Basal cell carcinoma (ung thư tế bào đáy)
- df: Dermatofibroma (u xơ da)
- vasc: Vascular lesions (tổn thương mạch máu)

In [11]:
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print(label_mapping)

{'akiec': 0, 'bcc': 1, 'bkl': 2, 'df': 3, 'mel': 4, 'nv': 5, 'vasc': 6}


In [12]:
class SkinCancerDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        image_path = self.dataframe.iloc[idx]['image_path']
        image = Image.open(image_path).convert("RGB")
        
        label = self.dataframe.iloc[idx]['label']
        
        if self.transform:
            image = self.transform(image)
        label = torch.tensor(label, dtype=torch.long)
        
        return image, label

In [13]:
# Chia dữ liệu thành tập huấn luyện và kiểm tra
train_df, test_df = train_test_split(metadata_df, test_size=0.2, stratify=metadata_df['dx'], random_state=42)

# Tạo Dataset và DataLoader
train_dataset = SkinCancerDataset(train_df, transform=transform)
test_dataset = SkinCancerDataset(test_df, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

In [14]:
from transformers import ViTMAEModel

In [15]:
vit_mae_model = ViTMAEModel.from_pretrained("facebook/vit-mae-base")

config.json:   0%|          | 0.00/676 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/448M [00:00<?, ?B/s]

In [16]:
class ViTMAEForClassification(torch.nn.Module):
    def __init__(self, vit_mae_model, num_classes):
        super(ViTMAEForClassification, self).__init__()
        self.vit_mae = vit_mae_model
        # Add classifier fully connected
        self.classifier = torch.nn.Linear(768, num_classes)

    def forward(self, x):
        # Feature extraction ViT-MAE
        outputs = self.vit_mae(pixel_values=x)
        cls_token = outputs.last_hidden_state[:, 0]  # Lấy token CLS từ ViT
        logits = self.classifier(cls_token)
        return logits

In [17]:
model = ViTMAEForClassification(vit_mae_model, num_classes=7)

In [18]:
if torch.cuda.is_available():       
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB


In [43]:
vit_mae_model.to(device)

ViTMAEModel(
  (embeddings): ViTMAEEmbeddings(
    (patch_embeddings): ViTMAEPatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
  )
  (encoder): ViTMAEEncoder(
    (layer): ModuleList(
      (0-11): 12 x ViTMAELayer(
        (attention): ViTMAESdpaAttention(
          (attention): ViTMAESdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): ViTMAESelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (intermediate): ViTMAEIntermediate(
          (dense): Linear(in_features=768, out_features=3072, bias=True)
          (intermediate_act_fn): GELUActivation()
        

In [19]:
model.to(device)

ViTMAEForClassification(
  (vit_mae): ViTMAEModel(
    (embeddings): ViTMAEEmbeddings(
      (patch_embeddings): ViTMAEPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
    )
    (encoder): ViTMAEEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTMAELayer(
          (attention): ViTMAESdpaAttention(
            (attention): ViTMAESdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTMAESelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTMAEIntermediate(
            (dense): Linear(in_features=768, out_fe

In [20]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 10

In [21]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

Epoch [1/10], Loss: 1.0376
Epoch [2/10], Loss: 0.8853
Epoch [3/10], Loss: 0.8070
Epoch [4/10], Loss: 0.7527
Epoch [5/10], Loss: 0.7084
Epoch [6/10], Loss: 0.6821
Epoch [7/10], Loss: 0.6534
Epoch [8/10], Loss: 0.6404
Epoch [9/10], Loss: 0.6158
Epoch [10/10], Loss: 0.5918


In [24]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

Epoch [1/10], Loss: 0.5713
Epoch [2/10], Loss: 0.5521
Epoch [3/10], Loss: 0.5394
Epoch [4/10], Loss: 0.5086
Epoch [5/10], Loss: 0.4884
Epoch [6/10], Loss: 0.4632
Epoch [7/10], Loss: 0.4426
Epoch [8/10], Loss: 0.4209
Epoch [9/10], Loss: 0.3891
Epoch [10/10], Loss: 0.3765


In [25]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

Epoch [1/10], Loss: 0.3579
Epoch [2/10], Loss: 0.3367
Epoch [3/10], Loss: 0.3056
Epoch [4/10], Loss: 0.3000
Epoch [5/10], Loss: 0.2739
Epoch [6/10], Loss: 0.2562
Epoch [7/10], Loss: 0.2537
Epoch [8/10], Loss: 0.2416
Epoch [9/10], Loss: 0.2239
Epoch [10/10], Loss: 0.2083


In [26]:
for epoch in range(7):
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

Epoch [1/10], Loss: 0.1990
Epoch [2/10], Loss: 0.1907
Epoch [3/10], Loss: 0.1793
Epoch [4/10], Loss: 0.1844
Epoch [5/10], Loss: 0.1579
Epoch [6/10], Loss: 0.1609
Epoch [7/10], Loss: 0.1523


In [28]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Độ chính xác của mô hình trên tập kiểm tra: {accuracy:.2f}%")

Độ chính xác của mô hình trên tập kiểm tra: 79.63%


In [35]:
def pred(image_path):
    image = Image.open(image_path).convert("RGB")
    image = transform(image)

    image = image.unsqueeze(0)
    image = image.to(device)

    model.eval()

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output.data, 1)

    predicted_label = predicted.item()
    predicted_class = label_encoder.inverse_transform([predicted_label])[0]

    if predicted_class == 'nv':
        predicted_class = 'Melanocytic nevi (nốt ruồi hắc tố lành tính)'
    if predicted_class == 'bkl':
        predicted_class = 'Benign keratosis-like lesions (tổn thương lành tính giống như sừng hóa)'
    if predicted_class == 'mel':
        predicted_class = 'Melanoma (ung thư da)'
    if predicted_class == 'akiec':
        predicted_class = 'Actinic keratoses (tổn thương tiền ung thư da)'
    if predicted_class == 'bcc':
        predicted_class = 'Basal cell carcinoma (ung thư tế bào đáy)'
    if predicted_class == 'df':
        predicted_class = 'Dermatofibroma (u xơ da)'
    if predicted_class == 'vasc':
        predicted_class = 'Vascular lesions (tổn thương mạch máu)'
    print(f"Dự đoán: {predicted_class}")

In [36]:
image_path = '/kaggle/input/preddataset/nv.jpg'
pred(image_path)

Dự đoán: Melanocytic nevi (nốt ruồi hắc tố lành tính)


In [37]:
image_path = '/kaggle/input/preddataset/mel.jpg'
pred(image_path)

Dự đoán: Melanoma (ung thư da)


In [39]:
image_path = '/kaggle/input/preddataset/nv2.jpg'
pred(image_path)

Dự đoán: Melanocytic nevi (nốt ruồi hắc tố lành tính)


In [40]:
image_path = '/kaggle/input/preddataset/akiec.jpg'
pred(image_path)

Dự đoán: Actinic keratoses (tổn thương tiền ung thư da)
