# Getting the data from kaggle
## Merging it 

In [3]:
!ls



Untitled.ipynb	archive


In [7]:
# installations
import os


In [9]:
import shutil

# Create a directory for all images
images_dir = "HAM10000_images_all"
os.makedirs(images_dir, exist_ok=True)

# Move images from both parts into the new folder
for part in ["HAM10000_images_part_1", "HAM10000_images_part_2"]:
    for file_name in os.listdir(part):
        shutil.move(os.path.join(part, file_name), images_dir)

print(f"All images moved to {images_dir}.")


All images moved to HAM10000_images_all.


In [10]:
import pandas as pd

# Load the metadata CSV file
metadata_path = "HAM10000_metadata.csv"
metadata = pd.read_csv(metadata_path)

# Preview the metadata
print(metadata.head())


     lesion_id      image_id   dx dx_type   age   sex localization
0  HAM_0000118  ISIC_0027419  bkl   histo  80.0  male        scalp
1  HAM_0000118  ISIC_0025030  bkl   histo  80.0  male        scalp
2  HAM_0002730  ISIC_0026769  bkl   histo  80.0  male        scalp
3  HAM_0002730  ISIC_0025661  bkl   histo  80.0  male        scalp
4  HAM_0001466  ISIC_0031633  bkl   histo  75.0  male          ear


In [11]:
# Add full paths to image files
metadata["image_path"] = metadata["image_id"].apply(lambda x: os.path.join(images_dir, f"{x}.jpg"))

# Verify that paths are correct
print(metadata[["image_id", "image_path"]].head())


       image_id                            image_path
0  ISIC_0027419  HAM10000_images_all/ISIC_0027419.jpg
1  ISIC_0025030  HAM10000_images_all/ISIC_0025030.jpg
2  ISIC_0026769  HAM10000_images_all/ISIC_0026769.jpg
3  ISIC_0025661  HAM10000_images_all/ISIC_0025661.jpg
4  ISIC_0031633  HAM10000_images_all/ISIC_0031633.jpg


In [12]:
metadata["age"].fillna(metadata["age"].median(), inplace=True)
metadata["sex"].fillna("unknown", inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  metadata["age"].fillna(metadata["age"].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  metadata["sex"].fillna("unknown", inplace=True)


In [16]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
metadata["label"] = label_encoder.fit_transform(metadata["dx"])
print(label_encoder.classes_)  # List of classes (for reference)


['akiec' 'bcc' 'bkl' 'df' 'mel' 'nv' 'vasc']


In [17]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(metadata, test_size=0.2, stratify=metadata["label"], random_state=42)
print(f"Training samples: {len(train_df)}, Validation samples: {len(val_df)}")


Training samples: 8012, Validation samples: 2003


In [22]:
from torchvision import transforms
from PIL import Image

# Define image transforms
image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match model input
    transforms.ToTensor(),          # Convert to Tensor
    transforms.Normalize([0.5], [0.5])  # Normalize values (if grayscale)
])

# Load an example image
sample_image_path = train_df.iloc[0]["image_path"]
image = Image.open(sample_image_path)
image_tensor = image_transforms(image)

print(image_tensor.shape)  # Should output: (3, 224, 224)


torch.Size([3, 224, 224])


In [36]:
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
import os
import pandas as pd

# Define a custom Dataset class
class SkinCancerDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        """
        Args:
            dataframe (pd.DataFrame): DataFrame with metadata.
            img_dir (str): Directory containing images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform
        self.label_map = {'bkl': 0, 'mel': 1, 'nv': 2, 'basal': 3, 'akiec': 4, 'df': 5}  # Map string labels to integers

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['image_id']  # Assuming 'image_id' is the correct column
        img_path = os.path.join(self.img_dir, img_name + '.jpg')  # Assuming the images are in JPG format
        image = Image.open(img_path).convert("RGB")  # Open image and convert to RGB
        
        label_str = self.dataframe.iloc[idx]['dx']  # Assuming 'dx' is the diagnosis column
        label = self.label_map.get(label_str, -1)  # Convert label to integer using label_map

        if self.transform:
            image = self.transform(image)

        return image, label

# Define the transformations (e.g., resize, tensor conversion)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match model input
    transforms.ToTensor(),          # Convert to Tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize based on ImageNet stats
])

# Create Dataset instances
train_dataset = SkinCancerDataset(dataframe=train_df, img_dir='HAM10000_images_all', transform=transform)
val_dataset = SkinCancerDataset(dataframe=val_df, img_dir='HAM10000_images_all', transform=transform)

# Create DataLoader instances
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

# Check one batch from the DataLoader
for images, labels in train_loader:
    print(images.shape, labels.shape)
    break


torch.Size([32, 3, 224, 224]) torch.Size([32])


In [48]:
import torch
import torch.nn as nn
from torchvision import models

# Define the VisionTransformer (ViT) model
model = models.vision_transformer.vit_b_16(pretrained=True)

# Check the existing architecture to ensure where the classifier part is
print(model)

# Assuming the dataset has a column 'dx' in train_df, which indicates the number of classes
num_classes = len(train_df['dx'].unique())  # Adjust the column name as needed

# Modify the classifier (ViT's classifier is a Sequential model)
model.heads = nn.Sequential(
    nn.Linear(model.heads[0].in_features, 512),  # Adjust the input features of the first layer
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, num_classes)  # Output layer for multi-class classification
)

# If using CUDA, ensure model is on the correct device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Example of training loop (basic structure, adjust as necessary)
# Assuming you have dataloaders for training and validation

num_epochs = 10  # Number of epochs to train

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    print(f"Started epoch {
    
    for images, labels in train_loader:  # Replace with your actual DataLoader
        images, labels = images.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Track running loss and accuracy
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.size(0)

    epoch_loss = running_loss / len(train_dataloader)
    epoch_accuracy = 100 * correct_preds / total_preds
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

    # Validate the model after every epoch (optional)
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        val_loss = 0.0
        correct_preds = 0
        total_preds = 0
        for images, labels in val_loader:  # Replace with your actual validation DataLoader
            images, labels = images.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(images)
            
            # Calculate loss
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.size(0)

        val_loss /= len(val_dataloader)
        val_accuracy = 100 * correct_preds / total_preds
        print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%")


VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

IndexError: Target -1 is out of bounds.