### Download dataset from link given below
- images are stored in "images" folder and ground truth is stored in "GroundTruth.csv" file.

In [1]:
# !kaggle datasets download -d surajghuwalewala/ham1000-segmentation-and-classification

In [2]:
# !unzip -q ham1000-segmentation-and-classification.zip -d ham1000-segmentation-and-classification

### Import libraries

In [9]:
import torch
import torch.nn as nn
import os 
import pandas as pd
import numpy as np
import cv2
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score
from torch.autograd import Variable
import warnings
warnings.filterwarnings("ignore")

#random seed
torch.manual_seed(0)
np.random.seed(0)
torch.cuda.manual_seed(0)


### Dataset loading

In [4]:
class customdataset(torch.utils.data.Dataset):
    def __init__(self,images_path,gt_path,transform=None):
        self.root_dir=images_path
        self.transform=transform
        self.imgs = os.listdir(self.root_dir)
        self.gt = pd.read_csv(gt_path)

    def __len__(self):
        return len(self.imgs)-2
    
    def __getitem__(self,idx):
        label = self.gt.iloc[idx,1:].values
        label = label.astype('float').reshape(-1,7)
        name = self.gt.iloc[idx,0]
        img_path = os.path.join(self.root_dir,name+'.jpg')
        image = cv2.imread(img_path)
        if self.transform:
            image = self.transform(image)
            label = torch.from_numpy(label)
        return image,label


 
data = customdataset(images_path='images',          # change this to the path where you have extracted the images
                    gt_path='GroundTruth.csv',      # change this to the path where you have extracted the GroundTruth.csv
                    transform=transforms.Compose(
                        [transforms.ToTensor(),transforms.Resize((224,224)),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5)),
                                transforms.RandomHorizontalFlip(p=0.5),
                    transforms.RandomVerticalFlip(p=0.5),
                    transforms.RandomRotation(90),]))

# Splitting the dataset into train and test into 80:20 ratio
train_data, test_data = torch.utils.data.random_split(data, [int(0.8*len(data)), len(data)-int(0.8*len(data))])
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=True)


### CNN model 

In [5]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.conv1 = nn.Conv2d(3, 9, 3, padding=1)
        self.conv2 = nn.Conv2d(9, 18, 3, padding=1)
        self.conv3 = nn.Conv2d(18, 36, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(28224, 1000)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc3 = nn.Linear(1000, 7)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

### Train Model

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# train the model for 14 epochs
for epoch in range(14):
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        labels = labels.view(-1, 7)
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 20 == 9:    # print every 20 mini-batches
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0
torch.save(model.state_dict(), 'model+14.pth')


<All keys matched successfully>

### Testing

In [7]:
plot_labels = ['MEL','NV','BCC','AKIEC','BKL','DF','VASC']
model.eval()
y_label = []
y_predict = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        images, labels = data
        labels = labels.view(-1, 7)
        N = images.size(0)
        images = Variable(images).to(device)
        gt = torch.argmax(labels.data, 1)
        outputs = model(images)
        prediction = outputs.max(1, keepdim=True)[1]
        y_label.extend(gt.cpu().numpy())
        y_predict.extend(np.squeeze(prediction.cpu().numpy().T))

all_metrics = classification_report(y_label, y_predict, target_names=plot_labels,zero_division=0)
print(all_metrics)

              precision    recall  f1-score   support

          nv       0.57      0.39      0.47       223
         mel       0.86      0.93      0.89      1358
         bkl       0.52      0.57      0.55        95
         bcc       0.40      0.41      0.41        76
       akiec       0.53      0.47      0.50       208
        vasc       0.17      0.04      0.06        25
          df       0.75      0.33      0.46        18

    accuracy                           0.77      2003
   macro avg       0.54      0.45      0.48      2003
weighted avg       0.75      0.77      0.75      2003



In [10]:
acc = accuracy_score(y_label, y_predict)
print('Accuracy: {:.2f}%'.format(acc*100))

Accuracy: 76.78%
