In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
# Mount Google Drive so the files under /content/drive are readable from this runtime
# (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import pandas as pd 
import numpy as np 
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt

# Make the dataset folder on the mounted Drive the working directory, then list its
# contents with a shell command to confirm the expected files are present.
os.chdir("/content/drive/MyDrive/HAM10000")
!ls

HAM10000_images_augmented	 HAM10000_metadata.csv
HAM10000_images_part_1		 HAM10000_segmentations_lesion_tschandl
HAM10000_images_part_2		 training_data.csv
HAM10000_metadata_augmented.csv  training_data.npy


In [4]:
# Load the metadata table from Drive.
mdata = pd.read_csv("/content/drive/MyDrive/HAM10000/HAM10000_metadata.csv")
# NOTE(review): this bare expression is not the last line of the cell, so it displays nothing.
mdata
# Peek at row 2, column 1; the recorded output is an image id string ('ISIC_0026769').
mdata.iloc[2, 1]

'ISIC_0026769'

In [5]:
# Image preprocessing pipeline applied to every PIL image before it reaches the model.
from torchvision import transforms
from PIL import Image
# Side length (pixels) every image is resized to.
img_size = 224
preprocess = transforms.Compose([
    # Resize to a fixed square; note that this does not preserve the aspect ratio.
    transforms.Resize((img_size, img_size)),
    # transforms.CenterCrop(224),
    # PIL image -> float tensor.
    transforms.ToTensor(),
    # Per-channel normalisation; presumably the ImageNet statistics expected by the
    # pretrained torchvision models -- TODO confirm against the model documentation.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# preprocess2 = preprocess = transforms.Compose([
    
# ])
# input_tensor = preprocess(input_image)
# input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

In [6]:
from pyparsing.helpers import identbodychars
class DermClassHelper():
  """Lookup tables for the HAM10000 metadata.

  Builds two dictionaries from the metadata table:

  * ``labels``  -- diagnosis string (``dx`` column) -> integer class index,
    numbered in order of first appearance.
  * ``img_map`` -- image id (``image_id`` column) -> integer class index.

  ``make_training_data`` additionally fills ``balance`` with the number of
  image files found on disk for each class index.

  All lookup state lives on the instance, so creating a second helper (or
  re-running a cell) never leaks entries from a previous one.
  """
  # Target image side length in pixels (not referenced by the methods below).
  IMG_SIZE = 224
  # Image sub-directories, relative to `root`.
  sds = ["/HAM10000_images_part_1/", "/HAM10000_images_part_2/", "/HAM10000_images_augmented/"]
  root = "/content/drive/MyDrive/HAM10000"
  # Metadata CSVs (original + augmented), relative to `root`.
  metadata_files = ["/HAM10000_metadata.csv", "/HAM10000_metadata_augmented.csv"]

  def __init__(self, mdata=None):
    """Build the label and image-id lookups.

    Args:
      mdata: optional DataFrame with ``image_id`` and ``dx`` columns. When
        omitted, the CSVs listed in ``metadata_files`` are read from ``root``
        and concatenated (original row indices are kept, as before).
    """
    if mdata is None:
      # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
      mdata = pd.concat([pd.read_csv(self.root + name) for name in self.metadata_files])
    self.mdata = mdata
    self.labels = {}
    self.img_map = {}
    # Kept for compatibility with the original attribute set; nothing fills it here.
    self.training_data = []
    self.balance = {}
    #assign labels
    self.make_labels()
    #make the map
    self.make_image_map()
    # Print a summary only: dumping the whole image-id map floods the cell output.
    print(self.labels, "%d image ids mapped" % len(self.img_map))

  def make_labels(self):
    """Assign consecutive integers to the diagnosis types, in order of first appearance."""
    self.labels = {dx: index for index, dx in enumerate(self.mdata["dx"].unique())}

  def make_image_map(self):
    """Assign to each image id the integer label of its diagnosis."""
    self.img_map = {
        image_id: self.labels[dx]
        for image_id, dx in zip(self.mdata["image_id"], self.mdata["dx"])
    }

  def make_training_data(self):
    """Count the image files on disk per class and store the result in ``self.balance``.

    Despite its name this method loads no images; it only tallies files.
    The tally is rebuilt from scratch on every call, so calling it twice does
    not double the counts. Files whose id is absent from the metadata (stray
    files in the folder) are skipped and reported instead of raising KeyError.
    """
    balance = {}
    skipped = 0
    for sd in self.sds:
      for f in tqdm(os.listdir(self.root + sd)):
        # extract the image id from the file name: drop the extension and any
        # space-separated suffix
        image_id = f.split(".")[0].strip(" ").split(" ")[0]
        label = self.img_map.get(image_id)
        if label is None:
          skipped += 1
          continue
        balance[label] = balance.get(label, 0) + 1
    self.balance = balance
    if skipped:
      print("Skipped %d files with no metadata entry" % skipped)
    print(self.balance)

In [None]:
# Build the diagnosis-label and image-id lookups, then tally how many image files
# on disk fall into each class (the tally is printed by make_training_data).
dch = DermClassHelper()
dch.make_training_data()

In [8]:
from skimage import io

In [70]:
# Dataset class that reads images lazily from disk (wrapped by a DataLoader further below)
from torch.utils.data import DataLoader
class DermClass(torch.utils.data.Dataset):
  """Map-style dataset over the images listed in a metadata CSV.

  Each item is ``(image, label)`` where ``label`` is a 0-dim integer tensor
  taken from ``helper.img_map`` and ``image`` is the result of ``transform1``
  (or the RGB PIL image itself when no transform is given).

  Args:
    helper: object exposing ``img_map`` (image id -> integer label), e.g. a
      ``DermClassHelper``.
    csv_file: path of the metadata CSV; column 1 is read as the image id.
    root_dir: dataset root directory.
    sds: image sub-directories (relative to ``root_dir``) searched in order.
      A single string is accepted as well.
    transform1: optional callable applied to the PIL image.
  """
  def __init__(self, helper, csv_file, root_dir, sds, transform1=None):
    self.derm_frame = pd.read_csv(csv_file)
    self.root_dir = root_dir
    self.sds = sds
    self.transform1 = transform1
    #pass in derm class helper here
    self.helper = helper

  def __len__(self):
    """Number of rows in the metadata CSV."""
    return len(self.derm_frame)

  def _find_image(self, filename):
    """Return the path of ``<filename>.jpg`` in the first sub-directory that has it.

    Raises:
      FileNotFoundError: if the image is in none of the sub-directories.
    """
    subdirs = [self.sds] if isinstance(self.sds, str) else self.sds
    for sd in subdirs:
      candidate = str(self.root_dir + sd + filename + ".jpg")
      if os.path.exists(candidate):
        return candidate
    raise FileNotFoundError(
        "image %r not found under %r in any of %r" % (filename, self.root_dir, list(subdirs)))

  def __getitem__(self, index):
    """Load the image for row ``index`` together with its integer label."""
    #1 is the image file col
    filename = self.derm_frame.iloc[index, 1]
    img_path = self._find_image(filename)

    # Read inside a context manager so the file handle is released right away;
    # convert() forces the pixel data to load and guarantees 3 channels.
    with Image.open(img_path) as handle:
      img = handle.convert("RGB")
    # y_label = torch.tensor(np.eye(len(self.helper.labels))[self.helper.img_map[filename]]) #one-hot alternative
    y_label = torch.tensor(int(self.helper.img_map[filename]))

    # Without a transform, return the PIL image instead of failing on an unbound name.
    image = self.transform1(img) if self.transform1 else img
    return (image, y_label)

In [71]:
class AugmentedDermClass(torch.utils.data.Dataset):
  """Map-style dataset over the augmented images listed in a metadata CSV.

  Each item is ``(image, label)`` where ``label`` is a 0-dim integer tensor
  taken from ``helper.img_map`` and ``image`` is the result of ``transform1``
  (or the RGB PIL image itself when no transform is given).

  Args:
    helper: object exposing ``img_map`` (image id -> integer label), e.g. a
      ``DermClassHelper``.
    csv_file: path of the augmented metadata CSV; column 1 is read as the image id.
    root_dir: dataset root directory.
    sd: the single image sub-directory (relative to ``root_dir``).
    transform1: optional callable applied to the PIL image.
  """
  def __init__(self, helper, csv_file, root_dir, sd, transform1=None):
    self.derm_frame = pd.read_csv(csv_file)
    self.root_dir = root_dir
    self.sd = sd
    self.transform1 = transform1
    #pass in derm class helper here
    self.helper = helper

  def __len__(self):
    """Number of rows in the metadata CSV."""
    return len(self.derm_frame)

  def _find_image(self, filename):
    """Return the path of ``<filename>.jpg`` inside the augmented image directory.

    Raises:
      FileNotFoundError: if the file does not exist.
    """
    candidate = str(self.root_dir + self.sd + filename + ".jpg")
    if not os.path.exists(candidate):
      raise FileNotFoundError("image %r not found at %r" % (filename, candidate))
    return candidate

  def __getitem__(self, index):
    """Load the image for row ``index`` together with its integer label."""
    #1 is the image file col
    filename = self.derm_frame.iloc[index, 1]
    # dataset = self.derm_frame.iloc[index, 7]
    img_path = self._find_image(filename)

    # Read inside a context manager so the file handle is released right away;
    # convert() forces the pixel data to load and guarantees 3 channels.
    with Image.open(img_path) as handle:
      img = handle.convert("RGB")
    # y_label = torch.tensor(np.eye(len(self.helper.labels))[self.helper.img_map[filename]]) #one-hot alternative
    y_label = torch.tensor(int(self.helper.img_map[filename]))

    # Without a transform, return the PIL image instead of failing on an unbound name.
    image = self.transform1(img) if self.transform1 else img
    return (image, y_label)

In [40]:
# g = DermClass(helper=dch, csv_files=["/content/drive/MyDrive/HAM10000/HAM10000_metadata.csv", "/content/drive/MyDrive/HAM10000/HAM10000_metadata_augmented.csv"], root_dir="/content/drive/MyDrive/HAM10000", sds=["/HAM10000_images_part_1/", "/HAM10000_images_part_2/", "/HAM10000_images_augmented/"], transform1=preprocess)
# g.__getitem__(11000)


In [72]:
# Hyperparameters
# Input channels / number of output classes.
# (in_channel is not referenced in the visible cells -- presumably the RGB channel count.)
in_channel, num_classes = 3, 7
# Optimiser step size, mini-batch size and number of passes over the training data.
learning_rate, batch_size, num_epochs = 1e-3, 64, 20

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [75]:
# Dataset over the original images (searched in the part_1 and part_2 folders), preprocessed with `preprocess`.
dataset = DermClass(helper=dch, csv_file="/content/drive/MyDrive/HAM10000/HAM10000_metadata.csv", root_dir="/content/drive/MyDrive/HAM10000", sds=["/HAM10000_images_part_1/", "/HAM10000_images_part_2/"], transform1=preprocess)

# Dataset over the augmented images (single folder), using the same preprocessing.
augmented_dataset = AugmentedDermClass(helper=dch, csv_file="/content/drive/MyDrive/HAM10000/HAM10000_metadata_augmented.csv", root_dir="/content/drive/MyDrive/HAM10000", sd= "/HAM10000_images_augmented/", transform1=preprocess)

In [76]:
# 75 % / 25 % split of the original images. The test length is computed as the
# remainder so the two lengths always sum to len(dataset) (int(n*.75) + int(n*.25) + 1
# overshoots whenever n is a multiple of 4), and the generator is seeded so the
# split is the same on every run.
n_train = int(len(dataset) * .75)
train_set, test_set = torch.utils.data.random_split(
    dataset,
    [n_train, len(dataset) - n_train],
    generator=torch.Generator().manual_seed(42),
)

In [77]:
# Split the ORIGINAL images 75 % / 25 %; augmented images are added to the training
# side only, so the test set contains original images exclusively.
# The test length is the remainder so the lengths always sum to len(dataset), and
# the generator is seeded so the split is reproducible across runs.
n_train = int(len(dataset) * .75)
train_set, test_set = torch.utils.data.random_split(
    dataset,
    [n_train, len(dataset) - n_train],
    generator=torch.Generator().manual_seed(42),
)

# NOTE(review): if an augmented image was derived from an original that landed in
# test_set, near-duplicates of test images end up in training (leakage). Filtering
# the augmented set by source image would need a column linking each augmented row
# to its original -- TODO confirm the augmented CSV provides one.
#add the augmented data to the train set
train_set = torch.utils.data.ConcatDataset([train_set, augmented_dataset])


train_loader = DataLoader(dataset = train_set, batch_size = batch_size, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_loader = DataLoader(dataset = test_set, batch_size = batch_size, shuffle=False)

In [78]:
# Number of mini-batches the training loader yields per epoch.
len(train_loader)

144

In [58]:
# dataset.__getitem__(0)

In [79]:
# Load a pretrained ResNet from torch.hub; alternative depths are kept below for reference.
import torch
# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18',  pretrained=True)
# or any of these variants
# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet34', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
# NOTE(review): the recorded output warns that `pretrained` is deprecated since torchvision 0.13;
# the replacement is the `weights` argument -- TODO confirm it is accepted with this pinned hub tag.
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet101', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet152', pretrained=True)


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0
  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


In [None]:
#fine tuning - freeze every pretrained weight so that only the new head is trained
for param in model.parameters():
  param.requires_grad = False

# Replace the classifier with a small MLP head. Its freshly created parameters have
# requires_grad=True by default. The input width is read from the current head
# (first layer if this cell has already been run once) instead of hard-coding it,
# and the output width follows num_classes.
fc_in_features = model.fc[0].in_features if isinstance(model.fc, nn.Sequential) else model.fc.in_features
model.fc = nn.Sequential(nn.Linear(fc_in_features, 100),
                         nn.ReLU(),
                         nn.Dropout(p=.5),
                         nn.Linear(100, num_classes))

# Keep the frozen backbone in eval mode (BatchNorm keeps its pretrained running
# statistics) but put the new head in train mode so Dropout is active while training.
model.eval()
model.fc.train()
#put on device (last expression, so the cell also displays the architecture)
model.to(device)

#example of how to add layers, where net_add is basically the new network that we are interested in: https://discuss.pytorch.org/t/add-layers-on-pretrained-model/88760
# net_add=net()
# model = nn.Sequential(resnet50, net_add)
# model

In [None]:
# Loss and optimizer
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.

criterion = nn.CrossEntropyLoss()
# Hand only the trainable parameters to the optimizer; frozen ones never receive gradients.
optimizer = optim.Adam([p for p in model.parameters() if p.requires_grad], lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    # Backbone in eval mode (BatchNorm statistics stay fixed), head in train mode so
    # Dropout is applied. Set every epoch so an evaluation run in between cannot
    # leave the head in eval mode.
    model.eval()
    model.fc.train()
    losses = []

    # Wrap the loader itself (not enumerate) so tqdm knows the number of batches;
    # the running loss is shown on the bar instead of printing one line per batch.
    progress = tqdm(train_loader, desc=f"epoch {epoch}")
    for data, targets in progress:
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)
        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())
        progress.set_postfix(loss=losses[-1])

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()
    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses)}")


In [82]:

# Check accuracy on a given data loader (train or test) to see how good our model is
def check_accuracy(loader, model, num_classes=7, device=None, progress=True):
    """Evaluate ``model`` on ``loader`` and report overall and per-class hit counts.

    Args:
        loader: iterable of ``(inputs, targets)`` batches; targets are integer
            class indices in ``range(num_classes)``.
        model: the network to evaluate. It is switched to eval mode for the
            duration of the call and every sub-module's train/eval flag is
            restored afterwards, even if evaluation raises.
        num_classes: number of classes used to pre-populate the result dicts.
        device: device the inputs are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).
        progress: wrap the loader in a tqdm progress bar.

    Returns:
        ``(correct, total)``: two dicts keyed by class index. ``total[k]`` is the
        number of samples whose true class is ``k``; ``correct[k]`` is how many
        of those were predicted as ``k``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0
    correct = {k: 0 for k in range(num_classes)}
    total = {k: 0 for k in range(num_classes)}

    # Remember each sub-module's mode so a mixed train/eval configuration
    # (e.g. frozen backbone in eval, head in train) is restored exactly.
    saved_modes = [(module, module.training) for module in model.modules()]
    model.eval()
    try:
        with torch.no_grad():
            batches = tqdm(loader) if progress else loader
            for x, y in batches:
                x = x.to(device=device)
                predictions = model(x).argmax(dim=1)
                # One host transfer per batch instead of one .item() call per sample.
                for predicted, target in zip(predictions.tolist(), y.tolist()):
                    total[target] += 1
                    if predicted == target:
                        correct[target] += 1
                        num_correct += 1
                num_samples += len(y)
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

    # An empty loader reports 0.00 instead of dividing by zero.
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(
          f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}"
      )
    #hit counts for each class
    return correct, total


# print("Checking accuracy on Training Set")
# check_accuracy(train_loader, model)

print("Checking accuracy on Test Set")
correct, total = check_accuracy(test_loader, model)

# Per-class accuracy: correctly classified samples of class k divided by all samples
# whose true class is k. A class absent from the split is reported explicitly
# rather than raising ZeroDivisionError.
for k in correct:
  if total[k] == 0:
    print("for class %d, there are no samples in this split" % k)
    continue
  print("for class %d, the accuracy is: %f" %(k, correct[k]/total[k]))

Checking accuracy on Test Set


100%|██████████| 40/40 [00:32<00:00,  1.21it/s]

Got 2002 / 2504 with accuracy 79.95
for class 0, the accuracy is: 0.547244
for class 1, the accuracy is: 0.965290
for class 2, the accuracy is: 0.121212
for class 3, the accuracy is: 0.370107
for class 4, the accuracy is: 0.485714
for class 5, the accuracy is: 0.578231
for class 6, the accuracy is: 0.481928





In [83]:
# Raw per-class counts behind the accuracies above: hits per class, then samples per class.
print(correct, total)

{0: 139, 1: 1613, 2: 4, 3: 104, 4: 17, 5: 85, 6: 40} {0: 254, 1: 1671, 2: 33, 3: 281, 4: 35, 5: 147, 6: 83}
