In [24]:
import os
import matplotlib.pyplot as plt
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import sys

In [7]:
img_size = 50 # serve a riscalare tutte le immagini a 50x50 pixels

# one-hot vectors: [1,0] = begign
#                  [0,1] = melanoma

ben_training_folder = "melanoma_cancer_dataset/train/benign/"
mal_training_folder = "melanoma_cancer_dataset/train/malignant/"
ben_testing_folder = "melanoma_cancer_dataset/test/benign/"
mal_testing_folder = "melanoma_cancer_dataset/test/malignant/"

ben_training_data = []
mal_training_data = []
ben_testing_data = []
mal_testing_data = []

for filename in os.listdir(ben_training_folder):
    try:
        path = ben_training_folder+filename
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # here a judgment call is needed in order to assess if color is necessary
        # in this case it is not and so is grayscaled
        img = cv2.resize(img, (img_size, img_size))
        img_array = np.array(img)
        ben_training_data.append([img_array, np.array([1,0])])
    except:
        pass

for filename in os.listdir(mal_training_folder):
    try:
        path = mal_training_folder+filename
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # here a judgment call is needed in order to assess if color is necessary
        # in this case it is not and so is grayscaled
        img = cv2.resize(img, (img_size, img_size))
        img_array = np.array(img)
        mal_training_data.append([img_array, np.array([0,1])])
    except:
        pass

for filename in os.listdir(ben_testing_folder):
    try:
        path = ben_testing_folder+filename
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # here a judgment call is needed in order to assess if color is necessary
        # in this case it is not and so is grayscaled
        img = cv2.resize(img, (img_size, img_size))
        img_array = np.array(img)
        ben_testing_data.append([img_array, np.array([1,0])])
    except:
        pass

for filename in os.listdir(mal_testing_folder):
    try:
        path = mal_testing_folder+filename
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # here a judgment call is needed in order to assess if color is necessary
        # in this case it is not and so is grayscaled
        img = cv2.resize(img, (img_size, img_size))
        img_array = np.array(img)
        mal_testing_data.append([img_array, np.array([0,1])])
    except:
        pass

print("Benign training count: ", len(ben_training_data))
print("Malignant training count: ", len(mal_training_data))
print("Benign testing count: ", len(ben_testing_data))
print("Malignant testing count: ", len(mal_testing_data))

trim_training = min([len(ben_training_data), len(mal_training_data)])
print(trim_training)
trim_testing = min([len(ben_testing_data), len(mal_testing_data)])
print(trim_testing)
ben_training_data = ben_training_data[0:trim_training]
#mal_training_data = mal_training_data[0:trim_training]
#ben_testing_data = ben_testing_data[0:trim_testing]
#mal_testing_data = mal_testing_data[0:trim_testing]

Benign training count:  5000
Malignant training count:  4605
Benign testing count:  500
Malignant testing count:  500
4605
500


In [17]:
training_data = ben_training_data + mal_training_data
np.random.shuffle(training_data)
#np.save("melanoma_training_data.npy", training_data)
testing_data = ben_testing_data + mal_testing_data
np.random.shuffle(testing_data)
#np.save("melanoma_testing_data.npy", testing_data)

In [27]:
class Net(nn.Module): # making a classs called "Net" inheriting from parent class nn.Module
    # constructor
    def __init__(self):
        super().__init__() # calling the constructro of the parent class
        
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5) # input size = 1, output size = 32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5) # the input size of the next conv layer must be the output of the previous
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5)
        
        # the input size of the first fully connected layer MUST be chosen by attempts
        self.fc1 = nn.Linear(128*2*2, 512) # fully connected layers
        self.fc2 = nn.Linear(512, 2) # THE FINAL OUTPUT IS BINARY (MELANOMA VS BENIGN)
        
    def forward(self, x):
        
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2,2))
        
        x = x.view(-1,128*2*2)
        
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.softmax(x) # it reports the numbers in the output as a probability to have or to not have a melanoma
        
        return(x)

In [25]:
train_X = torch.Tensor([item[0] for item in training_data])
train_X = train_X/255 # to have inputs between 0 and 1 (greyscale goes from 0 to 255)

train_y = torch.Tensor([item[1] for item in training_data])

  train_X = torch.Tensor([item[0] for item in training_data])


In [28]:
net = Net()
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss() # MEAN SQUARED ERROR

batch_size = 100 # how may images are fed through the net at once
epochs = 2 # how many times the data are fed through the net

for epoch in range(epochs):
    for i in range(0, len(train_X), batch_size):
        print(f"EPOCH {epoch+1}, percentage complete: {(i/len(train_X))*100}%")
        
        batch_X = train_X[i:i+batch_size].view(-1, 1, img_size, img_size) # -1 gives flexibility about batching
        batch_y = train_y[i:i+batch_size]
        
        optimizer.zero_grad() # reset gradients of model parameters to 0
        
        outputs = net(batch_X) # the output are of this type [0.33 0.67]
        loss = loss_function(outputs, batch_y)
        
        loss.backward() # backpropagation computes the gradients of the loss wrt the parameters
        optimizer.step() # updates parameters based on the gradients we have just computed
        
torch.save(net.state_dict(), "saved_model.pth")

EPOCH 1, percentage complete: 0.0%


  x = F.softmax(x) # it reports the numbers in the output as a probability to have or to not have a melanoma


EPOCH 1, percentage complete: 1.0857763300760044%
EPOCH 1, percentage complete: 2.1715526601520088%
EPOCH 1, percentage complete: 3.257328990228013%
EPOCH 1, percentage complete: 4.3431053203040175%
EPOCH 1, percentage complete: 5.428881650380022%
EPOCH 1, percentage complete: 6.514657980456026%
EPOCH 1, percentage complete: 7.600434310532031%
EPOCH 1, percentage complete: 8.686210640608035%
EPOCH 1, percentage complete: 9.77198697068404%
EPOCH 1, percentage complete: 10.857763300760045%
EPOCH 1, percentage complete: 11.943539630836048%
EPOCH 1, percentage complete: 13.029315960912053%
EPOCH 1, percentage complete: 14.115092290988057%
EPOCH 1, percentage complete: 15.200868621064062%
EPOCH 1, percentage complete: 16.286644951140065%
EPOCH 1, percentage complete: 17.37242128121607%
EPOCH 1, percentage complete: 18.458197611292075%
EPOCH 1, percentage complete: 19.54397394136808%
EPOCH 1, percentage complete: 20.629750271444085%
EPOCH 1, percentage complete: 21.71552660152009%
EPOCH 1, p

In [30]:
net.eval() # now we do not change the model, but we test it

test_X = torch.Tensor([item[0] for item in testing_data])
test_X = test_X/255 # to have inputs between 0 and 1 (greyscale goes from 0 to 255)

test_y = torch.Tensor([item[1] for item in testing_data])

In [36]:
correct = 0
total = 0

with torch.no_grad(): # this speeds up the algorithm since it tells PyTorch to not store the gradients here computed
    for i in range(len(test_X)):
        output = net(test_X[i].view(-1, 1, img_size, img_size))
        
        if output[0][0] > output[0][1]:
            guess = "B"
        else:
            guess = "M"
        
        real_label = test_y[i]
        if real_label[0] > real_label[1]:
            real_class = "B"
        else:
            real_class = "M"
            
        if guess == real_class:
            correct +=1
        total += 1
        
precision = correct/total
print(f"Precision: {precision*100}%")

  x = F.softmax(x) # it reports the numbers in the output as a probability to have or to not have a melanoma


Precision: 80.30000000000001%
