<a href="https://colab.research.google.com/github/Rocco000/OncoVision/blob/main/Scripts/ModelsScripts/Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import the required libraries

In [None]:
import torch
import torchvision.transforms as transforms
import torch.nn.functional as F

#To take the real time image
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
from google.colab.patches import cv2_imshow
import PIL
import io
import html
import time
import numpy as np

#To authenticate the user
from google.colab import auth
from googleapiclient.discovery import build

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device: ", device)

Define the correct paths

In [None]:
from google.colab import drive
drive.mount('/content/drive') #Connect to Google Drive

#Authenticate the user running the notebook: the location of the project
#folder inside Google Drive depends on which account is logged in.
auth.authenticate_user()
drive_service = build('drive', 'v3')

#Get user information
about = drive_service.about().get(fields='user').execute()
user_email = about['user']['emailAddress']

#One account has the SE4AI folder directly in MyDrive; every other account
#reaches it through "LinkToOncoVision" (presumably a Drive shortcut - confirm).
if user_email =="rocco.iul2000@gmail.com":
  project_root = "/content/drive/MyDrive/SE4AI/"
else:
  project_root = "/content/drive/MyDrive/LinkToOncoVision/SE4AI/"

#Every path is derived from the single root so the two layouts cannot drift apart.
path_prefix = project_root + "Model/"
path_base_model = path_prefix + "EvaluationFirstApproach/model_parameters.pth"
path_ga1_model = path_prefix + "EvaluationGAFirstApproach/ModelsConfigurations/best_solution.pth"
path_ga2_model = path_prefix + "EvaluationGASecondApproach/ModelsConfigurations/best_solution.pth"
path_ga2_solution = path_prefix + "EvaluationGASecondApproach/BestSolutionGA2.csv"

#Execute the helper notebooks in this namespace. This is the programmatic
#form of "%run '<path>'", which allows the path to be built from project_root.
for notebook_name in ("ModelArchitecture1.ipynb", "ModelArchitecture2.ipynb", "Explainability.ipynb"):
  get_ipython().run_line_magic("run", f"'{project_root}Scripts/{notebook_name}'")

Validate a GA2 solution (its sequence of layer codes) and compute the input size of the first `nn.Linear` layer of the resulting network

In [3]:
#Layer codes used by a GA2 solution:
#  1..5   -> conv-128, conv-64, conv-32, conv-16, conv-8
#  6..9   -> max-3, max-2, avg-3, avg-2 (pooling)
#  10     -> Dropout2d
#  11     -> BatchNorm
#  12, 13 -> ReLU, LeakyReLU
_CONV_CODES = (1, 2, 3, 4, 5)
_CONV_OUT_CHANNELS = {1: 128, 2: 64, 3: 32, 4: 16, 5: 8}
_POOL_CODES = (6, 7, 8, 9)
_ACTIVATION_CODES = (12, 13)


def _last_conv_channels(solution, index):
  #Output channels of the closest convolution before `index`; 8 when there is
  #none, because the network always starts with an implicit conv-8.
  for j in range(index-1, -1, -1):
    if solution[j] in _CONV_CODES:
      return _CONV_OUT_CHANNELS[int(solution[j])]
  return 8


def _closes_long_conv_run(solution, index):
  #True when the (up to) three codes right before `index` all lie in the
  #convolution range 1..5. At index 2 two codes are enough, since the implicit
  #first conv-8 completes the run; at index 0 and 1 nothing is checked.
  if index < 2:
    return False
  lookback = min(index, 3)
  return all(1 <= solution[index-k] <= 5 for k in range(1, lookback+1))


def check_validity(solution):
  """
  Check the structural rules of a GA2 solution and compute the input size of
  the first nn.Linear layer.

  Params:
          solution: indexable sequence of layer codes (see the table above)
  Returns:
          (True, size) when every rule holds and 8 <= size <= 25000,
          (False, 0) otherwise.

  Rules enforced:
    - a convolution must not come right after three codes in the 1..5 range
      (the original notes report loss.backward() running out of memory);
    - a conv-128 that is not the first code needs a pooling layer somewhere
      before it (CUDA out of memory otherwise, per the original notes);
    - a pooling layer that is not the first code must directly follow a
      convolution or an activation;
    - Dropout2d must directly follow a pooling layer and cannot be first;
    - BatchNorm that is not the first code must directly follow a convolution;
    - an activation that is not the first code must directly follow a
      convolution or a BatchNorm.
  Codes outside 1..13 are ignored.

  The running width/height/size come from size_nn_linear_calculator, which is
  defined outside this cell.
  """
  #The network always starts with a conv-8 applied to the 450x600 input.
  width_in, height_in, size = size_nn_linear_calculator(layer_type=5, width=450, height=600, channels=None)

  for i, element in enumerate(solution):
    has_previous = i >= 1
    previous = solution[i-1] if has_previous else None

    if element in _CONV_CODES:
      if _closes_long_conv_run(solution, i):
        return False, 0

      #conv-128 only: some pooling layer must appear earlier in the solution
      if element == 1 and has_previous and not any(solution[j] in _POOL_CODES for j in range(i)):
        return False, 0

      width_in, height_in, size = size_nn_linear_calculator(layer_type=int(element), width=width_in, height=height_in, channels=None)
    elif element in _POOL_CODES:
      if has_previous and previous not in _CONV_CODES and previous not in _ACTIVATION_CODES:
        return False, 0

      #The pooling layer keeps the channel count of the last convolution
      num_channels = _last_conv_channels(solution, i)
      width_in, height_in, size = size_nn_linear_calculator(layer_type=int(element), width=width_in, height=height_in, channels=num_channels)
    elif element == 10:
      #Dropout2d
      if not has_previous or previous not in _POOL_CODES:
        return False, 0
    elif element == 11:
      #BatchNorm
      if has_previous and previous not in _CONV_CODES:
        return False, 0
    elif element in _ACTIVATION_CODES:
      #ReLU and LeakyReLU
      if has_previous and previous not in _CONV_CODES and previous != 11:
        return False, 0

  size = int(size)
  if size < 8 or size > 25000:
    return False, 0
  return True, size

Load the model

In [None]:
choose = int(input("On which model do you want to demo? (1 = base model, 2 = GA1, 3 = GA2)\n"))

path_demo = ""

my_model = None
layers = list() #To store the architecture of ga2 solution

match(choose):
  case 1:
    #Base model
    print("Demo with BASE model")
    my_model = ConvModel1()
    my_model.load_state_dict(torch.load(path_base_model))
    my_model = my_model.to(device)
    my_model.eval()

    path_demo = "EvaluationFirstApproach/Explainability/RealTime/"
  case 2:
    #Best solution GA1
    print("Demo with GA1 best solution")
    my_model = ConvModel1()
    my_model.load_state_dict(torch.load(path_ga1_model))
    my_model = my_model.to(device)
    my_model.eval()

    path_demo = "EvaluationGAFirstApproach/Explainability/RealTime/"
  case 3:
    #Best solution GA2
    print("Demo with GA2 best solution")

    #Take the architecture of ga2 best solution
    with open(path_ga2_solution, "r", newline="") as csvfile:
      reader = csv.reader(csvfile)
      next(reader) #Jump the header
      i = 0
      for row in reader:
        for element in row:
          if i>=4 and i<=19:
            layers.append(int(float(element)))

          i+=1

    print("GA2 best solution layers: ",layers)
    flag, size = check_validity(layers)
    print("Flag: ",flag)
    print("Input size of the output linear layer: ",size)
    my_model = ConvModel2(layers,size)
    my_model.load_state_dict(torch.load(path_ga2_model))
    my_model = my_model.to(device)
    my_model.eval()

    path_demo = "EvaluationGASecondApproach/Explainability/RealTime/"
  case _:
    print("Input error")

# Prediction in Real-Time

Helper functions to capture a photo from the webcam

In [5]:
def take_photo(filename='photo.jpg', quality=0.8):
  """
  Show a live webcam preview with a "Capture" button in the notebook output
  and return the captured frame.

  Params:
          filename: not used by the function body
          quality: JPEG quality forwarded to canvas.toDataURL in the browser
  Returns:
          the value produced by js_to_image for the captured data URL
  """
  capture_script = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  # Register the JavaScript function in the cell output, then call it and
  # wait for the data URL of the captured frame.
  display(capture_script)
  data_url = eval_js(f'takePhoto({quality})')
  return js_to_image(data_url)

def js_to_image(js_reply):
  """
  Decode a webcam capture returned by JavaScript into a PIL image.

  Params:
          js_reply: string of the form "<header>,<base64 payload>" containing
                    the encoded image from the webcam
  Returns:
          pil_image: PIL image with the channels in RGB order
  Raises:
          ValueError: if the payload cannot be decoded as an image
  """
  # decode base64 image (everything after the first comma)
  image_bytes = b64decode(js_reply.split(',')[1])

  # convert bytes to numpy array
  jpg_as_np = np.frombuffer(image_bytes, dtype=np.uint8)

  # decode numpy array into OpenCV BGR image
  img = cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR)
  if img is None:
    # imdecode signals an undecodable buffer by returning None; fail here
    # with a clear message instead of inside cvtColor
    raise ValueError("Could not decode the captured image")

  #Transform the BGR image in a RGB image
  rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  #Transform the image in a PIL image
  pil_image = transforms.ToPILImage()(rgb_img)

  return pil_image

# Encode an RGBA overlay (e.g. a bounding-box layer) as a PNG data URL so it can be drawn on top of the video stream
def bbox_to_bytes(bbox_array):
  """
  Params:
          bbox_array: Numpy array (pixels), interpreted as an RGBA image.
  Returns:
          str: "data:image/png;base64,..." URL holding the PNG-encoded image
  """
  overlay = PIL.Image.fromarray(bbox_array, 'RGBA')

  # serialise the overlay as PNG into an in-memory buffer
  png_buffer = io.BytesIO()
  overlay.save(png_buffer, format='png')

  # base64-encode the PNG bytes and wrap them in a data URL
  encoded_png = b64encode(png_buffer.getvalue()).decode('utf-8')
  return f'data:image/png;base64,{encoded_png}'

Prediction on input image

In [None]:
transform = transforms.Compose([
      transforms.Resize((600, 450)),  # Resize the images to height 600, width 450
      transforms.ToTensor()  # Convert the images to tensors
  ])

# Only the capture is guarded: errors are thrown if the user does not have a
# webcam or does not grant the page permission to access it. Failures in the
# model or in the explanation step are left to surface with their traceback
# instead of being reported as a webcam problem.
try:
  img = take_photo()
except Exception as err:
  print("Error")
  print(str(err))
else:
  print("Captured image:")
  # NOTE(review): plt is not imported in this notebook; presumably it comes
  # from one of the notebooks executed with %run - confirm.
  plt.imshow(img)
  plt.axis('off')
  plt.show()

  img = transform(img)
  img = img.unsqueeze(0) #Add the batch dimension
  img = img.to(device)

  #Gradients are not needed to compute the prediction itself
  with torch.no_grad():
    prediction = my_model(img)
    prediction = F.softmax(prediction, dim=1) #Apply the Softmax function

  class_probabilities = prediction[0]
  for class_idx, probability in enumerate(class_probabilities):
    print(f"Class {class_idx}: Probability = {probability.item():.4f}")

  _, prediction = torch.max(prediction, 1) #Take the predicted class (class with high probability)
  prediction = prediction.cpu().numpy()
  if prediction[0] == 0:
    print("Model prediction is: BENIGN")
  else:
    print("Model prediction is: MELANOMA")

  img = img.cpu()
  img = img.squeeze(0) #Remove the batch dimension
  images = [img]

  #Only the GA2 model needs its layer list to be explained
  explain_prediction(my_model, images, prediction, 2, path_prefix+path_demo, layers if choose == 3 else None)