This project uses the WSIROIS dataset, which is part of the TIGER challenge data (https://tiger.grand-challenge.org/).

In [None]:
# Mount Google Drive so this Colab notebook can read files stored on it
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Import everything that's needed for the data exploration
import json
import statistics
from collections import Counter
from statistics import mean

import cv2
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import to_rgb
from matplotlib.patches import Rectangle, Patch
from PIL import Image

In [None]:
# Load the COCO-style annotation file (image entries and bounding boxes).
# The file is opened in a `with` block so the handle is closed once parsed.
ANNOTATION_PATH = "/content/drive/Shared drives/TILs/dataset/TIGER/roi-level-annotations/tissue-cells/tiger-coco.json"
with open(ANNOTATION_PATH) as annotation_file:
  tissue_cells_json = json.load(annotation_file)

# Data exploration

In [None]:
# Report how many image entries the annotation file lists
num_images = len(tissue_cells_json['images'])
print(f"There are {num_images} images.")

There are 1879 images.


In [None]:
# Report how many bounding-box annotations the annotation file lists
num_annotations = len(tissue_cells_json['annotations'])
print(f"There are {num_annotations} annotations.")

There are 30524 annotations.


In [None]:
# Collect the image id referenced by every bounding-box annotation; the number
# of distinct ids is the number of images that have at least one annotation.
# NOTE(review): `id` shadows the builtin of the same name; the name is left
# unchanged in case other cells rely on it.
id = [annotation['image_id'] for annotation in tissue_cells_json['annotations']]
print(len(set(id)), "images have annotations.")

1139 images have annotations.


In [None]:
# Print some statistics about the number of annotations per image.
# Boxes are tallied per image id in a single pass over the annotations, so this
# cell does not depend on variables created by other cells.
boxes_per_image_id = Counter(
  annotation['image_id'] for annotation in tissue_cells_json['annotations']
)

# One entry per image, looked up by the image's own id; images without any
# annotation get a count of 0 (a Counter returns 0 for missing keys).
ann_per_img = [boxes_per_image_id[image['id']] for image in tissue_cells_json['images']]

# The extremes are passed straight to print() so the builtins `min` and `max`
# are not rebound to integers for the rest of the notebook.
print("=== Number of annotations per image ===")
print("Mean:", statistics.mean(ann_per_img),
      "\nMedian:", statistics.median(ann_per_img),
      "\nStandard deviation:", statistics.stdev(ann_per_img),
      "\nMax:", max(ann_per_img),
      "\nMin:", min(ann_per_img))

=== Number of annotations per image ===
Mean: 16.244811069717937 
Median: 2 
Standard deviation: 39.671141407165905 
Max: 660 
Min: 0


In [None]:
# Print some statistics about the image size
# Images with a side longer than this are put in the "large" group (suffix _g),
# all others in the "small" group (suffix _k).
SIZE_THRESHOLD = 800

image_entries = tissue_cells_json['images']
heights = [entry['height'] for entry in image_entries]
widths = [entry['width'] for entry in image_entries]

hi_g = [height for height in heights if height > SIZE_THRESHOLD]
hi_k = [height for height in heights if height <= SIZE_THRESHOLD]
wi_g = [width for width in widths if width > SIZE_THRESHOLD]
wi_k = [width for width in widths if width <= SIZE_THRESHOLD]

# Take the extremes from the data itself instead of comparing against
# hard-coded sentinel values; int() keeps them plain Python integers.
min_height, max_height = int(np.min(heights)), int(np.max(heights))
min_width, max_width = int(np.min(widths)), int(np.max(widths))

print("=== Size of the images ===")
print("Mean height:", statistics.mean(heights), ", Mean width:", statistics.mean(widths),
      "\nMedian height:", statistics.median(heights), ", Median width:", statistics.median(widths),
      "\nStandard deviation height:", statistics.stdev(heights), ", Standard deviation width:", statistics.stdev(widths),
      "\nMax height:", max_height, ", Max width:", max_width,
      "\nMin height:", min_height, ", Min width:", min_width)

# Mean width, then mean height, of the small and large groups
print(statistics.mean(wi_k), statistics.mean(wi_g))
print(statistics.mean(hi_k), statistics.mean(hi_g))

=== Size of the images ===
Mean height: 215.6471527408196 , Mean width: 221.10431080361894 
Median height: 144 , Median width: 144 
Standard deviation height: 258.69575909416295 , Standard deviation width: 278.1007792627728 
Max height: 1253 , Max width: 1419 
Min height: 64 , Min width: 64
144.44495412844037 1211.4296296296295
144.2987385321101 1137.362962962963


In [None]:
# Plot the distribution of the image heights, using bins of width 100 from 0 to 1500
plt.hist(heights, bins=list(range(0, 1501, 100)))
plt.title("Histogram height")
# Axis labels so the figure can be read on its own
plt.xlabel("Image height")
plt.ylabel("Number of images")
plt.show()

In [None]:
# Print the mean height of the large and small images.
# The small group holds every height that is not > 800, i.e. <= 800, so the
# label says "<= 800" rather than "< 800".
print("The mean height of the images with a height > 800:", statistics.mean(hi_g), "\nThe mean height of the images with a height <= 800:", statistics.mean(hi_k))

In [None]:
# Plot the distribution of the image widths, using bins of width 100 from 0 to 1500
plt.hist(widths, bins=list(range(0, 1501, 100)))
plt.title("Histogram width")
# Axis labels so the figure can be read on its own
plt.xlabel("Image width")
plt.ylabel("Number of images")
plt.show()

In [None]:
# Print the mean width of the large and small images.
# The small group holds every width that is not > 800, i.e. <= 800, so the
# label says "<= 800" rather than "< 800".
print("The mean width of the images with a width > 800:", statistics.mean(wi_g), "\nThe mean width of the images with a width <= 800:", statistics.mean(wi_k))

NameError: ignored

In [None]:
# Show an image from the dataset with a given id
# (`image_id` is used instead of `id` so the builtin is not shadowed)
image_id = 12

# Look the image entry up by id and fail with a clear message if it is absent,
# instead of continuing with an undefined or stale `path`.
image_info = next(
  (entry for entry in tissue_cells_json['images'] if entry['id'] == image_id), None
)
if image_info is None:
  raise ValueError(f"No image with id {image_id} in the annotation file")
path = "/content/drive/Shared drives/TILs/dataset/TIGER/roi-level-annotations/tissue-cells/" + image_info['file_name']

# cv2.imread returns None instead of raising when the file cannot be read
tissue_cell_img = cv2.imread(path)
if tissue_cell_img is None:
  raise FileNotFoundError(f"Could not read image: {path}")

# OpenCV decodes colour images in BGR channel order while matplotlib expects
# RGB; convert so the colours are displayed correctly.
tissue_cell_img = cv2.cvtColor(tissue_cell_img, cv2.COLOR_BGR2RGB)
plt.imshow(tissue_cell_img)

In [None]:
# Show an image from the dataset with a given id and draw its bounding boxes on top
# (`image_id` is used instead of `id` so the builtin is not shadowed)
image_id = 12

# Look the image entry up by id and fail with a clear message if it is absent
image_info = next(
  (entry for entry in tissue_cells_json['images'] if entry['id'] == image_id), None
)
if image_info is None:
  raise ValueError(f"No image with id {image_id} in the annotation file")
path = "/content/drive/Shared drives/TILs/dataset/TIGER/roi-level-annotations/tissue-cells/" + image_info['file_name']

# cv2.imread returns None instead of raising when the file cannot be read
image = cv2.imread(path)
if image is None:
  raise FileNotFoundError(f"Could not read image: {path}")
# OpenCV decodes colour images as BGR; matplotlib expects RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

fig, ax = plt.subplots(1,1, figsize=(8,8))
ax.imshow(image)

# Keep only the annotations that belong to the selected image
annotations = [
  annotation for annotation in tissue_cells_json['annotations']
  if annotation['image_id'] == image_id
]

for annotation in annotations:
  # The first two bbox values are passed to Rectangle as its anchor corner
  # (not its centre), followed by the box width and height.
  # NOTE(review): this matches the COCO [x, y, width, height] layout - confirm
  # that the annotation file follows it.
  bbox_x, bbox_y, width, height = annotation['bbox']
  patch = Rectangle((bbox_x, bbox_y), width, height, fill=False, linewidth = 2)
  ax.add_patch(patch)

plt.show()

In [None]:
# Show the pixel-wise segmentation mask of the image with a given id,
# with one display colour per label value
colour_label = {
    0: 'k', # no label
    1: 'c', # invasive tumor
    2: 'g', # tumor associated stroma
    3: 'y', # in situ tumor
    4: 'crimson', # healthy glands
    5: 'm', # necrosis not in situ
    6: 'darkviolet', # inflamed stroma
    7: 'mediumblue' # rest
}

# (`image_id` is used instead of `id` so the builtin is not shadowed)
image_id = 12
image_info = next(
  (entry for entry in tissue_cells_json['images'] if entry['id'] == image_id), None
)
if image_info is None:
  raise ValueError(f"No image with id {image_id} in the annotation file")

# The first 8 characters of the file name are dropped before appending it to
# the masks directory.
# NOTE(review): presumably this strips an images-folder prefix - confirm
# against the file names in the annotation file.
img = image_info['file_name'][8:]
path = "/content/drive/Shared drives/TILs/dataset/TIGER/roi-level-annotations/tissue-cells/masks" + img

# cv2.imread returns None instead of raising when the file cannot be read
image = cv2.imread(path)
if image is None:
  raise FileNotFoundError(f"Could not read mask: {path}")

# Colour a copy so label matching always runs against the untouched mask and
# can never pick up pixels that were already recoloured.
coloured_mask = image.copy()
for label in np.unique(image):
  # to_rgb yields floats in [0, 1]; scale by 255 and round so a channel value
  # of 1.0 maps to 255 instead of overflowing uint8.
  colour = np.round(np.multiply(to_rgb(colour_label[int(label)]), 255)).astype(np.uint8)
  # Pixels whose three channels all equal the label value
  mask = (image == label).all(axis=2)
  coloured_mask[mask] = colour
  print(label)

plt.imshow(coloured_mask)

In [None]:
# Print the amount of segmentation annotations per category.
# For every label value 0-7 this counts the number of mask files in which the
# label occurs at least once.
label_list = [0 for i in range(8)]
images_paths = [img_dict['file_name'] for img_dict in tissue_cells_json['images']]
for image_path in images_paths:
  # The first 8 characters of the file name are dropped before appending it to
  # the masks directory.
  image_path = image_path[8:]
  mask_path = "/content/drive/Shared drives/TILs/dataset/TIGER/roi-level-annotations/tissue-cells/masks" + image_path
  mask = cv2.imread(mask_path)
  # cv2.imread returns None for an unreadable file; stop with a clear error
  # instead of the obscure TypeError the label loop would raise.
  if mask is None:
    raise FileNotFoundError(f"Could not read mask: {mask_path}")
  for label in np.unique(mask):
    label_list[label] += 1

for i, count in enumerate(label_list):
  print("There are", count, "segmentation annotations for category", i)

# Model training

In [None]:
# Import YOLOv5
# Clones the YOLOv5 repository into the current directory, changes into it and
# installs the packages listed in its requirements.txt (quietly, via -q).
# NOTE(review): the clone is not pinned to a commit or tag, and the relative
# `%cd yolov5` makes this cell sensitive to the directory it is run from.
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
%pip install -qr requirements.txt  # install

import torch
# `utils` is imported from the current working directory after the %cd above.
import utils
# The value returned by notebook_init() is bound to the name `display`.
display = utils.notebook_init()  # checks

YOLOv5 🚀 v7.0-162-gc3e4e94 Python-3.10.11 torch-2.0.0+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)


Setup complete ✅ (2 CPUs, 12.7 GB RAM, 23.4/166.8 GB disk)


In [None]:
# Install torchvision
!pip install torch torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Import everything that's needed for the model training, validation and testing
# NOTE(review): the YOLOv5 setup cell already runs `%cd yolov5`; running it a
# second time only succeeds if a nested yolov5 directory exists - confirm the
# intended working directory.
%cd yolov5
import torch
# NOTE(review): `utils` is bound here from the `yolov5` package and rebound a
# few lines below by `import utils`; which of the two succeeds depends on the
# working directory - confirm which one is intended.
from yolov5 import utils
# Duplicate of the `import torch` above.
import torch
import utils
# NOTE: this `display` binding is replaced by the notebook_init() result at
# the end of the cell.
from IPython import display
# NOTE(review): clear_output, Path, yaml, mpimg and glob are not used in the
# cells visible here - confirm they are still needed.
from IPython.display import clear_output
from pathlib import Path
import yaml
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob

%matplotlib inline
# Rebinds `display` to whatever notebook_init() returns; later cells call
# `display.Image(...)` on it.
display = utils.notebook_init()

YOLOv5 🚀 v7.0-162-gc3e4e94 Python-3.10.11 torch-2.0.0+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)


Setup complete ✅ (2 CPUs, 12.7 GB RAM, 23.4/166.8 GB disk)


In [None]:
# Train the YOLOv5 model using transfer learning
# Starts from the 'yolov5m.pt' weights and trains for 300 epochs with batch
# size 256, image size 150 and the 'hyp.scratch-high.yaml' hyperparameters.
# Results are written under the project 'runs_tils', run name
# 'feature_extraction_yolov5m'.
# NOTE(review): `--freeze 12` presumably keeps the first 12 layers fixed during
# training - confirm against the YOLOv5 documentation.
!python train.py --img 150 --hyp 'hyp.scratch-high.yaml' --batch 256 --epochs 300 --data '/content/drive/Shared drives/TILs/Project/yolov5/data/data.yaml' --weights 'yolov5m.pt' --project 'runs_tils' --name 'feature_extraction_yolov5m' --cache --freeze 12

[34m[1mtrain: [0mweights=yolov5m.pt, cfg=, data=/content/drive/Shared drives/TILs/Project/yolov5/data/data.yaml, hyp=hyp.scratch-high.yaml, epochs=300, batch_size=256, imgsz=150, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs_tils, name=feature_extraction_yolov5m, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[12], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
[31m[1mrequirements:[0m /content/requirements.txt not found, check failed.
YOLOv5 🚀 v7.0-162-gc3e4e94 Python-3.10.11 torch-2.0.0+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.1, momentum=0.937, weight_decay

In [None]:
# Display results.png from the 'feature_extraction_yolov5m' run directory
feature_extraction_results_png = "/content/drive/Shared drives/TILs/Project/yolov5/runs_tils/feature_extraction_yolov5m/results.png"
display.Image(feature_extraction_results_png)

# Model fine-tuning

In [None]:
# Fine-tune the model
# Continues from the best.pt weights of the 'feature_extraction_yolov5m' run
# and trains for 150 epochs with batch size 256, image size 150 and the
# 'hyp.VOC.yaml' hyperparameters. No --freeze flag is passed here.
# Results are written under the project 'runs_tils', run name
# 'fine-tuning_yolov5m'.
!python train.py --img 150 --hyp 'hyp.VOC.yaml' --batch 256 --epochs 150 --data '/content/drive/Shared drives/TILs/Project/yolov5/data/data.yaml' --weights '/content/drive/Shared drives/TILs/Project/yolov5/runs_tils/feature_extraction_yolov5m/weights/best.pt' --project 'runs_tils' --name 'fine-tuning_yolov5m' --cache

[34m[1mtrain: [0mweights=/content/drive/Shared drives/TILs/Project/yolov5/runs_tils/feature_extraction_yolov5m/weights/best.pt, cfg=, data=/content/drive/Shared drives/TILs/Project/yolov5/data/data.yaml, hyp=hyp.VOC.yaml, epochs=150, batch_size=256, imgsz=150, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs_tils, name=fine-tuning_yolov5m, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
[31m[1mrequirements:[0m /content/requirements.txt not found, check failed.
YOLOv5 🚀 v7.0-162-gc3e4e94 Python-3.10.11 torch-2.0.0+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)

[3

In [None]:
# Display results.png from the 'fine-tuning_yolov5m' run directory
fine_tuning_results_png = "/content/drive/Shared drives/TILs/Project/yolov5/runs_tils/fine-tuning_yolov5m/results.png"
display.Image(fine_tuning_results_png)

FileNotFoundError: ignored

FileNotFoundError: ignored

<IPython.core.display.Image object>

# Model evaluation

In [None]:
# Evaluate the model using an unseen test set
# Runs val.py with `--task test` on the best.pt weights of the
# 'fine-tuning_yolov5m' run, using batch size 256 and image size 150.
# Results are written under the project 'runs_tils', run name
# 'validation_on_test_data_yolov5m'.
!python val.py --img 150 --weights '/content/drive/Shared drives/TILs/Project/yolov5/runs_tils/fine-tuning_yolov5m/weights/best.pt' --batch 256 --data '/content/drive/Shared drives/TILs/Project/yolov5/data/data.yaml' --task test --project 'runs_tils' --name 'validation_on_test_data_yolov5m'

[34m[1mval: [0mdata=/content/drive/Shared drives/TILs/Project/yolov5/data/data.yaml, weights=['/content/drive/Shared drives/TILs/Project/yolov5/runs_tils/fine-tuning_yolov5m/weights/best.pt'], batch_size=256, imgsz=150, conf_thres=0.001, iou_thres=0.6, max_det=300, task=test, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs_tils, name=validation_on_test_data_yolov5m, exist_ok=False, half=False, dnn=False
[31m[1mrequirements:[0m /content/requirements.txt not found, check failed.
YOLOv5 🚀 v7.0-162-gc3e4e94 Python-3.10.11 torch-2.0.0+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)

Fusing layers... 
Model summary: 212 layers, 20852934 parameters, 0 gradients, 47.9 GFLOPs
[34m[1mtest: [0mScanning /content/drive/Shared drives/TILs/Project/datasets/TILs_object_detection/labels/test.cache... 2071 images, 1349 backgrounds, 0 corrupt: 100% 2071/2071 [00:00<?, ?it/s]
                 Class