## Cloning Repo

In [None]:
# Colab environment setup: clone the YOLOv8-multi-task repository, install it in
# editable mode from its checkout, adjust the installed packages, then restart
# the runtime.
import os
!git clone https://github.com/JiayuanWang-JW/YOLOv8-multi-task
os.chdir('/content/YOLOv8-multi-task')  # `pip install -e .` below installs from the current directory
!pip install --quiet -e .
os.chdir('/content')  # later cells use paths relative to /content
!pip install --quiet ultralytics
!pip uninstall -y albumentations ## Data augmentations did not work with multitask
# Remove the `sample_data` directory from the working directory.
!rm -r sample_data
os.kill(os.getpid(), 9) # Send signal 9 (SIGKILL) to this kernel process to force a runtime restart

Cloning into 'YOLOv8-multi-task'...
remote: Enumerating objects: 1698, done.[K
remote: Counting objects: 100% (290/290), done.[K
remote: Compressing objects: 100% (234/234), done.[K
remote: Total 1698 (delta 105), reused 194 (delta 49), pack-reused 1408 (from 1)[K
Receiving objects: 100% (1698/1698), 19.55 MiB | 14.50 MiB/s, done.
Resolving deltas: 100% (201/201), done.
Updating files: 100% (1313/1313), done.
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.8/313.8 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hFound existing installation: albumentations 1.4.15
Uninstalling albumentations-1.4.15:
  Successfully uninstalled albumentations-1.4.15


$$$$

## Data Preparation

**Mount Drive**

In [None]:
# Mount Google Drive at /content/drive; later cells read from and write to
# paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Copy and unrar Dataset

In [None]:
import os
import shutil
from tqdm.auto import tqdm

In [None]:
## Download the archive by Google Drive file id, extract /content/RailSem19.rar
## into ./RailSem19, then delete the archive (original timing note: 1 min 21 s).
!gdown --id 1iBpVko2sehPeOPhQXWaujOpKFQmNvd_m
!unrar x -y -idq /content/RailSem19.rar RailSem19
!rm /content/RailSem19.rar

$$$$

### Prepare Data Folders

In [None]:
import numpy as np
import json
from random import shuffle
import cv2
import sys

from sklearn.model_selection import train_test_split
from PIL import Image

In [None]:
# os.listdir returns entries in arbitrary, filesystem-dependent order. Sorting
# makes the listing deterministic, so the seeded train/validation split below
# selects the same files on every machine and every run.
mask_files = sorted(os.listdir('RailSem19/rs19_masks'))
json_files = sorted(os.listdir('RailSem19/rs19_jsons'))
len(mask_files)

8500

In [None]:
# Hold out 10% of the masks for validation; the fixed seed keeps the split stable.
train_ratio = 0.9  # 90% training, 10% validation
split = train_test_split(mask_files, train_size=train_ratio, random_state=42)
train_masks, val_masks = split

print(f"Training Paths: {len(train_masks)}")
print(f"Validation Paths: {len(val_masks)}")

Training Paths: 7650
Validation Paths: 850


In [None]:
def create_directory_structure(root, segmentation_classes):
    """Create the train/val folder layout for the multi-task dataset.

    Under ``root`` this creates ``images/{train,val}``,
    ``detection-object/labels/{train,val}`` and, for every entry in
    ``segmentation_classes``, ``<class>/labels/{train,val}``.
    Directories that already exist are left untouched.
    """
    splits = ("train", "val")

    # Shared image folders and detection labels come first ...
    targets = [f"{root}/images/{split}" for split in splits]
    targets += [f"{root}/detection-object/labels/{split}" for split in splits]

    # ... followed by one label tree per segmentation class.
    for seg_class in segmentation_classes:
        targets += [f"{root}/{seg_class}/labels/{split}" for split in splits]

    for target in targets:
        os.makedirs(target, exist_ok=True)

# Build the dataset folder tree in ./dataset_root for the three segmentation tasks.
root = "dataset_root"
segmentation_classes = ["track", "rail", "pole"] # renamed later to 'seg-track-04', 'seg-rail-05', 'seg-pole-06'
# segmentation_classes = ["track", "rail", "vegetation", "pole", "construction"]
create_directory_structure(root, segmentation_classes)
print("Directories Created !!!")

Directories Created !!!


**Utility Functions**

In [None]:
def polygon2bbox(pnts_draw):
    """Return the axis-aligned bounding box of a polygon.

    ``pnts_draw`` is indexed as a 2-D array whose column 0 holds x and
    column 1 holds y. The result is ``(min_x, min_y, max_x, max_y)``.
    """
    xs = pnts_draw[:, 0]
    ys = pnts_draw[:, 1]
    return xs.min(), ys.min(), xs.max(), ys.max()

def json2bbox(inp_path_json):
    """Collect bounding boxes per label from an annotation JSON file.

    Only objects whose ``label`` is one of the restricted detection classes
    are kept. An object with a ``boundingbox`` entry contributes that entry
    as-is; otherwise, an object with a ``polygon`` entry contributes the
    polygon's axis-aligned bounding box (coordinates rounded to int32).

    Args:
        inp_path_json: path to a JSON file with a top-level ``"objects"`` list.

    Returns:
        dict mapping every restricted label to a list of boxes (possibly
        empty); polygon-derived boxes are ``[min_x, min_y, max_x, max_y]``.
    """
    restricted_classes = ['track-sign-front',
                          'person-group', 'person',
                          'car', 'truck', 'train-car',
                          'track-signal-front', 'track-signal-back']

    # Use a context manager so the file handle is closed deterministically
    # (this function is called once per image, i.e. thousands of times).
    with open(inp_path_json, 'r') as json_fp:
        inp_json = json.load(json_fp)

    boxes_by_label = {label: [] for label in restricted_classes}
    for obj in inp_json["objects"]:
        label = obj['label']
        if label not in boxes_by_label:
            continue

        if "boundingbox" in obj:
            boxes_by_label[label].append(obj["boundingbox"])
        elif "polygon" in obj:
            pnts_draw = np.around(np.array(obj["polygon"])).astype(np.int32)
            min_x, min_y, max_x, max_y = polygon2bbox(pnts_draw)
            boxes_by_label[label].append([min_x, min_y, max_x, max_y])
    return boxes_by_label


def box2yolo(x1, y1, x2, y2, image_width, image_height, class_id):
    """Format a corner-style box as a YOLO detection label line.

    Takes the corners ``(x1, y1)`` and ``(x2, y2)`` in pixels and returns
    ``"<class_id> <x_center> <y_center> <width> <height>"`` where the four
    numbers are divided by the image width/height respectively.
    """
    # Box centre, normalised by the image size.
    cx = ((x1 + x2) / 2.0) / image_width
    cy = ((y1 + y2) / 2.0) / image_height

    # Box extent, normalised by the image size.
    bw = (x2 - x1) / image_width
    bh = (y2 - y1) / image_height

    return f"{class_id} {cx} {cy} {bw} {bh}"


def segment2yolo(coords, image_width, image_height, class_id):
    """Format polygons as YOLO segmentation label lines.

    ``coords`` is an iterable of polygons, each an iterable of ``(x, y)``
    pixel pairs. Every polygon becomes one string: the class id followed by
    the normalised ``x y`` pairs, each pair followed by a single space
    (so each line ends with a trailing space).
    """
    label_lines = []
    for contour in coords:
        pieces = [f"{class_id} "]
        for px, py in contour:
            pieces.append(f"{px / image_width} {py / image_height} ")
        label_lines.append("".join(pieces))
    return label_lines



# Maps source labels to the unified class ids (0-6) shared by all tasks.
# String keys are annotation labels from the JSON files; integer keys are
# pixel values of the semantic masks (they match `class_groups`).
# NOTE: ('track-sign-front') and (5) have no trailing comma, so they are a
# plain str and a plain int rather than 1-tuples; get_class_id handles both
# tuple and non-tuple keys.
classes_dict = {
    ('track-signal-front', 'track-signal-back'):0, # track-signal
    ('track-sign-front'):1, # track-sign
    ('person-group','person'):2, # person
    ('car', 'truck', 'train-car'):3, # vehicule
    (3,12):4,        # track
    (17,18):5,       # rail
    (5):6,           # Pole
}

def get_class_id(key):
    """Look up the unified class id for a label (or mask value) in ``classes_dict``.

    A tuple entry matches when ``key`` is one of its members; any other
    entry matches on equality. Returns ``None`` when nothing matches.
    """
    for group, class_id in classes_dict.items():
        matched = (key in group) if isinstance(group, tuple) else (key == group)
        if matched:
            return class_id
    return None  # key is not part of any group

# Mask pixel values merged into each segmentation target. The export loops
# pair these groups positionally with the 'track', 'rail', 'pole' keys, so
# the order here must stay track -> rail -> pole.
class_groups = [[3,12], # track
                [17,18], # rail
                [5], # Pole
                ]

# Class id written as the first field of each segmentation label line.
key2class = {
    'track':4,
    'rail':5,
    'pole':6,
}

In [None]:
# Export the training split: copy each image into dataset_root/images/train and
# write one detection label file plus one label file per segmentation task.
for mask in tqdm(train_masks):
    stem = os.path.splitext(mask)[0]

    # Semantic label map: a grayscale image whose pixel values are class ids.
    mask_path = os.path.join('RailSem19/rs19_masks', mask)
    im_id_map = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if im_id_map is None:  # cv2.imread returns None instead of raising
        raise FileNotFoundError(f"Could not read mask {mask_path}")

    # Copy the image. Only its dimensions are needed for normalisation, so
    # read them from the header instead of decoding the whole JPEG to RGB.
    img_path = os.path.join('RailSem19/rs19_jpgs', f'{stem}.jpg')
    shutil.copy(img_path, 'dataset_root/images/train')
    with Image.open(img_path) as pil_img:
        w, h = pil_img.size

    ## Detection labels: one "class cx cy w h" line per kept bounding box.
    json_file = os.path.join('RailSem19/rs19_jsons', f'{stem}.json')
    bbox = json2bbox(json_file)
    with open(f"dataset_root/detection-object/labels/train/{stem}.txt", 'w') as f:
        for key, boxes in bbox.items():
            class_id = get_class_id(key)
            for box in boxes:
                x1, y1, x2, y2 = box
                annot = box2yolo(x1, y1, x2, y2, w, h, class_id)
                f.write(f'{annot}\n')

    ## Segmentation labels: external contours of each class group's binary mask.
    polygons = {
        'track': [],
        'rail': [],
        'pole': [],
    }
    # The dict keys and class_groups are in the same order, so pair them directly.
    for key, select_classes in zip(polygons, class_groups):
        # 255 where the pixel belongs to any class of the group, 0 elsewhere.
        binary_mask = np.isin(im_id_map, select_classes).astype(np.uint8) * 255

        contours, _hierarchy = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for cnt in contours:
            if cv2.contourArea(cnt) > 200:  # drop tiny regions
                polygons[key].append([list(point[0]) for point in cnt])

    for key, coords in polygons.items():
        # The label file is always created; it stays empty when the image
        # contains no region of this class.
        with open(f"dataset_root/{key}/labels/train/{stem}.txt", 'w') as f:
            if coords:
                yolo_format = segment2yolo(coords, w, h, key2class[key])
                f.write('\n'.join(yolo_format))

  0%|          | 0/7650 [00:00<?, ?it/s]

In [None]:
# Export the validation split: copy each image into dataset_root/images/val and
# write one detection label file plus one label file per segmentation task.
for mask in tqdm(val_masks):
    stem = os.path.splitext(mask)[0]

    # Semantic label map: a grayscale image whose pixel values are class ids.
    mask_path = os.path.join('RailSem19/rs19_masks', mask)
    im_id_map = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if im_id_map is None:  # cv2.imread returns None instead of raising
        raise FileNotFoundError(f"Could not read mask {mask_path}")

    # Copy the image. Only its dimensions are needed for normalisation, so
    # read them from the header instead of decoding the whole JPEG to RGB.
    img_path = os.path.join('RailSem19/rs19_jpgs', f'{stem}.jpg')
    shutil.copy(img_path, 'dataset_root/images/val')
    with Image.open(img_path) as pil_img:
        w, h = pil_img.size

    ## Detection labels: one "class cx cy w h" line per kept bounding box.
    json_file = os.path.join('RailSem19/rs19_jsons', f'{stem}.json')
    bbox = json2bbox(json_file)
    with open(f"dataset_root/detection-object/labels/val/{stem}.txt", 'w') as f:
        for key, boxes in bbox.items():
            class_id = get_class_id(key)
            for box in boxes:
                x1, y1, x2, y2 = box
                annot = box2yolo(x1, y1, x2, y2, w, h, class_id)
                f.write(f'{annot}\n')

    ## Segmentation labels: external contours of each class group's binary mask.
    polygons = {
        'track': [],
        'rail': [],
        'pole': [],
    }
    # The dict keys and class_groups are in the same order, so pair them directly.
    for key, select_classes in zip(polygons, class_groups):
        # 255 where the pixel belongs to any class of the group, 0 elsewhere.
        binary_mask = np.isin(im_id_map, select_classes).astype(np.uint8) * 255

        contours, _hierarchy = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for cnt in contours:
            if cv2.contourArea(cnt) > 200:  # drop tiny regions
                polygons[key].append([list(point[0]) for point in cnt])

    for key, coords in polygons.items():
        # The label file is always created; it stays empty when the image
        # contains no region of this class.
        with open(f"dataset_root/{key}/labels/val/{stem}.txt", 'w') as f:
            if coords:
                yolo_format = segment2yolo(coords, w, h, key2class[key])
                f.write('\n'.join(yolo_format))

  0%|          | 0/850 [00:00<?, ?it/s]

In [None]:
# Delete the extracted RailSem19 source data; the images and generated labels
# were written to dataset_root/ by the two export loops.
!rm -r /content/RailSem19

**Rename Folders**

In [None]:
# Rename each segmentation label folder to the name listed in the dataset
# YAML's `labels_list`.
for old_name, new_name in (("track", "seg-track-04"),
                           ("rail", "seg-rail-05"),
                           ("pole", "seg-pole-06")):
    os.rename(f"/content/dataset_root/{old_name}", f"/content/dataset_root/{new_name}")

**Create yaml file for dataset**

In [None]:
# Write the dataset description used for training: image folders, the four
# label sets (one detection set plus three segmentation sets) and class names.
# NOTE(review): `file_path` is relative to the working directory while the
# `path` entry inside the YAML is absolute — presumably the working directory
# is /content; confirm.
file_path = os.path.join("YOLOv8-multi-task/ultralytics/datasets", 'railsem19.yaml')
yaml_content = """path: /content/dataset_root # dataset root dir

train: images/train
val: images/val

labels_list:
  - detection-object
  - seg-track-04
  - seg-rail-05
  - seg-pole-06

tnc: 7  # number of classes
nc_list: [4,1,1,1]
map: [None,{'4':'0'},{'5':'0'},{'6':'0'}]

# Classes for all tasks
names:
  0: track-signal
  1: track-sign
  2: person
  3: vehicule
  4: track  # track class segmentation
  5: rail  #  rail class segmentation
  6: pole # pole class segmentation"""

# Write the content to the .yaml file
with open(file_path, 'w') as file:
    file.write(yaml_content)

**Correct Concat_dropout Class**

In [None]:
# Replacement source for the repo's Concat_dropout module: concatenates the
# inputs and projects them back to ch[0] channels with a 1x1 convolution, or
# passes x[0] through unchanged, depending on a learnable gate.
Concat_dropout_class = """
class Concat_dropout(nn.Module):
    def __init__(self, dimension=1, ch=None):
        super().__init__()
        self.d = dimension
        self.weight = nn.Parameter(torch.rand(1) * 1e-1,
                      requires_grad=True)

        self.conv = nn.Conv2d(sum(ch),
                               ch[0],
                               kernel_size=1,
                               stride=1)

    def forward(self, x):

        cdt = torch.sigmoid(self.weight) >= 0.5
        return torch.where(cdt,
                          self.conv(torch.cat(x, self.d)),
                          x[0])
"""

conv_py_path = "/content/YOLOv8-multi-task/ultralytics/nn/modules/conv.py"

with open(conv_py_path, 'r') as file:
    lines = file.readlines()

class_lines = Concat_dropout_class.splitlines()
class_lines = [l + '\n' for l in class_lines]

# Find the existing class by name instead of assuming it is exactly the last
# 23 lines of the file. That assumption breaks as soon as the file changes,
# including after this cell has already run once (the replacement is shorter
# than 23 lines, so a second run would also cut into the preceding code).
start = next((i for i, line in enumerate(lines)
              if line.startswith("class Concat_dropout(")), None)
if start is None:
    raise RuntimeError(f"class Concat_dropout not found in {conv_py_path}")

# The class body ends at the next top-level definition, or at end of file.
end = next((i for i in range(start + 1, len(lines))
            if lines[i].startswith(("class ", "def ", "@"))), len(lines))

remainder = lines[end:]
if remainder:
    # Keep the conventional blank lines before whatever follows the class.
    class_lines += ['\n', '\n']
lines = lines[:start] + class_lines + remainder

with open(conv_py_path, 'w') as file:
    file.writelines(lines)

**Correct TensorBoard Callback**

In [None]:
# Replace the repo's TensorBoard callback module with the copy stored on Drive.
# NOTE(review): the contents of the replacement file are not visible in this
# notebook; `cp` alone would overwrite the target, so the `rm` looks redundant.
!rm /content/YOLOv8-multi-task/ultralytics/yolo/utils/callbacks/tensorboard.py
!cp "/content/drive/MyDrive/Stage 2024/tensorboard.py" /content/YOLOv8-multi-task/ultralytics/yolo/utils/callbacks/tensorboard.py

**Create Model Architecture**

In [None]:
# Write the model architecture config: a shared backbone (layers 0-9), a
# detection neck (10-21), three segmentation decoder branches that each start
# from layer 9 (track 22-33, rail 34-45, pole 46-57) and four task heads (58-61).
# NOTE(review): a few inline YAML comments ("for lane segmentation",
# "30 for drivable segmentation") look carried over from another config; they
# are part of the string written to disk, so they are left untouched here.
file_path = os.path.join("YOLOv8-multi-task/ultralytics/models/v8", 'railsem19.yaml')
yaml_content = """
# Parameters
######Jiayuan
tnc: 7  # number of classes
#######

scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]  # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
  m: [0.67, 0.75, 768]   # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
  l: [1.00, 1.00, 512]   # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512]   # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

scale: n

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]]  # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 12

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]]  # cat head P4
  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]]  # cat head P5
  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)


 # track
  - [9, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat_dropout, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 24

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat_dropout, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 27 (P3/8-small)

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  #  for lane segmentation
  - [[-1, 2], 1, Concat_dropout, [1]]  #  cat backbone P2
  - [-1, 3, C2f, [128]]  # 30 (P2)

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']] #
  - [[-1, 0], 1, Concat_dropout, [1]]  #  cat backbone P1
  - [-1, 3, C2f, [64]]  # 33 (P1)



 # rail
  - [9, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat_dropout, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 36

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat_dropout, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 39 (P3/8-small)

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 30 for drivable segmentation
  - [[-1, 2], 1, Concat_dropout, [1]]
  - [-1, 3, C2f, [128]]  # 42 (P2)

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']] #
  - [[-1, 0], 1, Concat_dropout, [1]]
  - [-1, 3, C2f, [64]]  # 45 (P1)


  # pole
  - [9, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat_dropout, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 48

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat_dropout, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 51 (P3/8-small)

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 30 for drivable segmentation
  - [[-1, 2], 1, Concat_dropout, [1]]
  - [-1, 3, C2f, [128]]  # 54 (P2)

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']] #
  - [[-1, 0], 1, Concat_dropout, [1]]
  - [-1, 3, C2f, [64]]  # 57 (P1)

# tasks
  - [[15, 18, 21], 1, Detect, [4]]  # 58 Detect(P3, P4, P5)

  - [[33], 1, Segment, [1, 32, 256]]  # 59 Track-Segment

  - [[45], 1, Segment, [1, 32, 256]]  # 60 Rail-segment

  - [[57], 1, Segment, [1, 32, 256]]  # 61 Pole-Segment
"""

# Write the content to the .yaml file
with open(file_path, 'w') as file:
    file.write(yaml_content)

$$$$

## Model

In [None]:
from ultralytics import YOLO
import torch
import torch.nn as nn

import sys
import os
import cv2

# Load the pretrained checkpoint from Drive; its weights are copied into
# RailModel by the weight-transfer cell further down.
model = YOLO('/content/drive/MyDrive/Stage 2024/v4.pt')

  return torch.load(file, map_location='cpu'), file  # load


In [None]:
# Take the state dict of `model.model.model` — the same object the
# weight-transfer cell reads the pretrained weights from.
model_state_dict = model.model.model.state_dict()

# Save the state dict to a file
torch.save(model_state_dict, '/content/drive/MyDrive/Stage 2024/state_dict.pt')

In [None]:
# Display the state dict of `model.model`.
# NOTE(review): this shows every parameter tensor in the cell output; listing
# only the keys would be far more compact — consider trimming before sharing.
model.model.state_dict()

**Initialise Model**

In [None]:
# Build a new multi-task model from the railsem19.yaml architecture config
# (the file written by the "Create Model Architecture" cell).
RailModel = YOLO('/content/YOLOv8-multi-task/ultralytics/models/v8/railsem19.yaml',
                 task='multi')


                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128

### BDD100K Weights

**Transfer Weights**

In [None]:
def get_layer_keys(state_dict_keys, layer_index):
    """Return the keys that start with ``"<layer_index>."``.

    The trailing dot keeps layer 1 from also matching layers 10, 11, ...
    """
    prefix = f'{layer_index}.'
    return [key for key in state_dict_keys if key.startswith(prefix)]


def transfer_weights_by_index(pretrained_model, new_model, indices):
    """Copy the weights of selected layers from one model into another.

    For every layer index in ``indices``, each state-dict entry of
    ``new_model`` under that layer is replaced by the entry with the same key
    from ``pretrained_model``. Entries that are missing from the pretrained
    model, or whose shape differs, are left at their current values and
    reported, so a single incompatible layer no longer aborts the transfer
    inside ``load_state_dict``.

    Args:
        pretrained_model: module providing the source ``state_dict()``.
        new_model: module updated in place via ``load_state_dict``.
        indices: iterable of layer indices (the leading number of the keys).
    """
    pretrained_dict = pretrained_model.state_dict()
    new_dict = new_model.state_dict()

    skipped = []
    for index in indices:
        for key in get_layer_keys(new_dict, index):
            source = pretrained_dict.get(key)
            if source is None:
                skipped.append((key, "missing in pretrained model"))
            elif source.shape != new_dict[key].shape:
                skipped.append(
                    (key, f"shape {tuple(source.shape)} != {tuple(new_dict[key].shape)}")
                )
            else:
                new_dict[key] = source

    # Report what was not transferred instead of printing bare exception text.
    for key, reason in skipped:
        print(f"Skipped {key}: {reason}")

    # Update new model's state dict with transferred weights
    new_model.load_state_dict(new_dict)

# Layers 0-45 receive pretrained weights, except the ones excluded below.
remove = list(range(23, 46, 3))  # skip concat_dropout weights (layers 23, 26, ..., 44)
remove.extend(range(10, 22))     # skip detection Neck (layers 10-21)
indices = [layer for layer in range(46) if layer not in remove]

transfer_weights_by_index(model.model.model, RailModel.model.model, indices)

In [None]:
# model = "/content/drive/MyDrive/Stage 2024/weights/last.pt" # @param ["/content/drive/MyDrive/Stage 2024/weights/best.pt", "/content/drive/MyDrive/Stage 2024/weights/last.pt"]
# model = YOLO(model)

In [None]:
# Put the repo's `ultralytics` directory first on the import path.
sys.path.insert(0, "/content/YOLOv8-multi-task/ultralytics")
# Dataset description written by the "Create yaml file for dataset" cell
# (relative to the working directory).
data_path = os.path.join("YOLOv8-multi-task/ultralytics/datasets", 'railsem19.yaml')
# NOTE(review): `name` is given an absolute Drive path; the resume cell loads
# <name>/weights/last.pt, so the run directory presumably lands there — confirm
# against the trainer's project/name handling.
RailModel.train(data=data_path,
                batch=16,
                epochs=20,
                imgsz=(640,640),
                name='/content/drive/MyDrive/Stage 2024/RailModel',
                val=True,
                task='multi')

New https://pypi.org/project/ultralytics/8.2.74 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.105 🚀 Python-3.10.12 torch-2.3.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1myolo/engine/trainer: [0mtask=multi, mode=train, model=/content/YOLOv8-multi-task/ultralytics/models/v8/railsem19.yaml, data=YOLOv8-multi-task/ultralytics/datasets/railsem19.yaml, epochs=20, patience=50, batch=16, imgsz=(640, 640), save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=/content/drive/MyDrive/Stage 2024/RailModel, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, combine_class=None, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, overlap_mask=True, mask_ratio=1, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, speed=False, source=None, show=False, save_txt=False, save_conf=False, save_

KeyboardInterrupt: 

In [None]:
# Load a model
model = YOLO("/content/drive/MyDrive/Stage 2024/RailModel/weights/last.pt")  # load a partially trained model
# Resume training
results = model.train(resume=True,
                      plots=False)

$$$$