## Setup

First, let's make sure a suitable Python version is installed (this notebook requires Python 3.5 or later), then import a few common modules.

In [1]:
from pkg_resources import parse_version
from PIL import Image

# Python ≥3.5 is required
import sys
assert sys.version_info >= (3,5) 

# Tensorflow ≥2.0 is required

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_seed(seed=42):
    """Re-seed NumPy's global random number generator.

    Parameters
    ----------
    seed : int, optional
        Value handed to ``np.random.seed`` (default 42).

    Only NumPy's generator is touched; no other library is seeded here.
    """
    np.random.seed(seed)

# To plot pretty figures - source Aurelien Geron
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels, applied in one update
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Where to save the figures
# Based on post by GM at https://stackoverflow.com/questions/53581278
# Detect Colab by looking for 'google.colab' in the string form of the
# IPython shell object
running_on_colab = 'google.colab' in str(get_ipython())

if not running_on_colab:
  print('Not running on CoLab')
  PROJECT_ROOT_DIR = os.getcwd()
else:
  print('Running on CoLab')
  from google.colab import drive
  # Mounting Google Drive is interactive: an authorization code must be
  # pasted at the prompt
  drive.mount('/content/drive')
  PROJECT_ROOT_DIR = "drive/MyDrive/Colab Notebooks"

NB_ID = "project"

# Figures are written to <PROJECT_ROOT_DIR>/images/<NB_ID>; create it if missing
IMAGE_DIR = os.path.join(PROJECT_ROOT_DIR, "images", NB_ID)
os.makedirs(IMAGE_DIR, exist_ok=True)
        
def save_fig_nb(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current pyplot figure as IMAGE_DIR/<fig_id>.<fig_extension>.

    Parameters
    ----------
    fig_id : str
        File name without extension; also echoed in the progress message.
    tight_layout : bool, optional
        When true, ``plt.tight_layout()`` is called before saving.
    fig_extension : str, optional
        File extension, also passed to ``plt.savefig`` as ``format``
        (default "png").
    resolution : int, optional
        Passed to ``plt.savefig`` as ``dpi`` (default 300).
    """
    path = os.path.join(IMAGE_DIR, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
    
# Make sure <PROJECT_ROOT_DIR>/tb_logs/<NB_ID> exists before any run logs to it
os.makedirs(os.path.join(PROJECT_ROOT_DIR, "tb_logs", NB_ID), exist_ok=True)

def get_logdir(add=''):
    """Return a timestamped log directory path under tb_logs/<NB_ID>.

    Parameters
    ----------
    add : str, optional
        Suffix appended to the timestamped directory name.

    Returns
    -------
    str
        ``<PROJECT_ROOT_DIR>/tb_logs/<NB_ID>/log_<YYYY_MM_DD-HH_MM_SS><add>``.
        The path is only built here, not created.
    """
    import time
    run_name = time.strftime("log_%Y_%m_%d-%H_%M_%S") + add
    return os.path.join(PROJECT_ROOT_DIR, "tb_logs", NB_ID, run_name)

Not running on CoLab


A couple utility functions to plot grayscale and RGB images (source Aurelien Geron):

In [2]:
def plot_image(image):
    """Draw ``image`` with the gray colormap and nearest interpolation, axes hidden."""
    imshow_options = {"cmap": "gray", "interpolation": "nearest"}
    plt.imshow(image, **imshow_options)
    plt.axis("off")

def plot_color_image(image):
    """Draw ``image`` cast to uint8 with nearest interpolation, axes hidden."""
    pixels = image.astype(np.uint8)
    plt.imshow(pixels, interpolation="nearest")
    plt.axis("off")

## Import tfds and other libraries

In [3]:
import os
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
# flow_vis library implements Middlebury flow vector coloring
import flow_vis
# Maybe use flowpy instead, which can read/write png and flo files in KITTI and Middlebury format respectively.
from flow_util import * 
import matplotlib.patches as patches
import matplotlib.pyplot as plt


# For some reason we seem to need to load tf onto the GPU first
dummy = tf.constant(32)
# Ask TensorFlow which GPU devices it can see and report how many there are
visible_gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

2023-11-10 03:22:30.104142: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-10 03:22:30.154219: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-10 03:22:30.154805: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


Num GPUs Available:  0


2023-11-10 03:22:32.974843: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-10 03:22:33.022353: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


## Data Loading

The dataset is packaged as a TensorFlow dataset split into three subcategories.


In [4]:
# Turn off the tfds progress bar
tfds.disable_progress_bar()

# Show the project root that the dataset directory is derived from
print(PROJECT_ROOT_DIR)

# Dataset directory: <PROJECT_ROOT_DIR>/tensorflow_data, created if missing
data_path = os.path.join(PROJECT_ROOT_DIR, 'tensorflow_data')
os.makedirs(data_path, exist_ok = True)

/home/luminatech/Documents/uOttawa/Computer_Vision/Tabinng and Testing/Testing_and_Training


# YoloV8 dataset

## Images dataset
Exporting images from tfrecords and splitting into train, validation, and test datasets in different folders

In [5]:
tfds.disable_progress_bar()

print("Using directory: ", data_path)

# Dataset names for each fold
fold_names = ['fixed_random_rotate', 'linear_movement_rotate', 'rotation_rotate']

# Per-video frame split: frames [0, 16) -> train, [16, 18) -> val, rest -> test
TRAIN_END_FRAME = 16
VAL_END_FRAME = 18

# Root of the exported dataset; each split gets its own images sub-folder
EXPORT_ROOT = './datasets/data'

# Load every fold once and keep the loaded datasets keyed by fold name
fold_datasets = {}
for fold_name in fold_names:
    fold_datasets[fold_name] = tfds.load(f'elg7186_projectdata/{fold_name}', data_dir=data_path)

# Frames collected per split (kept in memory so the totals can be reported)
images_train = []
images_val = []
images_test = []
images_by_split = {'train': images_train, 'val': images_val, 'test': images_test}

# Image.save does not create missing folders, so make them up front
for split_name in images_by_split:
    os.makedirs(os.path.join(EXPORT_ROOT, split_name, 'images'), exist_ok=True)

# Walk every video of every fold and write each frame as a PNG
for fold_name, dataset in fold_datasets.items():
    for vid_num, example in enumerate(dataset['train']):
        for frame_num, frame in enumerate(example['video']):
            if frame_num < TRAIN_END_FRAME:
                split_name = 'train'
            elif frame_num < VAL_END_FRAME:
                split_name = 'val'
            else:
                split_name = 'test'

            # Convert the tensor once and reuse it for both the list and the file
            frame_array = frame.numpy()
            images_by_split[split_name].append(frame_array)
            Image.fromarray(frame_array).save(
                f'{EXPORT_ROOT}/{split_name}/images/{fold_name}_video_{vid_num}_frame_{frame_num}.png'
            )
    print(f"Total videos in {fold_name}: {len(dataset['train'])}")

print(f"Total images in train folder: {len(images_train)}")
print(f"Total images in val folder: {len(images_val)}")
print(f"Total images in test folder: {len(images_test)}")





Using directory:  /home/luminatech/Documents/uOttawa/Computer_Vision/Tabinng and Testing/Testing_and_Training/tensorflow_data




Total videos in fixed_random_rotate: 30
Total videos in linear_movement_rotate: 30
Total videos in rotation_rotate: 30
Total images in train folder: 1440
Total images in val folder: 180
Total images in test folder: 540


## Labels dataset
Exporting labels from tfrecords and splitting into train, validation, and test datasets in different folders

In [6]:
def find_index(input, id):
    """Return the index of the first element of ``input`` equal to ``id``.

    Parameters
    ----------
    input : sequence
        Anything supporting ``len()`` and integer indexing.
    id : object
        Value compared against each element with ``==``.

    Returns
    -------
    int or None
        Position of the first match, or ``None`` when nothing matches.

    The parameter names shadow the builtins ``input`` and ``id``; they are
    kept so that existing keyword callers keep working.
    """
    for i in range(len(input)):
        if input[i] == id:
            return i
    # No element matched
    return None

# Write one label file per frame, using the same per-video frame split as the
# image export: frames [0, 16) -> train, [16, 18) -> val, rest -> test
LABEL_TRAIN_END_FRAME = 16
LABEL_VAL_END_FRAME = 18

# open() does not create missing folders, so make them up front
for split_name in ('train', 'val', 'test'):
    os.makedirs(f'./datasets/data/{split_name}/labels', exist_ok=True)

for fold_name, dataset in fold_datasets.items():
    for vid_num, example in enumerate(dataset['train']):
        metadata = example['metadata']
        instances = example['instances']

        # The returned values are not needed for the label files.
        # NOTE(review): the recorded output suggests these flow_util helpers
        # print the video name/type and flow range - confirm before removing.
        video_name, video_type = get_video_names(metadata)
        f_scale, f_offset = get_scale_offset(metadata)

        num_frames = int(metadata['num_frames'])
        resolution = [metadata['height'], metadata['width']]
        print('resolution:', resolution)

        for i in range(num_frames):
            if i < LABEL_TRAIN_END_FRAME:
                split_name = 'train'
            elif i < LABEL_VAL_END_FRAME:
                split_name = 'val'
            else:
                split_name = 'test'
            label_path = f"./datasets/data/{split_name}/labels/{fold_name}_video_{vid_num}_frame_{i}.txt"

            # The context manager closes (and flushes) every label file; a
            # frame with no visible instance still gets an empty file
            with open(label_path, "w") as label_file:
                for inst_idx in range(instances['bbox_frames'].shape[0]):
                    # Skip instances that have no box recorded for frame i
                    if i not in instances['bbox_frames'][inst_idx]:
                        continue
                    t = find_index(instances['bbox_frames'][inst_idx], i)
                    # Box read as [y_min, x_min, y_max, x_max]
                    # (presumably normalized to [0, 1] - TODO confirm)
                    item = instances['bboxes'][inst_idx, t, :]
                    cls_id = instances['category'][inst_idx]
                    # Scale to pixels, truncate to whole pixels, scale back
                    y_min = int(item[0] * float(resolution[0]))/metadata['height']
                    x_min = int(item[1] * float(resolution[1]))/metadata['width']
                    y_max = int(item[2] * float(resolution[0]))/metadata['height']
                    x_max = int(item[3] * float(resolution[1]))/metadata['width']
                    box_w = x_max - x_min
                    box_h = y_max - y_min
                    # One line per box: class, x center, y center, width, height
                    label_file.write(f"{cls_id} {box_w/2 + x_min} {box_h/2 + y_min} {box_w} {box_h}\n")


Video:  0003
Video type:  fixed_random_rotate_007
forward_flow range -16.59033203125 to 16.885251998901367
resolution: [<tf.Tensor: shape=(), dtype=int32, numpy=256>, <tf.Tensor: shape=(), dtype=int32, numpy=256>]


Video:  0006
Video type:  fixed_random_rotate_003
forward_flow range -15.715254783630371 to 53.34239959716797
resolution: [<tf.Tensor: shape=(), dtype=int32, numpy=256>, <tf.Tensor: shape=(), dtype=int32, numpy=256>]










Video:  0001
Video type:  fixed_random_rotate_003
forward_flow range -13.709030151367188 to 19.553253173828125
resolution: [<tf.Tensor: shape=(), dtype=int32, numpy=256>, <tf.Tensor: shape=(), dtype=int32, numpy=256>]








Video:  0008
Video type:  fixed_random_rotate_007
forward_flow range -14.55420207977295 to 18.55986213684082
resolution: [<tf.Tensor: shape=(), dtype=int32, numpy=256>, <tf.Tensor: shape=(), dtype=int32, numpy=256>]
Video:  0009
Video type:  fixed_random_rotate_001
forward_flow range -15.778400421142578 to 26.016069412231445
reso

# Model Training and Testing

In [7]:
pip install ultralytics

Note: you may need to restart the kernel to use updated packages.


### Initializing YOLOv8 Model with Ultralytics Library

In [2]:
from ultralytics import YOLO
# Create the YOLO model from the "yolov8x.pt" weights file
model = YOLO("yolov8x.pt")

### Training YOLOv8 Model on the Kubric Dataset

In [4]:
# Train for 10 epochs with batch size 8 on the dataset described by data.yaml.
# The call is the cell's last expression, so its return value is displayed.
# NOTE(review): data.yaml is not created in the visible part of this notebook;
# it must already exist next to the notebook - confirm.
model.train(data = "data.yaml", epochs = 10, batch = 8)

Ultralytics YOLOv8.0.208 🚀 Python-3.8.18 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 2070 SUPER, 7972MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8x.pt, data=data.yaml, epochs=10, patience=50, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train14, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, k

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  3,  6,  9, 10, 11, 12, 13, 14, 16])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f09b01ba8e0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.

### Image Loading Function with OpenCV: Load Images from Folder

In [5]:
import cv2
import os

def load_images_from_folder(folder):
    """
    Read every loadable image in a folder, in sorted file-name order.

    Parameters:
    - folder (str): Path to the folder containing images.

    Returns:
    - images (list): Results of cv2.imread for each entry of the folder,
      with entries for which cv2.imread returned None left out.
    """
    # Lazily read each directory entry in sorted name order
    loaded = (
        cv2.imread(os.path.join(folder, entry))
        for entry in sorted(os.listdir(folder))
    )

    # Keep only the entries that OpenCV managed to read
    return [picture for picture in loaded if picture is not None]


Downloading Tracker Configuration Files: bytetrack.yaml and botsort.yaml from Ultralytics Repository

In [17]:
!wget https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/trackers/bytetrack.yaml
!wget https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/trackers/botsort.yaml

--2023-11-10 04:33:08--  https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/trackers/bytetrack.yaml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8003::154, 2606:50c0:8001::154, 2606:50c0:8002::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8003::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 694 [text/plain]
Saving to: ‘bytetrack.yaml’


2023-11-10 04:33:08 (7.03 KB/s) - ‘bytetrack.yaml’ saved [694/694]

--2023-11-10 04:33:09--  https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/trackers/botsort.yaml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8003::154, 2606:50c0:8001::154, 2606:50c0:8002::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8003::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 890 [text/plain]
Saving to: ‘botsort.ya

YOLOv8 Object Tracking and Visualization: Using bytetrack and botsort Trackers on Test Images

In [18]:
# Load images from the specified folder
images = load_images_from_folder('./datasets/data/test/images')

# One model instance per tracker: persist=True keeps tracker state on the
# model between calls, so sharing a single model would mix the state of the
# two trackers. `model` drives bytetrack, `model_bot` drives botsort.
WEIGHTS_PATH = "runs/detect/train14/weights/best.pt"
model = YOLO(WEIGHTS_PATH)
model_bot = YOLO(WEIGHTS_PATH)

# Options shared by both track() calls
track_options = dict(persist=True, save=True, save_crop=True,
                     project="runs/detect", name="inference", exist_ok=True)

# Lists to store annotated frames for each tracker
video_byte = []
video_bot = []

# Loop through each image in the dataset
for image in images:
    # Run YOLOv8 tracking on the frame, persisting tracks between frames
    results_byte = model.track(image, tracker='bytetrack.yaml', **track_options)
    results_bot = model_bot.track(image, tracker='botsort.yaml', **track_options)

    # Visualize the results on the frame
    annotated_frame_byte = results_byte[0].plot()
    annotated_frame_bot = results_bot[0].plot()

    # Append annotated frames to the respective lists
    video_byte.append(annotated_frame_byte)
    video_bot.append(annotated_frame_bot)

    # Display the annotated frames; imshow only draws once the GUI event loop
    # runs, which waitKey(1) does for about one millisecond
    cv2.imshow("YOLOv8 Tracking using bytetrack tracker", annotated_frame_byte)
    cv2.imshow("YOLOv8 Tracking using botsort tracker", annotated_frame_bot)
    cv2.waitKey(1)

# Close the preview windows once every frame has been shown
cv2.destroyAllWindows()

# Size the writers from the annotated frames (VideoWriter expects
# (width, height)); fall back to 256x256 when there are no frames
if video_byte:
    frame_height, frame_width = video_byte[0].shape[:2]
else:
    frame_height, frame_width = 256, 256
frame_size = (frame_width, frame_height)

# Create video writers for the output videos (8 frames per second)
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out_byte = cv2.VideoWriter('project_byte.avi', fourcc, 8, frame_size)
out_bot = cv2.VideoWriter('project_bot.avi', fourcc, 8, frame_size)

# Write annotated frames to the output videos
for frame_byte, frame_bot in zip(video_byte, video_bot):
    out_byte.write(frame_byte)
    out_bot.write(frame_bot)

# Release the video writers
out_byte.release()
out_bot.release()



0: 640x640 1 Consumer Goods, 1 None, 1 Shoe, 1 Toys, 43.2ms
Speed: 2.1ms preprocess, 43.2ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/inference[0m

0: 640x640 1 Consumer Goods, 1 None, 1 Shoe, 1 Toys, 43.3ms
Speed: 2.0ms preprocess, 43.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/inference[0m

0: 640x640 1 Consumer Goods, 1 None, 1 Shoe, 1 Toys, 35.9ms
Speed: 1.4ms preprocess, 35.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/inference[0m

0: 640x640 1 Consumer Goods, 1 None, 2 Shoes, 1 Toys, 36.5ms
Speed: 1.5ms preprocess, 36.5ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/inference[0m

0: 640x640 1 Consumer Goods, 1 None, 2 Shoes, 1 Toys, 37.4ms
Speed: 1.5ms preprocess, 37.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1