In [None]:
# Pinned to the release shown in this cell's install log; the
# `ultralytics.yolo.utils.ops` import used further down depends on that package layout.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install -q ultralytics==8.0.83

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ultralytics
  Downloading ultralytics-8.0.83-py3-none-any.whl (529 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m529.2/529.2 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk
  Downloading sentry_sdk-1.20.0-py2.py3-none-any.whl (198 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.8/198.8 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: sentry-sdk, thop, ultralytics
Successfully installed sentry-sdk-1.20.0 thop-0.1.1.post2209072238 ultralytics-8.0.83


In [None]:
import os
from itertools import chain

import cv2
import imageio
import numpy as np
from google.colab.patches import cv2_imshow
from numpy import savetxt
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO
from ultralytics.yolo.utils.ops import scale_image

# Useful Resources
# Yolo Docs
# https://www.youtube.com/watch?v=m9fH9OWn8YM&ab_channel=Computervisionengineer
# https://docs.ultralytics.com/yolov5/train_custom_data/?h=#13-organize-directories

def generate_yaml_file():
    """
    Placeholder for dataset YAML generation; nothing is implemented yet.

    A dataset YAML acts as the configuration file for a dataset: it records
    where the data lives and, more importantly, the classes and labels it
    contains. Per the original notes, Yolo can generate this file for you
    once it is told where the data is.

    returns:
        None
    """
    return None


def create_new_model(name = 'yolov8n.yaml', task = "segment"):
    """
    Build a YOLO model from a configuration, for when the pretrained Yolo
    models are not wanted.

    params:
        name: Model definition handed to the YOLO constructor.
        task: Task handed to the YOLO constructor; this wrapper defaults to "segment".
    returns:
        The constructed YOLO object (it carries the hyper-parameters, class labels, etc.)
    """
    # NOTE(review): the default config name has no "-seg" suffix although the
    # default task is "segment" -- confirm this pairing is intended.
    return YOLO(name, task)


def load_pretrained_model(model_name, task = "segment"):
    """
    Load an already-trained YOLO model, e.g. yolov8n.pt or your own weights file.

    The YOLO constructor accepts either a .yaml or a .pt file; per the original
    notes, a .yaml is believed to create a new model while a .pt loads one.

    params:
        model_name: Name or path of the model file to load.
        task: Task handed to the YOLO constructor; this wrapper defaults to "segment".
    returns:
        The loaded YOLO object
    """
    return YOLO(model_name, task)


def train_model(model, dataset='coco128-seg.yaml', epochs = 10):
    """
    Train a model on a (custom) dataset and hand the same model object back.

    Per the original notes, training writes two .pt files (best.pt and last.pt)
    into the runs/'task_type'/train/weights folder; either can later be used to
    reload the model or resume training (best.pt is recommended). Training also
    creates a folder called 'train' holding the results and loss graphs.

    params:
        model: Object exposing a `train(data=..., epochs=...)` method.
        dataset: Dataset description passed to `train` as `data`.
        epochs: Number of training epochs.
    returns:
        The `model` argument itself (the value returned by `train` is not kept)
    """
    model.train(data=dataset, epochs=epochs)
    return model


def evaluate_model(model):
    """
    Evaluate the model's performance on the validation set.

    params:
        model: Object exposing a `val()` method.
    returns:
        Whatever `model.val()` returns, so the caller can inspect the metrics
        (previously the result was computed and then thrown away).
    """
    return model.val()


def export_model(model, format="onnx"):
    """
    Export the model to another model format (e.g. ONNX, TensorRT).

    Don't use this for 'saving' the model; per the training notes in this
    notebook, weights are written automatically during training.

    params:
        model: Object exposing an `export(format=...)` method.
        format: Target format name forwarded to `model.export`. Defaults to
                "onnx", the value this function used to hard-code.
    returns:
        Whatever `model.export` returns (previously discarded).
    """
    return model.export(format=format)


def get_labels_per_frame(results, unique=False):
    """
    This function returns, for every frame, the classes that appeared in that frame.

    params:
        results: Iterable of per-frame results (a list or a generator). Each item
                 must expose `boxes.cls.tolist()` and a `names` lookup.
        unique: This parameter determines whether the classes listed for a frame
                will be unique or not.

    returns:
        A list with one entry per frame, in the same order as `results`. Each
        entry is a list of tuples in the format (class_id, class_name), sorted by
        class_id. Frames without detections contribute an empty list.
    """
    labels = []
    for result in results:
        # Names are read from each result, so generators and empty inputs work
        # (no `results[0]` indexing needed).
        names = result.names
        class_ids = result.boxes.cls.tolist() if result.boxes is not None else []
        if unique:
            class_ids = set(class_ids)
        # Sort inside the frame; the frames themselves keep their input order.
        # Sorting the outer list by x[0] used to raise IndexError on empty frames.
        # int() makes the lookup work whether the id arrives as 30 or 30.0.
        labels.append([(class_id, names[int(class_id)]) for class_id in sorted(class_ids)])
    return labels


def get_labels_per_video(results):
    """
    Collect every distinct class that shows up anywhere in the video.

    params:
        results: The per-frame results, forwarded to get_labels_per_frame.

    returns:
        A list of tuples, in the format: (class_id, class_name), ordered by class_id
    """
    seen = set()
    for frame_labels in get_labels_per_frame(results):
        seen.update(frame_labels)
    return sorted(seen, key=lambda label: label[0])


def get_object_from_image(result, classes='all', save=True, mask=False, background_path=None):
    """
    This function isolates an object(s) from the background using its masks in a frame.

    params:
        result: The result for a single frame
        classes: A list which defines which classes should be included in the output image.
                 The string 'all' keeps every detected class.
        save: Boolean value which defines whether to save image to the disk ('output.png').
              Nothing is saved when the frame has no masks.
        mask: Currently unused; kept so existing callers keep working.
        background_path: Optional path of an image to place behind the extracted
                         objects. It is resized to the frame size.
    returns:
        A numpy image array. When masks are present the frame has been passed
        through a BGR->RGB conversion. When no masks are found, the resized
        background image is returned if given, otherwise a black image.
    """
    original = result.orig_img
    height, width = original.shape[:2]

    def _load_background():
        # PIL sizes are (width, height). Forcing RGB guarantees a 3-channel array,
        # so grayscale or RGBA backgrounds no longer cause a shape mismatch.
        with Image.open(background_path) as picture:
            return np.array(picture.convert("RGB").resize((width, height)))

    if result.masks is None:  # If no masks are found, return the background or a black image
        if background_path:
            return _load_background()
        return np.zeros_like(original)

    keep_all = isinstance(classes, str) and classes == 'all'
    class_ids = result.boxes.cls.tolist()  # hoisted: one conversion per frame
    object_mask = np.zeros((height, width), dtype=np.uint8)

    for class_id, polygon in zip(class_ids, result.masks.xy):
        if len(polygon) == 0:
            continue  # nothing to draw for an empty outline
        # Filter classes if required, and draw the object onto the mask
        if keep_all or class_id in classes:
            points = np.round(polygon).astype(np.int32)
            cv2.fillPoly(object_mask, [points], 255)

    # Apply the mask containing all our required classes to the frame to extract the segmented region
    segmented_region = cv2.cvtColor(
        cv2.bitwise_and(original, original, mask=object_mask), cv2.COLOR_BGR2RGB
    )

    if background_path:
        # Use the polygon mask, not "pixel is black", to decide what is background;
        # otherwise genuinely black pixels inside an object would be replaced too.
        outside = object_mask == 0
        segmented_region[outside] = _load_background()[outside]

    if save:
        Image.fromarray(segmented_region).save('output.png')
    return segmented_region


def get_colormap():
    """
    Return the fixed palette used to colour segmentation masks.

    returns:
        A new list of eight (R, G, B) tuples, always in the same order.
    """
    palette = {
        "red": (128, 0, 0),
        "green": (0, 128, 0),
        "yellow": (128, 128, 0),
        "blue": (0, 0, 128),
        "purple": (128, 0, 128),
        "cyan": (0, 128, 128),
        "silver": (128, 128, 128),
        "brown": (64, 0, 0),
    }
    # dicts keep insertion order, so the palette order above is the returned order
    return list(palette.values())


def get_object_mask_from_image(result, classes='all', color_dict=None, save=True, background_path=None):
    """
    This function draws the masks of the selected object(s) of a frame as
    colour-filled polygons with a white outline on a black canvas.

    params:
        result: The result for a single frame
        classes: A list which defines which classes should be included in the output image.
                 The string 'all' keeps every class detected in this frame.
        color_dict: Optional mapping class_id -> fill colour. When omitted, colours
                    from get_colormap() are assigned to the distinct classes in
                    `classes`, in first-seen order (cycling if there are more
                    classes than colours).
        save: Boolean value which defines whether to save image to the disk ('output.png').
              Nothing is saved when the frame has no masks.
        background_path: Currently unused; kept so existing callers keep working.
    returns:
        A numpy image array (height, width, 3), uint8. Black if no masks are found.
    raises:
        KeyError: if a supplied `color_dict` lacks an entry for a class that is drawn.
    """
    height, width = result.orig_img.shape[:2]
    canvas = np.zeros((height, width, 3), dtype=np.uint8)

    if result.masks is None:  # If no masks are found, return a black image
        return canvas

    class_ids = result.boxes.cls.tolist()
    if isinstance(classes, str) and classes == 'all':
        classes = class_ids

    if color_dict is None:
        colors = get_colormap()
        # De-duplicate first so several instances of one class do not shift its colour.
        distinct = list(dict.fromkeys(classes))
        color_dict = {class_id: colors[i % len(colors)] for i, class_id in enumerate(distinct)}

    image = Image.fromarray(canvas)
    draw = ImageDraw.Draw(image)
    for class_id, polygon in zip(class_ids, result.masks.xy):
        if class_id not in classes or len(polygon) < 2:
            continue  # filtered out, or too few points to draw
        # Hand PIL explicit (x, y) tuples instead of relying on how it reads an array.
        outline_points = [tuple(point) for point in np.asarray(polygon).tolist()]
        draw.polygon(outline_points, outline="white", fill=color_dict[class_id])

    if save:
        image.save('output.png')
    return np.array(image)


def get_object_from_video(results, path="output.mp4", classes="all", mask=False, background=None, fps=24):
    """
    This function creates a video which isolates all the required classes,
    and removes the background and other irrelevant classes.

    params:
        results: The per-frame results (output from the model prediction). May be
                 a list or a generator.
        path: Name or file path of the output video
        classes: list of classes to isolate and extract. By default, all classes are extracted.
        mask: If False, frames come from get_object_from_image (object pixels kept).
              If True, frames come from get_object_mask_from_image (coloured masks).
        background: Optional background image path forwarded to the per-frame function.
        fps: Frame rate of the written video (default 24, the previously hard-coded value).
    returns:
        None
    """
    if not mask:
        frames = [
            get_object_from_image(result, classes=classes, save=False, background_path=background)
            for result in results
        ]
    else:
        # The results are walked twice (label scan, then rendering). A generator
        # would be exhausted by the first pass, so materialise it up front.
        results = list(results)
        video_classes = [label[0] for label in get_labels_per_video(results)]

        # One colour per class for the whole video, so colours stay stable across frames.
        palette = get_colormap()
        colors = {class_id: palette[i % len(palette)] for i, class_id in enumerate(video_classes)}

        # Respect the caller's class filter; previously it was overwritten here.
        if isinstance(classes, str) and classes == "all":
            wanted = video_classes
        else:
            wanted = classes

        frames = [
            get_object_mask_from_image(
                result, classes=wanted, color_dict=colors, save=False, background_path=background
            )
            for result in results
        ]
    imageio.mimsave(path, frames, fps=fps, quality=8, codec='h264')




In [None]:
#### Creating a new model
# model = create_new_model("yolov8s-seg.pt", "segment")
# train_model(model)
#detection_output = model.predict(source="input.mp4", save = True, conf=0.3) 
# https://www.youtube.com/watch?v=g0lMymp-FUc

#### Loading an existing model for predicting
# Paths are named here so they can be changed in one place.
MODEL_PATH = '/content/drive/MyDrive/Projects/SEM 6/DLP/project/models/YOLOv8s-DAVISv3/50/last.pt'
VIDEO_PATH = "/content/drive/MyDrive/Projects/SEM 6/DLP/project/videos/input1.mp4"

model = load_pretrained_model(MODEL_PATH, "segment")
# predict(stream=True) yields results lazily and can only be consumed once.
# Later cells both iterate over `detection_output` and index into it, so the
# results are collected into a list here.
detection_output = list(model.predict(source=VIDEO_PATH, conf=0.3, stream=True, save=True))


### Loading an existing model for further training
# model = load_pretrained_model('runs/segment/train/weights/best.pt', "segment")
# train_model(model)
# detection_output = model.predict(source="dance.mp4", conf=0.3, save=True) 



#### MISC. INFO

# PyTorch models (.pt) can be used to train, val, predict and export, i.e. 'yolo export model=yolov8n.pt', 
# but exported formats like ONNX, TensorRT etc. only support 'predict' and 'val' modes, 
# i.e. 'yolo predict model=yolov8n.onnx'.

# You can directly use the model class, and also pull images from URLs automatically.
# results = model('https://ultralytics.com/images/bus.jpg')
# detection_output = model.predict(source="https://ultralytics.com/images/bus.jpg", conf=0.25, save=True) 

In [None]:
# Render the predictions to a video at the function's default path ("output.mp4").
# mask=False selects the branch that builds frames with get_object_from_image.
get_object_from_video(detection_output, mask=False)


video 1/1 (1/247) /content/drive/.shortcut-targets-by-id/1ixoytuMSeccKm2JZgDjIYgyAFRW2oZRR/project/videos/input1.mp4: 384x640 (no detections), 842.0ms
video 1/1 (2/247) /content/drive/.shortcut-targets-by-id/1ixoytuMSeccKm2JZgDjIYgyAFRW2oZRR/project/videos/input1.mp4: 384x640 (no detections), 805.2ms
video 1/1 (3/247) /content/drive/.shortcut-targets-by-id/1ixoytuMSeccKm2JZgDjIYgyAFRW2oZRR/project/videos/input1.mp4: 384x640 (no detections), 593.4ms
video 1/1 (4/247) /content/drive/.shortcut-targets-by-id/1ixoytuMSeccKm2JZgDjIYgyAFRW2oZRR/project/videos/input1.mp4: 384x640 (no detections), 489.7ms
video 1/1 (5/247) /content/drive/.shortcut-targets-by-id/1ixoytuMSeccKm2JZgDjIYgyAFRW2oZRR/project/videos/input1.mp4: 384x640 (no detections), 507.0ms
video 1/1 (6/247) /content/drive/.shortcut-targets-by-id/1ixoytuMSeccKm2JZgDjIYgyAFRW2oZRR/project/videos/input1.mp4: 384x640 (no detections), 482.4ms
video 1/1 (7/247) /content/drive/.shortcut-targets-by-id/1ixoytuMSeccKm2JZgDjIYgyAFRW2oZRR/pr

In [None]:
# Colour-coded mask for one frame. Indexing requires `detection_output` to be a
# list; a generator (as returned by predict(stream=True)) is not subscriptable.
frame_70_mask = get_object_mask_from_image(detection_output[70])
# Show only the shape instead of dumping the whole pixel array into the output.
frame_70_mask.shape

{30.0: (128, 0, 0)}


array([[[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [255, 255, 255],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [255, 255, 255],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [255, 255, 255],
        [  0,   0,   0],
        [  0,   0,   0]],

       ...,

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [255, 255, 255],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0]]

In [None]:
def make_video_from_frames(output_folder, file_name, sorted_frames_paths, fps = 30):
    """
    Assemble image files into a video written to output_folder/file_name.

    params:
        output_folder: Existing folder that will receive the video.
        file_name: Name of the video file to create inside `output_folder`.
        sorted_frames_paths: Frame image paths, already in playback order.
        fps: Frame rate of the video. (Previously ignored: 30 was always used.)
    returns:
        The path of the written video.
    raises:
        FileNotFoundError: if `output_folder` does not exist, or if the video file
                           is missing after writing.
    """
    if not os.path.exists(output_folder):
        raise FileNotFoundError("Path {} does not exist".format(output_folder))

    output_path = os.path.join(output_folder, file_name)
    frames = [imageio.imread(frame_path) for frame_path in sorted_frames_paths]
    imageio.mimsave(output_path, frames, fps=fps, quality=8, codec='h264')

    # Double-check that the writer really produced the file before reporting success.
    if not os.path.exists(output_path):
        raise FileNotFoundError(f"Could not create file {output_path}")
    return output_path