In [41]:
import torch
from ultralytics import YOLO
import cv2
import math 
import requests
import zipfile
from colorama import Fore
# import plotly
import plotly.express as px
import numpy as np

# import scale image from ultralytics
from ultralytics.utils.ops import scale_image

## Note: Roboflow attempts to download opencv-python-headless, which is known to break opencv. Only pull the dataset in through a generic request

In [42]:
zip_from = "dataset.zip"
zip_to = "dataset_original"

## Only needs to be run sometimes

In [None]:
params = {
    'key': 'K1gS1XvFY3',
}

response = requests.get('https://app.roboflow.com/ds/7qMTgZgyWQ', params=params)

# save the response
with open(zip_from, 'wb') as f:
    f.write(response.content)


with zipfile.ZipFile(zip_from, 'r') as zip_ref:
    zip_ref.extractall(zip_to)

In [43]:
torch.cuda.is_available()

True

In [44]:
# train a yolo model based on the dataset

# model for segmentation
model = YOLO('yolov8n-seg.pt')

results = model.train(data=f"/home/forsythcreations/git/ECE4554_Project/initial/rpc/{zip_to}/data.yaml", model="yolov8s.pt", task="segment", epochs=20, imgsz=640, batch=20)

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt to 'yolov8n-seg.pt'...
100%|██████████| 6.73M/6.73M [00:00<00:00, 34.9MB/s]
Ultralytics YOLOv8.0.208 🚀 Python-3.10.12 torch-1.12.1+cu102 CUDA:0 (NVIDIA GeForce RTX 2070, 7966MiB)
[34m[1mengine/trainer: [0mtask=segment, mode=train, model=yolov8s.pt, data=/home/forsythcreations/git/ECE4554_Project/initial/rpc/dataset_original/data.yaml, epochs=20, patience=50, batch=20, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train9, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save

In [45]:
# use the results to predict the images
num = 9

# path to trained weights
weights_path = f"../../segment/train{num}/weights/best.pt"

new_model = YOLO(weights_path, task="segment")

In [None]:
# start webcam
cap = cv2.VideoCapture(0)
cap.set(3, 640)
cap.set(4, 480)

# object classes
classNames = ["rock", "paper", "scissors"]

while True:
    success, img = cap.read()
    results = new_model(img)

    # coordinates
    for r in results:
        x1, y1, x2, y2 = r  # Adjust this line based on the output structure of the detect function

        # convert to int values
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

        # put box in cam
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

        # confidence
        confidence = math.ceil((r.confidence * 100)) / 100

        # class name
        cls = int(r.class_id)
        class_name = classNames[cls]

        # object details
        org = [x1, y1 + 100]
        font = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 1
        color = (255, 0, 0)
        thickness = 2

        cv2.putText(img, class_name, org, font, fontScale, color, thickness)

    cv2.imshow('Webcam', img)
    if cv2.waitKey(1) == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

In [46]:
test_image = cv2.imread(f"{zip_to}/valid/images/testpaper01-13_png_jpg.rf.e5ef34d25961e6f3050f79361873389c.jpg")

# show the image in Jupyter notebook
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
fig = px.imshow(test_image)
fig.show()

In [75]:
def predict_on_image(model, img, conf = .3):
    result = model(img)[0]

    # detection
    # result.boxes.xyxy   # box with xyxy format, (N, 4)
    cls = result.boxes.cls.cpu().numpy()    # cls, (N, 1)
    probs = result.boxes.conf.cpu().numpy()  # confidence score, (N, 1)
    boxes = result.boxes.xyxy.cpu().numpy()   # box with xyxy format, (N, 4)

    # segmentation
    masks = result.masks.cpu().numpy()     # masks, (N, H, W)
    # masks = np.moveaxis(masks, 0, -1) # masks, (H, W, N)
    # # rescale masks to original image
    # masks = scale_image(masks, img.shape[0], img.shape[1])

    return boxes, masks, cls, probs

In [67]:
output = new_model(test_image)

output[0].masks.cpu().numpy();


0: 640x640 1 scissors, 5.2ms
Speed: 1.4ms preprocess, 5.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


In [80]:
boxes, masks, cls, probs = predict_on_image(new_model, test_image)

print(masks.numpy())


0: 640x640 1 scissors, 5.2ms
Speed: 2.1ms preprocess, 5.2ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)


AttributeError: 'Masks' object has no attribute 'xy'. See valid attributes below.

    A class for storing and manipulating detection masks.

    Attributes:
        xy (list): A list of segments in pixel coordinates.
        xyn (list): A list of normalized segments.

    Methods:
        cpu(): Returns the masks tensor on CPU memory.
        numpy(): Returns the masks tensor as a numpy array.
        cuda(): Returns the masks tensor on GPU memory.
        to(device, dtype): Returns the masks tensor with the specified device and dtype.
    

In [None]:
# start webcam
cap = cv2.VideoCapture(0)
cap.set(3, 640)
cap.set(4, 480)


# object classes
classNames = ["rock", "paper", "scissors"]

while True:
    success, img = cap.read()
    results = new_model.predict(img, stream=True)

    try: 
        print(type(results))

        # # coordinates
        # for r in results:
        #     boxes = r.boxes

        #     for box in boxes:
        #         # bounding box
        #         x1, y1, x2, y2 = box.xyxy[0]
        #         x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values

        #         # put box in cam
        #         cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

        #         # confidence
        #         confidence = math.ceil((box.conf[0]*100))/100
        #         # print("Confidence --->",confidence)

        #         # class name
        #         cls = int(box.cls[0])
        #         # print("Class name -->", classNames[cls])

        #         # object details
        #         org = [x1, y1 + 100]
        #         font = cv2.FONT_HERSHEY_SIMPLEX
        #         fontScale = 1
        #         color = (255, 0, 0)
        #         thickness = 2

        #         cv2.putText(img, classNames[cls], org, font, fontScale, color, thickness)

            # print(img.shape)
            # cv2.imshow('Webcam', img)
            # if cv2.waitKey(1) == ord('q'):
            #     break
    except Exception as e:
        print(f"{Fore.RED}Error: {e}{Fore.RESET}")
        break

cap.release()
cv2.destroyAllWindows()

In [None]:
cap.release()
cv2.destroyAllWindows()