In [1]:
import torch
import os
from ultralytics import YOLO
import cv2
import math 
import zipfile

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
torch.cuda.is_available()

True

In [3]:
zip_from = "dataset.zip"
zip_to = "dataset_original"

In [4]:
# Extract the zip file

with zipfile.ZipFile(zip_from, 'r') as zip_ref:
    zip_ref.extractall(zip_to)

In [5]:
# Get all the files in a directory

directories = ["test", "train", "valid"]
sub_directories = ["rock", "paper", "scissors"]

for directory in directories:
    for dir_index, sub_directory in enumerate(sub_directories):
        path = os.path.join(zip_to, directory, sub_directory)
        files = os.listdir(path)
        # iterate over all the files
        for idx, file in enumerate(files):
            # read the image
            # print(file)
            # get the extension of the file
            extension = file.split(".")[-1]
            if (extension == "jpg"):
                # rename the file with the directory name and the idx
                new_name = sub_directory + "_" + str(idx) + ".jpg"
                # rename the file
                os.rename(os.path.join(path, file), os.path.join(path, new_name))
                # create an annotation file
                annotation_file = open(os.path.join(path, new_name.split(".")[0] + ".txt"), "w")
                # write the string Each row is class x_center y_center width height format.
                # class 0 0 11
                annotation_file.write(f"{dir_index} 0 0 1 1")
                # close the file
                annotation_file.close()

In [10]:
# train a yolo model based on the dataset

model = YOLO('yolov8n.pt')

# model = YOLO('rps.yaml', task="classify")

results = model.train(data='rps.yaml', epochs=10)

Ultralytics YOLOv8.0.208 🚀 Python-3.10.12 torch-1.12.1+cu102 CUDA:0 (NVIDIA GeForce RTX 2070, 7966MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=rps.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=

In [11]:
# use the results to predict the images

# path to trained weights
weights_path = "../runs/detect/train8/weights/best.pt"

new_model = YOLO(weights_path, task="detect")

In [12]:
# start webcam
cap = cv2.VideoCapture(0)
cap.set(3, 640)
cap.set(4, 480)


# object classes
classNames = ["rock", "paper", "scissors"]


while True:
    success, img = cap.read()
    results = new_model(img, stream=True)

    # coordinates
    for r in results:
        boxes = r.boxes

        for box in boxes:
            # bounding box
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values

            # put box in cam
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

            # confidence
            confidence = math.ceil((box.conf[0]*100))/100
            # print("Confidence --->",confidence)

            # class name
            cls = int(box.cls[0])
            # print("Class name -->", classNames[cls])

            # object details
            org = [x1, y1 + 100]
            font = cv2.FONT_HERSHEY_SIMPLEX
            fontScale = 1
            color = (255, 0, 0)
            thickness = 2

            cv2.putText(img, classNames[cls], org, font, fontScale, color, thickness)

    cv2.imshow('Webcam', img)
    if cv2.waitKey(1) == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 480x640 (no detections), 4.3ms
Speed: 0.7ms preprocess, 4.3ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 3.5ms
Speed: 0.9ms preprocess, 3.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 9.4ms
Speed: 1.5ms preprocess, 9.4ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6.5ms
Speed: 1.5ms preprocess, 6.5ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6.4ms
Speed: 1.4ms preprocess, 6.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6.1ms
Speed: 1.4ms preprocess, 6.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6.2ms
Speed: 1.4ms preprocess, 6.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6.4ms
Speed: 1.4ms preprocess, 6.4ms inference, 1.2ms 

In [9]:
cap.release()
cv2.destroyAllWindows()