In [1]:
import torch
import os
from ultralytics import YOLO
import cv2
import math 
import zipfile
import matplotlib.pyplot as plt
import plotly.express as px

# Report whether PyTorch can see a CUDA device. A bare expression is only echoed
# when it is the last statement of a cell, so the result is printed explicitly.
print(f"CUDA available: {torch.cuda.is_available()}")

# Dataset archive and the directory it is extracted into (relative to the notebook).
zip_from = "dataset.zip"
zip_to = "dataset_original"


  from .autonotebook import tqdm as notebook_tqdm


# Let's Start Small

In [2]:
# Run the pretrained YOLOv8-nano detector on a single image and list what it finds.
model = YOLO('yolov8n.pt')

image_path = 'room.jpg'
frame = cv2.imread(image_path)
# cv2.imread reports an unreadable/missing file by returning None, not by raising.
success = frame is not None
objects = []
if success:
    results = model(frame, verbose = False)
    for result in results:
        # boxes.cls holds one class id per detected box
        for class_id in result.boxes.cls:
            name = result.names[int(class_id)]
            print(f"Found object: {name}")
            objects.append(name)

    annotated_frame = results[0].plot()
    print(objects)
    # plot() yields a BGR array (OpenCV convention); plotly renders RGB, so the
    # channels are swapped for display only.
    fig = px.imshow(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
    fig.show()
else:
    print(f"Could not read image: {image_path}")

Found object: couch
Found object: chair
Found object: tv
Found object: chair
Found object: microwave
Found object: couch
Found object: vase
Found object: chair
Found object: book
Found object: chair
Found object: book
Found object: dining table
['couch', 'chair', 'tv', 'chair', 'microwave', 'couch', 'vase', 'chair', 'book', 'chair', 'book', 'dining table']


In [5]:
# Wrap the detection steps above in a reusable function

def observe(img, model):
    """Run `model` on `img`, show the annotated detections and list their class names.

    Args:
        img: Image array to analyse, e.g. the result of ``cv2.imread``.
        model: Callable detector. ``model(img, verbose=False)`` must return a
            sequence of results exposing ``boxes.cls``, ``names`` and ``plot()``.

    Returns:
        A tuple ``(annotated_frame, objects)`` where ``annotated_frame`` is the
        array returned by ``results[0].plot()`` (left in its original channel
        order) and ``objects`` is the list of detected class names.

    Raises:
        ValueError: If ``img`` is None, which is what ``cv2.imread`` returns
            when the file cannot be read.
    """
    if img is None:
        raise ValueError("observe() received no image; check that cv2.imread found the file")

    results = model(img, verbose = False)
    objects = [
        result.names[int(class_id)]
        for result in results
        for class_id in result.boxes.cls
    ]
    # One ". " per detection, same console output as printing inside the loop.
    print(". " * len(objects), end = "")

    annotated_frame = results[0].plot()
    print(f"\n{objects}")
    # plot() yields BGR (OpenCV convention) while plotly renders RGB; convert
    # only the copy that is displayed so the returned frame is unchanged.
    fig = px.imshow(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
    fig.show()
    return annotated_frame, objects

# Baseline: pretrained YOLOv8-nano on the sample room photo.
model = YOLO('yolov8n.pt')
frame = cv2.imread('room.jpg')
if frame is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Could not read image: room.jpg")
observe(frame, model);

# trying second model
num = 10
weights_path = f"../../detect/train{num}/weights/best.pt"
model1 = YOLO(weights_path)
# This image lives inside the extracted dataset, so the archive must already
# have been unpacked into `zip_to` before this cell runs.
test_image_path = f"{zip_to}/train/images/paper01-040_png_jpg.rf.61ba553838ec13a527024f4a901c4ecf.jpg"
test_image = cv2.imread(test_image_path)
if test_image is None:
    raise FileNotFoundError(f"Could not read image: {test_image_path}")
observe(test_image, model1);


. . . . . . . . . . . . 
['couch', 'chair', 'tv', 'chair', 'microwave', 'couch', 'vase', 'chair', 'book', 'chair', 'book', 'dining table']



[]


In [None]:
# Unpack the dataset archive (`zip_from`) into the working directory (`zip_to`)

with zipfile.ZipFile(file=zip_from, mode="r") as dataset_archive:
    dataset_archive.extractall(path=zip_to)

In [None]:
# Rename every image to "<class>_<n>.jpg" and write a matching YOLO label file.
#
# The cell is safe to re-run: images that already carry a "<class>_<n>.jpg" name
# are left where they are, and new images only ever receive an unused number, so
# os.rename can never overwrite another image.

directories = ["test", "train", "valid"]
sub_directories = ["rock", "paper", "scissors"]


def _is_normalised(file_name, prefix):
    """Return True when `file_name` already has the form '<prefix><digits>.jpg'."""
    stem, extension = os.path.splitext(file_name)
    return extension == ".jpg" and stem.startswith(prefix) and stem[len(prefix):].isdigit()


for directory in directories:
    for dir_index, sub_directory in enumerate(sub_directories):
        path = os.path.join(zip_to, directory, sub_directory)
        prefix = sub_directory + "_"
        # os.listdir returns entries in arbitrary order; sorting makes the
        # numbering reproducible. Only .jpg files are considered.
        images = sorted(f for f in os.listdir(path) if f.split(".")[-1] == "jpg")
        # names that are already in use and must not be handed out again
        taken = {f for f in images if _is_normalised(f, prefix)}
        next_idx = 0
        for file in images:
            if file in taken:
                # already renamed by a previous run; keep the name
                new_name = file
            else:
                # advance to the next number that no image is using yet
                while f"{prefix}{next_idx}.jpg" in taken:
                    next_idx += 1
                new_name = f"{prefix}{next_idx}.jpg"
                taken.add(new_name)
                os.rename(os.path.join(path, file), os.path.join(path, new_name))

            # Each row is "class x_center y_center width height": a fixed,
            # centred box covering 80% of the image in both dimensions.
            annotation_path = os.path.join(path, new_name.split(".")[0] + ".txt")
            with open(annotation_path, "w") as annotation_file:
                annotation_file.write(f"{dir_index} 0.5 0.5 .8 .8")

# NOTE: a flip augmentation (cv2.flip(image, 0) saved as "flipped_<name>" with
# its own label file) was prototyped here and is currently disabled.

In [3]:
# Fine-tune the pretrained YOLOv8-nano checkpoint on the extracted dataset.

# No `task` argument: 'yolov8n.pt' is a detection checkpoint and the training
# log reports task=detect even when task="classify" is passed here.
model = YOLO('yolov8n.pt')

# larger base model
# model = YOLO('yolov5l.pt')

# model = YOLO('rps.yaml', task="classify")

# Resolve data.yaml relative to the notebook's working directory instead of a
# machine-specific home directory, so the cell runs on any checkout.
data_path = os.path.abspath(os.path.join(zip_to, "data.yaml"))
print(data_path)
results = model.train(data=data_path, model="yolov8n.pt", epochs=15, imgsz=640, batch=30, pretrained=True, verbose=False);

Ultralytics YOLOv8.0.208 🚀 Python-3.10.12 torch-1.12.1+cu102 CUDA:0 (NVIDIA GeForce RTX 2070, 7966MiB)


/home/forsythcreations/git/ECE4554_Project/initial/rpc/dataset_original/data.yaml


[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/home/forsythcreations/git/ECE4554_Project/initial/rpc/dataset_original/data.yaml, epochs=15, patience=50, batch=30, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train10, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=

In [None]:
# Load the weights produced by an earlier training run for inference.
# NOTE(review): this selects run 7, while the training log in this notebook is
# named train10 - confirm which run is intended.
num = 7

# best.pt of the selected run, relative to the notebook directory
weights_path = "../../detect/train" + str(num) + "/weights/best.pt"

custom_model = YOLO(weights_path, task="classify")

In [None]:
# Produces an image to be rendered by CV using the applied model

classNames = ["rock", "paper", "scissors"]

def produce_image(r, img):
    """Draw every detection in `r` onto `img` and report the last confidence.

    Args:
        r: Iterable of prediction results; each item exposes ``boxes``, and each
            box exposes ``xyxy``, ``conf`` and ``cls`` (first element is used).
        img: Image array the boxes and labels are drawn on (modified in place).

    Returns:
        A tuple ``(confidence, img)``. ``confidence`` is the confidence of the
        last box processed, rounded up to two decimals, or 0 when there were no
        boxes. ``img`` is the same object that was passed in.
    """
    confidence = 0
    # Iterate the results that were passed in; previously the loop variable
    # shadowed `r` and the global `results` was used instead.
    for result in r:
        for box in result.boxes:
            # bounding box corners as integer pixel coordinates
            x1, y1, x2, y2 = (int(value) for value in box.xyxy[0])

            # put box in cam
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

            # confidence, rounded up to two decimals
            confidence = math.ceil(float(box.conf[0] * 100)) / 100

            # class name; fall back to the raw id when the model predicts a
            # class that is not listed in classNames
            cls = int(box.cls[0])
            label = classNames[cls] if 0 <= cls < len(classNames) else str(cls)

            # label is drawn 100 px below the top-left corner of the box
            org = (x1, y1 + 100)
            font = cv2.FONT_HERSHEY_SIMPLEX
            fontScale = 1
            color = (255, 0, 0)
            thickness = 2

            cv2.putText(img, label, org, font, fontScale, color, thickness)

    return confidence, img


# custom_model = YOLO('yolov8n.pt')

# load an image
test_image_path = f"{zip_to}/train/images/paper01-040_png_jpg.rf.61ba553838ec13a527024f4a901c4ecf.jpg"
test_image = cv2.imread(test_image_path)
if test_image is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError(f"Could not read image: {test_image_path}")

results = custom_model(test_image, verbose = False)

# # predict the image
# out = produce_image(custom_model(test_image, verbose = False), test_image)[1]

# test_image = cv2.cvtColor(out, cv2.COLOR_BGR2RGB)
# fig = px.imshow(test_image)
# fig.show()

# Process results list: dump every output container so it is visible which of
# them the loaded model actually populates.
for result in results:
    boxes = result.boxes  # Boxes object for bbox outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs

    print(f"Boxes: {boxes} - Masks: {masks} - Keypoints: {keypoints} - Probs : {probs}" )

In [None]:
# start webcam
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Live preview: run the trained model on each frame and show the annotated
# image until 'q' is pressed. try/finally guarantees the camera and the window
# are released even if the loop is interrupted or a frame fails.
try:
    while True:
        success, img = cap.read()
        if not success:
            # no frame delivered (camera missing, busy or unplugged)
            print("Webcam frame could not be read; stopping.")
            break

        # `new_model` was never defined in this notebook; use the model loaded
        # from the trained weights. One prediction per frame is enough.
        results = custom_model(img, verbose=False)

        output = produce_image(results, img)[1]

        cv2.imshow('Webcam', output)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Manual cleanup: release the camera and close any preview window, for the case
# where the webcam loop was interrupted before it reached its own cleanup.
cap.release()
cv2.destroyAllWindows()