In [1]:
import torch
import os
from ultralytics import YOLO
import cv2
import math 
import zipfile
import matplotlib.pyplot as plt
import plotly.express as px
import requests

# A bare expression is only echoed when it is the last line of a cell, so the
# CUDA check is printed explicitly; otherwise its result is silently dropped.
print(f"CUDA available: {torch.cuda.is_available()}")

# Name of the downloaded dataset archive and the directory it is extracted into.
zip_from = "dataset.zip"
zip_to = "dataset_original"


  from .autonotebook import tqdm as notebook_tqdm


# Let's Start Small

In [2]:
# Load the pretrained YOLOv8 nano weights and run them once on a sample image.
model = YOLO('yolov8n.pt')

# cv2.imread returns None (it does not raise) when the file is missing or unreadable.
frame = cv2.imread('room.jpg')
success = frame is not None
if not success:
    raise FileNotFoundError("Could not read image 'room.jpg'")

results = model(frame, verbose = False)

# One entry per detected box: map each predicted class index to its label.
objects = []
for result in results:
    for class_id in result.boxes.cls:
        name = result.names[int(class_id)]
        print(f"Found object: {name}")
        objects.append(name)

annotated_frame = results[0].plot()
print(objects)

# The annotated frame follows OpenCV's BGR channel order while plotly expects
# RGB, so swap the channels for display only.
fig = px.imshow(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
fig.show()

Found object: couch
Found object: chair
Found object: tv
Found object: chair
Found object: microwave
Found object: couch
Found object: vase
Found object: chair
Found object: book
Found object: chair
Found object: book
Found object: dining table
['couch', 'chair', 'tv', 'chair', 'microwave', 'couch', 'vase', 'chair', 'book', 'chair', 'book', 'dining table']


In [24]:
# Making the above functional 

def _draw_detection(image, name, score, box):
    """Draw one bounding box plus a "name (score)" label on `image`, in place."""
    colour = (255, 0, 0)  # blue when the image is in OpenCV's BGR channel order
    cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), colour, 2)
    # The label is anchored at the centre of the bounding box.
    anchor = (int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2))
    cv2.putText(image, f"{name} ({score:.2f})", anchor, cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 2)


def _show_bgr(image):
    """Display an image with plotly, converting 3-channel BGR input to RGB first."""
    if getattr(image, "ndim", 0) == 3 and image.shape[-1] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    px.imshow(image).show()


def observe(img, model,  verbose = True, confidence_threshold = None, prioritize = False):
    """Run `model` on `img` and return the detections plus annotated images.

    Args:
        img: Image array to analyse (assumed to be a BGR array such as the ones
            returned by cv2.imread / VideoCapture.read). Must not be None.
        model: Callable invoked as ``model(img, verbose=False)`` that returns a
            sequence of results exposing ``boxes.cls``, ``boxes.conf``,
            ``boxes.xyxy``, ``names`` and ``plot()``.
        verbose: When True, print the detected and the filtered labels and show
            the annotated and the filtered image with plotly.
        confidence_threshold: Detections must score strictly above this value to
            be counted as filtered. None disables filtering.
        prioritize: When True, only the single highest-scoring detection (that
            also clears the threshold, if one is given) is drawn and reported.

    Returns:
        A tuple ``(annotated_frame, objects, confidence, filtered_results,
        filtered_image, filtered_objects)``:
        annotated_frame is ``results[0].plot()``; objects lists every detected
        label; confidence holds one dict per detection with the keys "name",
        "confidence" and "bounding_box"; filtered_results lists every label
        scoring above the threshold (empty without a threshold);
        filtered_image is a copy of `img` (or of annotated_frame when no
        threshold is given) with the selected detections drawn on it;
        filtered_objects lists the labels that were drawn.

    Raises:
        ValueError: If `img` is None.
    """
    if img is None:
        raise ValueError("observe() received img=None; check that the image or frame was read successfully")

    has_threshold = confidence_threshold is not None

    objects = []
    confidence = []
    filtered_results = []

    results = model(img, verbose = False)
    for result in results:
        for i in range(int(result.boxes.cls.shape[0])):
            name = result.names[int(result.boxes.cls[i])]
            # Pull out the confidence values
            c = result.boxes.conf[i].item()
            objects.append(name)
            confidence.append({"name" : name, "confidence": c, "bounding_box" : result.boxes.xyxy[i].tolist()})

            if has_threshold and c > confidence_threshold:
                filtered_results.append(name)

    annotated_frame = results[0].plot()

    if verbose:
        print(f"\n{objects}")
        _show_bgr(annotated_frame)

    # With a threshold, start from the clean input so only the selected boxes
    # appear; without one, start from the fully annotated frame.
    filtered_image = img.copy() if has_threshold else annotated_frame.copy()

    if prioritize:
        # Without a threshold every positive-confidence detection is a
        # candidate (comparing a float against None would raise TypeError).
        candidates = [
            item for item in confidence
            if item["confidence"] > 0
            and (not has_threshold or item["confidence"] > confidence_threshold)
        ]
        # max() keeps the first of equally scored candidates.
        selected = [max(candidates, key=lambda item: item["confidence"])] if candidates else []
    else:
        selected = [
            item for item in confidence
            if has_threshold and item["confidence"] > confidence_threshold
        ]

    filtered_objects = []
    for item in selected:
        filtered_objects.append(item["name"])
        _draw_detection(filtered_image, item["name"], item["confidence"], item["bounding_box"])

    if verbose:
        print(f"\n{filtered_objects}")
        _show_bgr(filtered_image)

    return annotated_frame, objects, confidence, filtered_results, filtered_image, filtered_objects

# Run the sample image through observe(), treating detections scored above
# 0.80 as the filtered set.
model = YOLO('yolov8n.pt')
frame = cv2.imread('room.jpg')
if frame is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError("Could not read image 'room.jpg'")
a, o, c, f, fi, fo = observe(frame, model, confidence_threshold=0.80)


['couch', 'chair', 'tv', 'chair', 'microwave', 'couch', 'vase', 'chair', 'book', 'chair', 'book', 'dining table']



['couch']


In [None]:
# Display the per-detection records (name, confidence, bounding_box) from the most recent observe() call.
c

In [None]:
# Next, we can grab the images from Roboflow and run them through the model.

# Dataset link; it is requested with requests.get and the response is saved as a zip archive.
url = "https://app.roboflow.com/ds/yrar1E7CeJ"

In [None]:
# Download the zip

# NOTE(review): the key literal below is committed in the notebook. Prefer
# supplying it through the ROBOFLOW_API_KEY environment variable, then rotate
# the key and remove the literal fallback.
params = {
    'key': os.environ.get('ROBOFLOW_API_KEY', '5Gv32VL6Wj'),
}

# The timeout keeps an unreachable server from hanging the kernel indefinitely.
response = requests.get(url, params=params, timeout=60)
# Stop on an HTTP error instead of saving the error page as the archive and
# failing later with a confusing BadZipFile during extraction.
response.raise_for_status()

# save the response
with open(zip_from, 'wb') as f:
    f.write(response.content)

# Extract the zip file
with zipfile.ZipFile(zip_from, 'r') as zip_ref:
    zip_ref.extractall(zip_to)

In [4]:
# train a yolo model based on the dataset

model = YOLO('yolov8n.pt', task="detect")

# larger base model
# model = YOLO('yolov5l.pt')

# model = YOLO('rps.yaml', task="classify")

# Resolve data.yaml inside the extracted dataset directory instead of a
# machine-specific home directory, so the notebook runs from any checkout.
# abspath keeps the path absolute regardless of how the trainer resolves
# relative dataset paths.
data_path = os.path.abspath(os.path.join(zip_to, "data.yaml"))
print(data_path)
results = model.train(model="yolov8n.pt", data=data_path, epochs=600, imgsz=640, patience = 60, batch=30);

Ultralytics YOLOv8.0.208 🚀 Python-3.10.12 torch-1.12.1+cu102 CUDA:0 (NVIDIA GeForce RTX 2070, 7966MiB)


/home/forsythcreations/git/ECE4554_Project/initial/rpc/dataset_original/data.yaml


[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/home/forsythcreations/git/ECE4554_Project/initial/rpc/dataset_original/data.yaml, epochs=600, patience=60, batch=30, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train19, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=

In [6]:
# use the results to predict the images
# Index of the training run whose weights are loaded; the training log for this
# notebook reports name=train19.
num = 19

# path to trained weights
# NOTE(review): the path is relative to the kernel's working directory — confirm
# it matches where the training run was saved.
weights_path = f"../../detect/train{num}/weights/best.pt"

# Model built from the trained weights; later cells pass it to observe().
custom_model = YOLO(weights_path)

In [7]:
test_image_path = f"{zip_to}/train/images/paper01-017_png_jpg.rf.6c82af9b5e5bf5109221916f7270fd90.jpg"
test_image = cv2.imread(test_image_path)
if test_image is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError(f"Could not read image '{test_image_path}'")

# Only the per-detection records (third return value) are kept.
_, _, c, _, _, _ = observe(test_image, custom_model, confidence_threshold=0.70)


['paper']



['paper']


In [26]:
# start webcam
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Stop after this many consecutive failed reads instead of spinning forever
# with no way to quit (the 'q' check only runs after a frame is shown).
MAX_FAILED_READS = 30
failed_reads = 0

try:
    while True:
        success, img = cap.read()

        if not success:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Webcam stopped returning frames; ending the preview.")
                break
            continue
        failed_reads = 0

        # Draw only the single best detection scoring above 0.5.
        output, _, c, _, fi, fo = observe(img, custom_model, verbose = False, confidence_threshold=0.5, prioritize = True)

        cv2.imshow('Webcam', fi)
        # Mask to the low byte: waitKey can return extra high bits on some platforms.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even on an exception or a
    # kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [25]:
# Manual cleanup: release the capture device and close any OpenCV windows.
cap.release()
cv2.destroyAllWindows()

In [None]:
import datetime
import calendar

def getTime():
    """Return the current time as whole seconds since the Unix epoch.

    The clock is read as timezone-aware UTC before the conversion. Reading a
    naive local time and passing it to calendar.timegm (which interprets its
    argument as UTC) would shift the value by the local UTC offset and make it
    jump at daylight-saving changes, which breaks interval checks built on
    differences between two calls.

    Returns:
        int: Seconds since 1970-01-01 00:00:00 UTC, truncated to a whole second.
    """
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    return int(calendar.timegm(now_utc.timetuple()))

In [None]:
import time
import os


# take a photo every couple of seconds and save it to a folder

num_of_photos = 20

photo_name = "paper"

# Frames are written to more_images/<photo_name>/, and cv2.imwrite does not
# create missing directories, so create the per-label folder itself
# (makedirs also creates the parent "more_images").
os.makedirs(os.path.join("more_images", photo_name), exist_ok=True)

# start webcam
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Get the current time in seconds
t = getTime()

counter = 0

while (counter < num_of_photos):
    success, img = cap.read()
    if (t + 2 < getTime() and success):
        print(f"{counter}", end = " ")
        cv2.imwrite(f"more_images/{photo_name}/{photo_name}_{counter}_v2.jpg", img)
        t = getTime()
        counter += 1
    cv2.imshow('Webcam', img)