In [None]:
import numpy as np
import cv2

# Single-image YOLOv3 object detection with OpenCV's DNN module.
# Expects "yolov3.cfg", "yolov3.weights", "coco.names" and "input.jpg" to be
# present in the current working directory.

# Load the pre-trained YOLOv3 network
net = cv2.dnn.readNetFromDarknet("yolov3.cfg", "yolov3.weights")

# Load the COCO class labels (one label per line)
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Define the input image size
input_size = (416, 416)

# Define the confidence threshold and non-maximum suppression threshold
conf_threshold = 0.5
nms_threshold = 0.4

# Load the input image and preprocess it
image = cv2.imread("input.jpg")
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image file 'input.jpg'")
image_height, image_width = image.shape[:2]
blob = cv2.dnn.blobFromImage(image, 1/255.0, input_size, swapRB=True, crop=False)

# Set the input to the network and forward pass
net.setInput(blob)
outputs = net.forward(net.getUnconnectedOutLayersNames())

# Perform post-processing to extract bounding boxes and confidence scores
boxes = []
confidences = []
class_ids = []
for output in outputs:
    for detection in output:
        # Entries from index 5 onwards are the per-class scores; the first
        # four entries are used below as center/size fractions of the image.
        scores = detection[5:]
        class_id = int(np.argmax(scores))
        confidence = float(scores[class_id])
        if confidence > conf_threshold:
            center_x = int(detection[0] * image_width)
            center_y = int(detection[1] * image_height)
            width = int(detection[2] * image_width)
            height = int(detection[3] * image_height)
            # Convert from center-based to top-left-based coordinates.
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            boxes.append([left, top, width, height])
            confidences.append(confidence)
            class_ids.append(class_id)

# Perform non-maximum suppression to remove overlapping bounding boxes
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

# Depending on the OpenCV version, NMSBoxes returns an (N, 1) array, a flat
# array, or an empty tuple when nothing survives. Flattening handles all three.
kept_indices = [int(i) for i in np.array(indices).flatten()]

# Draw the final bounding boxes and class labels on the input image
color = (0, 255, 0)
for i in kept_indices:
    left, top, width, height = boxes[i]
    label = f"{classes[class_ids[i]]}: {confidences[i]:.2f}"
    cv2.rectangle(image, (left, top), (left + width, top + height), color, 2)
    # Keep the label inside the image when the box touches the top edge.
    label_y = max(top - 5, 15)
    cv2.putText(image, label, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# Show the output image
cv2.imshow("Output", image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [None]:
# The above but for video.
# The pipeline is wrapped in functions so that running this cell only defines
# them; call detect_objects_in_video() explicitly to start the capture loop.
def extract_detections(outputs, frame_width, frame_height, conf_threshold):
    """Convert raw network outputs into pixel-space boxes, scores and class ids.

    Args:
        outputs: Iterable of 2-D arrays, one per output layer. Each row is one
            detection: the first four entries are used as center x, center y,
            width and height expressed as fractions of the frame size, and the
            entries from index 5 onwards are the per-class scores.
        frame_width: Width of the frame in pixels.
        frame_height: Height of the frame in pixels.
        conf_threshold: Detections whose best class score is not strictly
            greater than this value are dropped.

    Returns:
        A tuple ``(boxes, confidences, class_ids)`` of parallel lists, where
        each box is ``[left, top, width, height]`` in integer pixels.
    """
    boxes = []
    confidences = []
    class_ids = []
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence <= conf_threshold:
                continue
            center_x = int(detection[0] * frame_width)
            center_y = int(detection[1] * frame_height)
            width = int(detection[2] * frame_width)
            height = int(detection[3] * frame_height)
            # Convert from center-based to top-left-based coordinates.
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            boxes.append([left, top, width, height])
            confidences.append(confidence)
            class_ids.append(class_id)
    return boxes, confidences, class_ids


def detect_objects_in_video(source=0, cfg_path="yolov3.cfg",
                            weights_path="yolov3.weights",
                            names_path="coco.names", input_size=(416, 416),
                            conf_threshold=0.5, nms_threshold=0.4):
    """Run YOLOv3 detection on a video stream and display annotated frames.

    Frames are read from ``source`` until the stream ends or the 'q' key is
    pressed in the display window.

    Args:
        source: Argument passed to ``cv2.VideoCapture`` (0 by default).
        cfg_path: Path to the Darknet network configuration file.
        weights_path: Path to the Darknet weights file.
        names_path: Path to the class-label file, one label per line.
        input_size: ``(width, height)`` the frame is resized to for the network.
        conf_threshold: Minimum class score for a detection to be kept.
        nms_threshold: Overlap threshold passed to non-maximum suppression.

    Raises:
        RuntimeError: If the video source cannot be opened.
    """
    # Load the pre-trained YOLOv3 network and COCO class labels
    net = cv2.dnn.readNetFromDarknet(cfg_path, weights_path)
    with open(names_path, "r") as f:
        classes = [line.strip() for line in f]
    output_layer_names = net.getUnconnectedOutLayersNames()

    # Initialize the video stream
    cap = cv2.VideoCapture(source)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video source {source!r}")

    color = (0, 255, 0)
    try:
        # Loop over frames from the video stream
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_height, frame_width = frame.shape[:2]

            # Preprocess the frame and perform a forward pass
            blob = cv2.dnn.blobFromImage(frame, 1/255.0, input_size, swapRB=True, crop=False)
            net.setInput(blob)
            outputs = net.forward(output_layer_names)

            boxes, confidences, class_ids = extract_detections(
                outputs, frame_width, frame_height, conf_threshold
            )

            # Perform non-maximum suppression to remove overlapping boxes.
            # Depending on the OpenCV version the result is an (N, 1) array,
            # a flat array, or an empty tuple; flattening handles all three.
            indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
            for i in np.array(indices).flatten():
                i = int(i)
                left, top, width, height = boxes[i]
                label = f"{classes[class_ids[i]]}: {confidences[i]:.2f}"
                cv2.rectangle(frame, (left, top), (left + width, top + height), color, 2)
                cv2.putText(frame, label, (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Show the output frame
            cv2.imshow("Object Detection", frame)

            # Exit the loop if the 'q' key is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources and close windows even if an error interrupts the loop.
        cap.release()
        cv2.destroyAllWindows()

## Training Process

In [None]:
# Step 1: Prepare your dataset
# You will need to prepare your dataset of images labeled with the objects you want to detect.

# Step 2: Download pre-trained weights
# You can download pre-trained weights for the backbone network, such as ResNet or DarkNet.

# Step 3: Define the model architecture
# You will need to define the architecture of your YOLO network. This will include the backbone network, the detection layers, and the loss function.

# Steps 4-6 are wrapped in a function so this cell runs on a fresh kernel;
# call it once the model and datasets from steps 1-3 exist.
def train_and_evaluate(model, train_images, train_labels, val_images, val_labels,
                       epochs=10, optimizer='adam', loss='mse'):
    """Compile, train and evaluate ``model``.

    Args:
        model: Object exposing ``compile``, ``fit`` and ``evaluate`` methods
            (presumably a Keras-style model; confirm against your framework).
        train_images: Training inputs passed to ``model.fit``.
        train_labels: Training targets passed to ``model.fit``.
        val_images: Validation inputs used during training and for evaluation.
        val_labels: Validation targets used during training and for evaluation.
        epochs: Number of training epochs.
        optimizer: Optimizer passed to ``model.compile``.
        loss: Loss passed to ``model.compile``. The default 'mse' is a
            placeholder; substitute the loss function defined in step 3.

    Returns:
        A tuple ``(history, metrics)`` holding the return values of
        ``model.fit`` and ``model.evaluate`` respectively.
    """
    # Step 4: Compile the model
    # You will need to compile the model with an optimizer and a loss function.
    model.compile(optimizer=optimizer, loss=loss)

    # Step 5: Train the model
    # Train the model using the prepared dataset.
    history = model.fit(train_images, train_labels, epochs=epochs,
                        validation_data=(val_images, val_labels))

    # Step 6: Evaluate the model
    # Evaluate the performance of the trained model on the validation set.
    metrics = model.evaluate(val_images, val_labels)
    return history, metrics

# Step 7: Fine-tune the model (optional)
# If the model is not performing well, you can fine-tune it by adjusting the hyperparameters or by adding more data to the training set.
