In [15]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import json
import matplotlib.patches as patches

# Segmentation model that finds bulk garbage.
garbage = YOLO('Garbage Models/bulk_garbage_seg.pt')

# General-purpose detector; everything it finds is masked out before the
# garbage model runs, so that those objects are not reported as garbage.
model = YOLO('yolov8m.pt')

# Input / output locations for this run.
input_path = 'Input/9_test_large.mp4'
output_path = 'output/9_output_large.mp4'

cap = cv2.VideoCapture(input_path)
out = None
try:
    # Fail loudly instead of silently producing an empty output file.
    if not cap.isOpened():
        raise RuntimeError(f"Could not open input video: {input_path}")

    # Mirror the geometry and frame rate of the input in the output video.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # 'mp4v' matches the .mp4 container used by output_path; 'XVID' is an
    # AVI-oriented tag that the MP4 muxer does not accept as-is.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        raise RuntimeError(f"Could not open output video for writing: {output_path}")

    while True:
        success, frame = cap.read()

        # read() reports failure both on a decode error and at end of stream.
        if not success:
            print("Failed to read a frame from the video.")
            break

        # NOTE: all-black frames (fades, cuts) are valid frames and are
        # processed like any other; stopping on them would truncate the output.

        # Work on a copy so the annotations are drawn on the untouched frame.
        black = frame.copy()

        # Stage 1: detect objects with the general model. No class filter is
        # applied, so every detection (not only persons) is blacked out.
        results_1 = model.predict(frame, device=0)
        for x1, y1, x2, y2 in results_1[0].boxes.xyxy.cpu().tolist():
            cv2.rectangle(
                black,
                (int(x1), int(y1)),
                (int(x2), int(y2)),
                (0, 0, 0),
                cv2.FILLED,
            )

        # Stage 2: look for garbage only in what remains visible.
        results_2 = garbage.predict(black, device=0, conf=0.5)

        # Draw the garbage detections on the original (unmasked) frame.
        annote_2 = results_2[0].plot(img=frame)

        out.write(annote_2)
finally:
    # Always release the handles so the output file is finalised even when
    # inference raises part-way through the video.
    cap.release()
    if out is not None:
        out.release()


0: 384x640 (no detections), 33.5ms
Speed: 4.0ms preprocess, 33.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 34.3ms
Speed: 1.0ms preprocess, 34.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.0ms
Speed: 4.0ms preprocess, 25.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 31.2ms
Speed: 4.0ms preprocess, 31.2ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.0ms
Speed: 3.0ms preprocess, 25.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 31.6ms
Speed: 3.0ms preprocess, 31.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.0ms
Speed: 2.0ms preprocess, 25.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 29.0ms
Speed: 3.0ms preprocess, 29.0ms i

In [16]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import json
import matplotlib.patches as patches

# Segmentation model that finds bulk garbage.
garbage = YOLO('Garbage Models/bulk_garbage_seg.pt')

# General-purpose detector; everything it finds is masked out before the
# garbage model runs, so that those objects are not reported as garbage.
model = YOLO('yolov8m.pt')

# Input / output locations for this run.
input_path = 'Input/10_test_large.mp4'
output_path = 'output/10_output_large.mp4'

cap = cv2.VideoCapture(input_path)
out = None
try:
    # Fail loudly instead of silently producing an empty output file.
    if not cap.isOpened():
        raise RuntimeError(f"Could not open input video: {input_path}")

    # Mirror the geometry and frame rate of the input in the output video.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # 'mp4v' matches the .mp4 container used by output_path; 'XVID' is an
    # AVI-oriented tag that the MP4 muxer does not accept as-is.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        raise RuntimeError(f"Could not open output video for writing: {output_path}")

    while True:
        success, frame = cap.read()

        # read() reports failure both on a decode error and at end of stream.
        if not success:
            print("Failed to read a frame from the video.")
            break

        # NOTE: all-black frames (fades, cuts) are valid frames and are
        # processed like any other; stopping on them would truncate the output.

        # Work on a copy so the annotations are drawn on the untouched frame.
        black = frame.copy()

        # Stage 1: detect objects with the general model. No class filter is
        # applied, so every detection (not only persons) is blacked out.
        results_1 = model.predict(frame, device=0)
        for x1, y1, x2, y2 in results_1[0].boxes.xyxy.cpu().tolist():
            cv2.rectangle(
                black,
                (int(x1), int(y1)),
                (int(x2), int(y2)),
                (0, 0, 0),
                cv2.FILLED,
            )

        # Stage 2: look for garbage only in what remains visible.
        results_2 = garbage.predict(black, device=0, conf=0.5)

        # Draw the garbage detections on the original (unmasked) frame.
        annote_2 = results_2[0].plot(img=frame)

        out.write(annote_2)
finally:
    # Always release the handles so the output file is finalised even when
    # inference raises part-way through the video.
    cap.release()
    if out is not None:
        out.release()


0: 384x640 2 persons, 2 ties, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 garbage, 32.0ms
Speed: 5.0ms preprocess, 32.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 ties, 1 bottle, 25.0ms
Speed: 3.0ms preprocess, 25.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 garbage, 29.5ms
Speed: 3.0ms preprocess, 29.5ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 ties, 1 bottle, 24.0ms
Speed: 2.0ms preprocess, 24.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 garbage, 29.0ms
Speed: 2.5ms preprocess, 29.0ms inference, 5.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 ties, 1 bottle, 24.5ms
Speed: 2.0ms preprocess, 24.5ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 garbage, 30.0ms
Speed: 3.0ms prepro

In [17]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import json
import matplotlib.patches as patches

# Segmentation model that finds bulk garbage.
garbage = YOLO('Garbage Models/bulk_garbage_seg.pt')

# General-purpose detector; everything it finds is masked out before the
# garbage model runs, so that those objects are not reported as garbage.
model = YOLO('yolov8m.pt')

# Input / output locations for this run.
input_path = 'Input/11.mp4'
output_path = 'output/11_output.mp4'

cap = cv2.VideoCapture(input_path)
out = None
try:
    # Fail loudly instead of silently producing an empty output file.
    if not cap.isOpened():
        raise RuntimeError(f"Could not open input video: {input_path}")

    # Mirror the geometry and frame rate of the input in the output video.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # 'mp4v' matches the .mp4 container used by output_path; 'XVID' is an
    # AVI-oriented tag that the MP4 muxer does not accept as-is.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        raise RuntimeError(f"Could not open output video for writing: {output_path}")

    while True:
        success, frame = cap.read()

        # read() reports failure both on a decode error and at end of stream.
        if not success:
            print("Failed to read a frame from the video.")
            break

        # NOTE: all-black frames (fades, cuts) are valid frames and are
        # processed like any other; stopping on them would truncate the output.

        # Work on a copy so the annotations are drawn on the untouched frame.
        black = frame.copy()

        # Stage 1: detect objects with the general model. No class filter is
        # applied, so every detection (not only persons) is blacked out.
        results_1 = model.predict(frame, device=0)
        for x1, y1, x2, y2 in results_1[0].boxes.xyxy.cpu().tolist():
            cv2.rectangle(
                black,
                (int(x1), int(y1)),
                (int(x2), int(y2)),
                (0, 0, 0),
                cv2.FILLED,
            )

        # Stage 2: look for garbage only in what remains visible.
        results_2 = garbage.predict(black, device=0, conf=0.5)

        # Draw the garbage detections on the original (unmasked) frame.
        annote_2 = results_2[0].plot(img=frame)

        out.write(annote_2)
finally:
    # Always release the handles so the output file is finalised even when
    # inference raises part-way through the video.
    cap.release()
    if out is not None:
        out.release()


0: 640x384 1 person, 1 car, 1 motorcycle, 1 bench, 1 cup, 1 refrigerator, 38.5ms
Speed: 2.0ms preprocess, 38.5ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 33.0ms
Speed: 1.0ms preprocess, 33.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 1 car, 1 motorcycle, 24.0ms
Speed: 3.0ms preprocess, 24.0ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 30.0ms
Speed: 4.0ms preprocess, 30.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 1 car, 1 motorcycle, 24.5ms
Speed: 3.0ms preprocess, 24.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 30.0ms
Speed: 2.0ms preprocess, 30.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 1 car, 1 motorcycle, 24.5ms
Speed: 2.0ms preprocess, 24.5ms inference, 4.0ms postprocess per image at shap