In [13]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python.components import processors
from mediapipe.tasks.python import vision
import cv2
import math
import numpy as np
import torch

In [14]:
# Short aliases for the MediaPipe Tasks classes used by the cells below.
_mp_vision = mp.tasks.vision

BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = _mp_vision.RunningMode
ObjectDetectorOptions = _mp_vision.ObjectDetectorOptions
ObjectDetector = _mp_vision.ObjectDetector

In [15]:
# Most recent detection results, published by `print_result` and read by the
# webcam display loop. The two lists are parallel: entry i of one belongs to
# entry i of the other.
result_classes = []
bounding_boxes = []

def print_result(result, output_image: "mp.Image", timestamp_ms: int):
    """Store the labels and bounding boxes of the latest detection result.

    Despite its name this function prints nothing: it is registered as the
    detector's `result_callback` and publishes each result into the
    module-level `result_classes` / `bounding_boxes` lists.

    Args:
        result: Detection result; only `result.detections` is read. For each
            detection the first category's `category_name` and the
            `bounding_box` are kept.
        output_image: Image the result belongs to (unused).
        timestamp_ms: Timestamp of the processed frame (unused).
    """
    global result_classes, bounding_boxes

    # Build the new lists locally and publish them only once complete, so a
    # reader never observes a half-filled list. (The callback is used in
    # LIVE_STREAM mode, where it presumably runs outside the display loop's
    # thread -- confirm against the MediaPipe docs.)
    classes = []
    boxes = []
    for detection in result.detections:
        # A detection without categories has no label to show; skip it in
        # both lists so they stay aligned instead of raising IndexError.
        if not detection.categories:
            continue
        classes.append(detection.categories[0].category_name)
        boxes.append(detection.bounding_box)

    result_classes, bounding_boxes = classes, boxes

In [None]:
import os

# Location of the TFLite object-detection model. Set the DETECTOR_MODEL_PATH
# environment variable to run the notebook on another machine; when it is not
# set, the original local path is used.
model_path = os.environ.get(
    'DETECTOR_MODEL_PATH',
    'C:/Users/patel/Desktop/Jinay/Code/Projects/Hackathon-Project/app/detector.tflite')

# Fail early with an explicit message instead of an error from deep inside
# the detector construction.
if not os.path.isfile(model_path):
    raise FileNotFoundError(f"Object detector model not found: {model_path}")

# LIVE_STREAM mode delivers results asynchronously through `result_callback`
# (here `print_result`); at most 5 detections are requested per frame.
options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    max_results=5,
    result_callback=print_result)

objectDetector = ObjectDetector.create_from_options(options)

In [17]:
import time

# Open the default webcam (device 0).
cap = cv2.VideoCapture(0)

landmarks = []  # not used in this cell; kept in case another cell reads it
frame_count = 0
max_frames = 1000  # Limit frames to prevent infinite loops
window_name = 'MediaPipe Image Classification'
last_timestamp_ms = 0  # last timestamp handed to detect_async

try:
    # Report the actual state of the capture device; the loop below is
    # skipped on its own when the webcam could not be opened.
    if not cap.isOpened():
        print("Error: Could not open webcam")
    else:
        print("Webcam opened successfully. Press 'q' to quit.")

    while cap.isOpened() and frame_count < max_frames:
        ret, frame = cap.read()
        # Check the read result before touching `frame`: on failure the
        # frame is not usable and reading `frame.shape` would raise.
        if not ret:
            print("Failed to grab frame")
            break
        frame_count += 1

        # Two 150-pixel-wide vertical strips (left and right edge) that are
        # blacked out. The y extent is the frame HEIGHT (shape[0]).
        height, width = frame.shape[:2]
        start_points = [(0, 0), (width, 0)]
        end_points = [(150, height), (width - 150, height)]

        # Flip the frame horizontally for a mirrored view
        frame = cv2.flip(frame, 1)
        # Convert the BGR image to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Mask out the sections we don't need: white everywhere (keep),
        # black inside the side strips (drop).
        mask = np.full(frame.shape, 255, dtype=np.uint8)
        for top_left, bottom_right in zip(start_points, end_points):
            cv2.rectangle(mask, top_left, bottom_right, (0, 0, 0), thickness=cv2.FILLED)
        rgb_frame = cv2.bitwise_and(rgb_frame, mask)

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Guarantee strictly increasing timestamps even when two frames fall
        # into the same millisecond or the wall clock steps backwards.
        timestamp_ms = max(int(time.time() * 1000), last_timestamp_ms + 1)
        last_timestamp_ms = timestamp_ms

        # Best effort: a failed submission is reported and the loop goes on.
        try:
            objectDetector.detect_async(mp_image, timestamp_ms)
        except Exception as e:
            print(f"Classification error: {e}")

        # Take one snapshot of the shared results for this frame and pair
        # them with zip, so a concurrent update by the callback cannot cause
        # an out-of-range index.
        classes, boxes = result_classes, bounding_boxes
        for label, box in zip(classes, boxes):
            x1, y1 = int(box.origin_x), int(box.origin_y)
            x2, y2 = x1 + int(box.width), y1 + int(box.height)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        # Black out the ignored strips in the displayed frame as well.
        for top_left, bottom_right in zip(start_points, end_points):
            cv2.rectangle(frame, top_left, bottom_right, (0, 0, 0), thickness=cv2.FILLED)

        # Display the frame
        cv2.imshow(window_name, frame)

        # Exit if 'q' is pressed. waitKey is called exactly once per frame:
        # a second call could consume the key press before it is checked.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

        # Exit if the window was closed
        if cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
            break

        for result_class in classes:
            if result_class == "bottle":
                print("DETECTED BOTTLE :):)")
finally:
    print("Webcam session ended")

    # Release resources even when the loop is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()

Webcam opened successfully. Press 'q' to quit.
Webcam session ended
