In [2]:
######## Webcam Object Detection Using Tensorflow-trained Classifier #########
import os
import argparse
import cv2
import numpy as np
import sys
import time
from threading import Thread
import importlib.util
import playsound as play
#import tensorflow as tf
# Define VideoStream class to handle streaming of video from webcam in separate processing thread
# Source - Adrian Rosebrock, PyImageSearch: https://www.pyimagesearch.com/2015/12/28/increasing-raspberry-pi-fps-with-python-and-opencv/
class VideoStream:
    """Threaded webcam reader built on ``cv2.VideoCapture``.

    A background thread keeps calling ``stream.read()`` so that ``read()``
    can hand back the most recently captured frame without waiting on the
    camera.
    """

    def __init__(self, resolution=(640, 480), framerate=30, src=0):
        """Open the capture device, request a frame size and grab one frame.

        Args:
            resolution: Requested ``(width, height)`` in pixels. The results
                of the ``set()`` calls are not checked here.
            framerate: Accepted for interface compatibility; it is not
                applied to the capture device in this implementation.
            src: Device index (or path) handed to ``cv2.VideoCapture``.
                Defaults to 0, the value that used to be hard-coded.
        """
        self.stream = cv2.VideoCapture(src)
        self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        # Named constants instead of the raw property ids 3 and 4
        self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, resolution[0])
        self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, resolution[1])

        # Read first frame from the stream (frame is None if the grab failed)
        (self.grabbed, self.frame) = self.stream.read()

        # Variable to control when the camera is stopped
        self.stopped = False

        # Handle of the reader thread, set by start()
        self._thread = None

    def start(self):
        """Start the background reader thread and return ``self``."""
        # daemon=True: a crash in the main loop must not leave the process
        # hanging on a reader thread that nobody will ever stop.
        self._thread = Thread(target=self.update, args=(), daemon=True)
        self._thread.start()
        return self

    def update(self):
        """Thread body: keep grabbing frames until ``stop()`` is requested."""
        while True:
            # If the camera is stopped, release it and end the thread
            if self.stopped:
                self.stream.release()
                return

            grabbed, frame = self.stream.read()
            self.grabbed = grabbed
            # Keep the last good frame when a grab fails, so read() does not
            # suddenly start returning None in the middle of a run.
            if grabbed:
                self.frame = frame

    def read(self):
        """Return the most recent successfully grabbed frame.

        This is ``None`` only if no frame has been grabbed successfully yet.
        """
        return self.frame

    def stop(self):
        """Ask the reader thread to release the camera and exit."""
        self.stopped = True

        
#audio feedback code
def audioFeedback(filename):
    """Play an intro clip, the label clip and an outro clip for each label.

    Args:
        filename: Iterable of label names (the detection loop passes the
            array returned by ``np.unique``), a single label as ``str``, or
            ``None``. Each label is looked up as
            ``AUDIO_DIR/<label><EXTENSION>``.

    Labels without a matching audio file are reported and skipped, so a
    missing recording cannot abort the caller's loop.

    NOTE(review): playsound is called with its default arguments, so each
    clip presumably plays to completion before the next starts - confirm.
    """
    if filename is None:
        return
    # A bare string would otherwise be iterated character by character
    if isinstance(filename, str):
        filename = [filename]

    # The intro/outro clips are the same for every label: build them once
    base = os.path.join(AUDIO_DIR, BASE + EXTENSION)
    end = os.path.join(AUDIO_DIR, END + EXTENSION)

    for label in filename:
        clip = os.path.join(AUDIO_DIR, label + EXTENSION)
        if not os.path.isfile(clip):
            # Check before playing anything so no half announcement is made
            print('audioFeedback: no audio clip for %r, skipping' % str(label))
            continue
        play.playsound(base)
        play.playsound(clip)
        play.playsound(end)
    
#*********************************function definition ends*************************************


# ----- Model, label map and camera settings -----
MODEL_NAME = 'C:/Users/Raihan/.keras/datasets/test_models/'
GRAPH_NAME = 'model.tflite'
LABELMAP_NAME = 'label_map.txt'
resolution = '1280x720'
# Detections scoring at or below this value are ignored
min_conf_threshold = 0.6
# Requested capture size, parsed from the 'WIDTHxHEIGHT' string above
resW, resH = resolution.split('x')
imW = int(resW)
imH = int(resH)

# ----- Audio feedback settings -----
AUDIO_DIR = 'C:/Users/Raihan/computerVision/models/research/object_detection/audio_feedback/audio'
BASE = 'base'
END = 'end'
EXTENSION = '.mp3'
filename = None

# ----- TensorFlow Lite interpreter -----
# Use the standalone tflite_runtime package when it is installed and fall
# back to the interpreter bundled with full TensorFlow otherwise.
pkg = importlib.util.find_spec('tflite_runtime')
if pkg is None:
    from tensorflow.lite.python.interpreter import Interpreter
else:
    from tflite_runtime.interpreter import Interpreter

# ----- Resolve model and label-map paths -----
CWD_PATH = os.getcwd()
# NOTE: MODEL_NAME is an absolute Windows path, so on Windows os.path.join
# discards CWD_PATH and both results point straight into MODEL_NAME.
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

# One label per line; the line position is the class index used by the model.
# (The COCO "starter model" label map begins with a '???' entry; dropping it
# is intentionally left disabled here.)
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f]

# ----- Load the model -----
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()

# Tensor metadata is queried after allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Entries 1 and 2 of the input shape are used as height and width
height, width = input_details[0]['shape'][1:3]
print(height)
print(width)
# A float32 input needs normalised pixels (see input_mean / input_std)
floating_model = input_details[0]['dtype'] == np.float32

input_mean = 127.5
input_std = 127.5

# ----- Frame-rate bookkeeping -----
frame_rate_calc = 1
freq = cv2.getTickFrequency()



INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2
320
320


In [4]:

# Scratch check: np.unique both de-duplicates and sorts a list of labels.
detected_objects = []
print(detected_objects)

detected_objects.extend(['person', 'bottle'])
print(detected_objects)

# list -> ndarray -> sorted unique ndarray
detected_objects = np.unique(np.array(detected_objects))
print(detected_objects)

# Reset to an empty list again
detected_objects = []
print(detected_objects)


[]
['person', 'bottle']
['bottle' 'person']
[]


In [3]:
# Live detection loop: grab a frame, run the TFLite model, draw the
# detections, periodically speak the detected labels, until 'q' is pressed.

# Spoken feedback is triggered once every this many processed frames
FEEDBACK_EVERY_N_FRAMES = 10
# Abort after this many consecutive reads that deliver no frame at all
MAX_MISSED_FRAMES = 100

# Initialize video stream
videostream = VideoStream(resolution=(imW,imH),framerate=30).start()
time.sleep(1)

num_of_detection = 0
filename = ''
feedback_flag = False
detected_objects = []
missed_frames = 0

# try/finally: the camera thread and the preview window are released even
# when the loop is left through an exception or a kernel interrupt.
try:
    while True:

        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()

        # Grab frame from video stream
        frame1 = videostream.read()
        if frame1 is None:
            # No frame available (camera not delivering): retry a bounded
            # number of times instead of crashing on frame1.copy().
            missed_frames += 1
            if missed_frames > MAX_MISSED_FRAMES:
                raise RuntimeError('No frames received from the camera')
            time.sleep(0.01)
            continue
        missed_frames = 0

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        # Actual frame size; it can differ from the requested imW x imH
        frame_h, frame_w = frame.shape[:2]
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # cv2.resize takes dsize as (width, height)
        frame_resized = cv2.resize(frame_rgb, (width, height))

        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = np.float32((np.float32(input_data) - input_mean) / input_std)

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'],input_data)
        interpreter.invoke()

        # Retrieve detection results.
        # NOTE(review): the output tensor order depends on the exported model
        # (original notes: "1 = boxes, 2 = classes, 4 = scores for ssd fpnlite",
        # "0 = boxes, 1 = classes, 3 = scores for ssd mobilenet"); indices
        # 0/1/2 are kept as in the original - confirm for the model in use.
        boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects

        # Labels found in the previous frame (only reported, not compared)
        detected_objects_prev = np.array(detected_objects)
        print("previous:", detected_objects_prev)
        detected_objects = []

        # Number of frames that have been run through the model
        num_of_detection += 1

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if not (min_conf_threshold < scores[i] <= 1.0):
                continue

            class_id = int(classes[i])
            # Skip class ids the label map does not cover instead of raising IndexError
            if not 0 <= class_id < len(labels):
                continue

            # Look up object name from "labels" using the class index
            object_name = labels[class_id]
            filename = object_name
            detected_objects.append(object_name)

            # Get bounding box coordinates and draw box.
            # Interpreter can return coordinates that are outside of image
            # dimensions, so force them to be within the frame.
            ymin = int(max(1,(boxes[i][0] * frame_h)))
            xmin = int(max(1,(boxes[i][1] * frame_w)))
            ymax = int(min(frame_h,(boxes[i][2] * frame_h)))
            xmax = int(min(frame_w,(boxes[i][3] * frame_w)))

            cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)

            # Draw label
            label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
            label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text

        # Collapse duplicates so every label is announced once per frame
        detected_objects = np.unique(np.array(detected_objects))
        print("current:", detected_objects)

        # The audio call runs inline, so the loop waits until it returns
        if num_of_detection % FEEDBACK_EVERY_N_FRAMES == 0:
            audioFeedback(detected_objects)

        # Draw framerate in corner of frame
        cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2-t1)/freq
        frame_rate_calc = 1/time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Clean up
    cv2.destroyAllWindows()
    videostream.stop()

previous: []
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['person']
previous: ['person']
current: ['bottle' 'person']
previous: ['bot