In [1]:
import torch
import numpy as np
import cv2
from time import time
from tkinter import *
from PIL import Image
from PIL import ImageTk
import cv2
import imutils

class MugDetection:
    """
    Class implements Yolo5 model to make inferences on a youtube video using Opencv2.
    """
    def __init__(self, capture_index, model_name):
        """
        Initializes the class with youtube url and output file.
        :param url: Has to be as youtube URL,on which prediction is made.
        :param out_file: A valid output file name.
        """
        self.capture_index = capture_index
        self.model = self.load_model(model_name)
        self.classes = self.model.names
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.root = Tk()
        self.btnIniciar = Button(self.root, text="Iniciar", width=45, command=self.iniciar3)
        self.btnIniciar.grid(column=0, row=0, padx=5, pady=5)
        self.btnFinalizar = Button(self.root, text="Finalizar", width=45, command=self.finalizar)
        self.btnFinalizar.grid(column=1, row=0, padx=5, pady=5)
        self.lblVideo = Label(self.root)
        self.lblVideo.grid(column=0, row=1, columnspan=2)
        print("Using Device: ", self.device)

    def get_video_capture(self):
        """
        Creates a new video streaming object to extract video frame by frame to make prediction on.
        :return: opencv2 video capture object, with lowest quality frame available for video.
        """
      
        return cv2.VideoCapture(self.capture_index)

    def load_model(self, model_name):
        """
        Loads Yolo5 model from pytorch hub.
        :return: Trained Pytorch model.
        """
        if model_name:
            model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_name, force_reload=True)
        else:
            model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
        return model

    def score_frame(self, frame):
        """
        Takes a single frame as input, and scores the frame using yolo5 model.
        :param frame: input frame in numpy/list/tuple format.
        :return: Labels and Coordinates of objects detected by model in the frame.
        """
        self.model.to(self.device)
        frame = [frame]
        results = self.model(frame)
        labels, cord = results.xyxyn[0][:, -1], results.xyxyn[0][:, :-1]
        return labels, cord

    def class_to_label(self, x):
        """
        For a given label value, return corresponding string label.
        :param x: numeric label
        :return: corresponding string label
        """
        return self.classes[int(x)]

    def plot_boxes(self, results, frame):
        """
        Takes a frame and its results as input, and plots the bounding boxes and label on to the frame.
        :param results: contains labels and coordinates predicted by model on the given frame.
        :param frame: Frame which has been scored.
        :return: Frame with bounding boxes and labels ploted on it.
        """
        labels, cord = results
        n = len(labels)
        x_shape, y_shape = frame.shape[1], frame.shape[0]
        for i in range(n):
            row = cord[i]
            if row[4] >= 0.3:
                x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape)
                bgr = (0, 255, 0)
                cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 2)
                cv2.putText(frame, self.class_to_label(labels[i]), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.9, bgr, 2)

        return frame
    def __call__(self):
        self.root.mainloop()
    def iniciar():
        global cap
        cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
        visualizar()
    def visualizar():
        global cap
        if cap is not None:
            ret, frame = cap.read()
            if ret == True:
                frame = imutils.resize(frame, width=640)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                im = Image.fromarray(frame)
                img = ImageTk.PhotoImage(image=im)
                lblVideo.configure(image=img)
                lblVideo.image = img
                lblVideo.after(10, visualizar)
            else:
                lblVideo.image = ""
                cap.release()
    def iniciar3(self):
        """
        This function is called when class is executed, it runs the loop to read the video frame by frame,
        and write the output into a new file.
        :return: void
        """
        cap = self.get_video_capture()
        if cap is not None:
            ret, frame = cap.read()
            if ret == True:
                frame = imutils.resize(frame, width=640)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                im = Image.fromarray(frame)
                img = ImageTk.PhotoImage(image=im)
                self.lblVideo.configure(image=img)
                self.lblVideo.image = img
                self.lblVideo.after(10, self.iniciar3)
            else:
                lblVideo.image = ""
                cap.release()
        # assert cap.isOpened()
      
        # while True:
          
        #     ret, frame = cap.read()
        #     assert ret
            
        #     frame = cv2.resize(frame, (416,416))
            
        #     start_time = time()
        #     results = self.score_frame(frame)
        #     frame = self.plot_boxes(results, frame)
            
        #     end_time = time()
        #     fps = 1/np.round(end_time - start_time, 2)
        #     #print(f"Frames Per Second : {fps}")
             
        #     cv2.putText(frame, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
            
        #     cv2.imshow('YOLOv5 Detection', frame)
        #     print('si')
        #     im = Image.fromarray(frame)
        #     img = ImageTk.PhotoImage(image=im)
        #     self.lblVideo.configure(image=img)
        #     self.lblVideo.image = img
 
        #     if cv2.waitKey(5) & 0xFF == 27:
        #         break
      
        cap.release()
    def iniciar2(self):
        """
        This function is called when class is executed, it runs the loop to read the video frame by frame,
        and write the output into a new file.
        :return: void
        """
        cap = self.get_video_capture()
        assert cap.isOpened()
        if cap is not None:
            ret, frame = cap.read()
            if ret == True:
                while True:
                    assert ret
                    start_time = time()
                    results = self.score_frame(frame)
                    frame = cv2.resize(frame, (416,416))
                    frame = self.plot_boxes(results, frame)
                    end_time = time()
                    fps = 1/np.round(end_time - start_time, 2)
                    cv2.putText(frame, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
                
                    cv2.imshow('YOLOv5 Detection', frame)
        
                    if cv2.waitKey(5) & 0xFF == 27:
                        break
                    print('aca')
                    # frame = imutils.resize(frame, width=640)
                    # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    im = Image.fromarray(frame)
                    img = ImageTk.PhotoImage(image=im)
                    self.lblVideo.configure(image=img)
                    self.lblVideo.image = img
                    #self.lblVideo.after(10, self.iniciar2)
                
            else:
                lblVideo.image = ""
                cap.release()
        # while True:
          
        #     # ret, frame = cap.read()
        #     assert ret
            
        #     # frame = cv2.resize(frame, (416,416))
            
        #     start_time = time()
        #     results = self.score_frame(frame)
        #     frame = self.plot_boxes(results, frame)
            
        #     end_time = time()
        #     fps = 1/np.round(end_time - start_time, 2)
        #     #print(f"Frames Per Second : {fps}")
             
        #     cv2.putText(frame, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
            
        #     cv2.imshow('YOLOv5 Detection', frame)
 
        #     if cv2.waitKey(5) & 0xFF == 27:
        #         break
        # cap.release()


    def finalizar():
        global cap
        cap.release()
    
    
    
        


In [2]:
# Create a new object and execute.
detector = MugDetection(capture_index=0, model_name='best.pt')
detector()

Downloading: "https://github.com/ultralytics/yolov5/archive/master.zip" to C:\Users\diego/.cache\torch\hub\master.zip
YOLOv5  2022-6-25 Python-3.10.3 torch-1.11.0+cpu CPU

Fusing layers... 
Model summary: 213 layers, 7015519 parameters, 0 gradients
Adding AutoShape... 


Using Device:  cpu


In [7]:
def iniciar():
    global cap
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    visualizar()
def visualizar():
    global cap
    if cap is not None:
        ret, frame = cap.read()
        if ret == True:
            frame = imutils.resize(frame, width=640)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            im = Image.fromarray(frame)
            img = ImageTk.PhotoImage(image=im)
            lblVideo.configure(image=img)
            lblVideo.image = img
            lblVideo.after(10, visualizar)
        else:
            lblVideo.image = ""
            cap.release()
def finalizar():
    global cap
    cap.release()

In [12]:
cap = None
root = Tk()
btnIniciar = Button(root, text="Iniciar", width=45, command=iniciar)
btnIniciar.grid(column=0, row=0, padx=5, pady=5)
btnFinalizar = Button(root, text="Finalizar", width=45, command=finalizar)
btnFinalizar.grid(column=1, row=0, padx=5, pady=5)
lblVideo = Label(root)
lblVideo.grid(column=0, row=1, columnspan=2)


In [13]:
root.mainloop()

In [2]:
# Create a new object and execute.
detector = MugDetection(capture_index=0, model_name='best.pt')
detector()

Downloading: "https://github.com/ultralytics/yolov5/archive/master.zip" to C:\Users\diego/.cache\torch\hub\master.zip
YOLOv5  2022-6-25 Python-3.10.3 torch-1.11.0+cpu CPU

Fusing layers... 
Model summary: 213 layers, 7015519 parameters, 0 gradients
Adding AutoShape... 


NameError: name 'Tk' is not defined

In [3]:
from tkinter import *
from PIL import Image
from PIL import ImageTk
import cv2
import imutils

In [4]:
def iniciar():
    global cap
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    visualizar()
def visualizar():
    global cap
    if cap is not None:
        ret, frame = cap.read()
        if ret == True:
            frame = imutils.resize(frame, width=640)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            im = Image.fromarray(frame)
            img = ImageTk.PhotoImage(image=im)
            lblVideo.configure(image=img)
            lblVideo.image = img
            lblVideo.after(10, visualizar)
        else:
            lblVideo.image = ""
            cap.release()
def finalizar():
    global cap
    cap.release()

In [5]:
cap = None
root = Tk()
btnIniciar = Button(root, text="Iniciar", width=45, command=iniciar)
btnIniciar.grid(column=0, row=0, padx=5, pady=5)
btnFinalizar = Button(root, text="Finalizar", width=45, command=finalizar)
btnFinalizar.grid(column=1, row=0, padx=5, pady=5)
lblVideo = Label(root)
lblVideo.grid(column=0, row=1, columnspan=2)
root.mainloop()

In [2]:
__author__ = 'bunkus'
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.image import Image
from kivy.clock import Clock
from kivy.graphics.texture import Texture

import cv2

class CamApp(App):

    def build(self):
        self.img1=Image()
        layout = BoxLayout()
        layout.add_widget(self.img1)
        #opencv2 stuffs
        self.capture = cv2.VideoCapture(0)
        cv2.namedWindow("CV2 Image")
        Clock.schedule_interval(self.update, 1.0/33.0)
        return layout

    def update(self, dt):
        # display image from cam in opencv window
        ret, frame = self.capture.read()
        cv2.imshow("CV2 Image", frame)
        # convert it to texture
        buf1 = cv2.flip(frame, 0)
        buf = buf1.tostring()
        texture1 = Texture.create(size=(frame.shape[1], frame.shape[0]), colorfmt='bgr') 
        #if working on RASPBERRY PI, use colorfmt='rgba' here instead, but stick with "bgr" in blit_buffer. 
        texture1.blit_buffer(buf, colorfmt='bgr', bufferfmt='ubyte')
        # display image from the texture
        self.img1.texture = texture1

if __name__ == '__main__':
    CamApp().run()
    cv2.destroyAllWindows()

[INFO   ] [Logger      ] Record log in C:\Users\diego\.kivy\logs\kivy_22-06-25_0.txt
[ERROR  ] [Core        ] option --ip not recognized


Kivy Usage: ipykernel_launcher.py [KIVY OPTION...] [-- PROGRAM OPTIONS]::

            Options placed after a '-- ' separator, will not be touched by kivy,
            and instead passed to your program.

            Set KIVY_NO_ARGS=1 in your environment or before you import Kivy to
            disable Kivy's argument parser.

        -h, --help
            Prints this help message.
        -d, --debug
            Shows debug log.
        -a, --auto-fullscreen
            Force 'auto' fullscreen mode (no resolution change).
            Uses your display's resolution. This is most likely what you want.
        -c, --config section:key[:value]
            Set a custom [section] key=value in the configuration object.
        -f, --fullscreen
            Force running in fullscreen mode.
        -k, --fake-fullscreen
            Force 'fake' fullscreen mode (no window border/decoration).
            Uses the resolution specified by width and height in your config.
        -w, --windowed
 

AssertionError: 