## 1. Custom data 및 소스코드 사용 위한 Google Drive 연동

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/IoT_capstone/yolov7

/content/drive/MyDrive/IoT_capstone/yolov7


##2. 필요한 라이브러리

In [3]:
import cv2
import time
import torch
import argparse
import numpy as np
from utils.datasets import letterbox
from utils.torch_utils import select_device
from models.experimental import attempt_load
from utils.plots import output_to_keypoint, plot_skeleton_kpts, plot_one_box_kpt, colors
from utils.general import non_max_suppression_kpt, strip_optimizer
from torchvision import transforms
import tensorflow
from PIL import ImageFont, ImageDraw, Image
import os

In [4]:
def load_classes(path):
    with open(path,'r') as f:
        names = f.read().split('\n')
        #filter통해 빈 string을 지우고 리스트에 class 저장함
    return list(filter(None, names))

## 3. Firebase 연동 코드

In [None]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

cred = credentials.Certificate('./flutter-4798c-firebase-adminsdk-apes2-583e445d37.json')
PROJECT_ID = 'flutter-4798c'
default_app = firebase_admin.initialize_app(cred, {
    'storageBucket': f'{PROJECT_ID}.appspot.com'
})
db = firestore.client()
db_ref = db.collection('detect').document("yolov5")

## 3. Parameters

In [11]:
poseweight = '/content/drive/MyDrive/IoT_capstone/yolov7/yolov7-w6-pose.pt' #yolov7 weight파일
source = '/content/drive/MyDrive/IoT_capstone/8.mp4' #테스트할 영상 위치
device = 'cuda'
hide_conf = False
hide_labels = False
line_thickness = 3

sequence = np.empty(51)
pose_name = '' #LSTM 출력값 저장할 string 변수
frame_count = 0
actions = np.array(['violence', 'nonviolence']) #행동 action변수에 정의

##4.weight 파일 가져오기

In [12]:
model = attempt_load(poseweight, map_location=device) #yolov7 weight파일 가져오기
_ = model.eval() #평가 모드
names = model.module.names if hasattr(model, 'module') else model.names

lstm_model = tensorflow.keras.models.load_model("/content/drive/MyDrive/IoT_capstone/model/thirdweight.h5") #LSTM weight파일 가져오기

capture = cv2.VideoCapture(source)
if capture.isOpened() == False:
    print("Video can;t open. Please check video path again")

Fusing layers... 


## 5. 영상처리에 필요한 parameters

In [13]:
frame_count = 0
frame_width = int(capture.get(3)/2) #video capture의 width 구해서 반으로 자른 크기로 영상 출력하기 
frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
j = 1
seq = 20 #20frame 기준으로 저장

In [14]:
# Define the codec and create VideoWriter object
# Define the codec and create VideoWriter object
# fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can use other codecs as well
fourcc = cv2.VideoWriter_fourcc(*'h264')
fps = 7
output_video = cv2.VideoWriter('output2.mp4', fourcc, fps, (frame_width, frame_height))

##6. 영상 read해서 test 시작

In [None]:
while(capture.isOpened):
    #영상 read시작함
    print(f"Frame {frame_count+1} Processing")
    ret, frame = capture.read()

    if ret:
        org_image = frame
        image = cv2.cvtColor(org_image,cv2.COLOR_BGR2RGB)
        image = letterbox(image, (frame_width), stride=64, auto=True)[0]
        image = transforms.ToTensor()(image)
        image = torch.tensor(np.array([image.numpy()]))

        image = image.to(device)
        image = image.float()

        with torch.no_grad():  #get predictions
            output_data, _ = model(image)

        output_data = non_max_suppression_kpt(output_data,   #Apply non max suppression
                                            0.70,   # Conf. Threshold.
                                            0.65, # IoU Threshold.
                                            nc=model.yaml['nc'], # Number of classes.
                                            nkpt=model.yaml['nkpt'], # Number of keypoints.
                                            kpt_label=True)
        output = output_to_keypoint(output_data)
        im0 = image[0].permute(1, 2, 0) * 255 # Change format [b, c, h, w] to [h, w, c] for displaying the image.
        im0 = im0.cpu().numpy().astype(np.uint8)
        
        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR) #reshape image format to (BGR)
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        
        for i, pose in enumerate(output_data):
            #객체 감지
            if len(output_data): #프레임 있다면
                for c in pose[:,5].unique():
                    #객체 감지 되는동안
                    n = (pose[:,5]==c).sum()
                    print(f"No of Object in Current Frame: {n}")
                
                for det_index, (*xyxy, conf, cls) in enumerate(reversed(pose[:,:6])):
                    c = int(cls)
                    kpts = pose[det_index, 6:]
                    label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                    plot_one_box_kpt(xyxy, im0, label=label, color=colors(c, True), 
                                        line_thickness=line_thickness,kpt_label=True, kpts=kpts, steps=3, 
                                        orig_shape=im0.shape[:2])
                
                if j<=seq:
                    for idx in range(output.shape[0]):
                        kpts = output[idx, 7:].T
                        plot_skeleton_kpts(im0, kpts, 3)
                        sequence = np.vstack([sequence,kpts])
                        print("shape: ",sequence.shape)
                    
                        if sequence.shape == (20,51):  
                                result = lstm_model.predict(np.expand_dims(sequence, axis=0))
                                print("result: ",result)
                                sequence = np.empty(51)
                                pose_name = actions[np.argmax(result)]
                                print(pose_name)

                                if pose_name == 'violence':
                                        print("폭력이 감지되었어요!")
                                        db_ref.update({'detect': 'true'}) # firebase

                                elif pose_name == 'nonviolence':
                                    print("폭력이 감지되지 않았아요!")
                                    db_ref.update({'detect': 'False'}) # firebase
                                else:
                                    print(pose_name)
                                db_ref.update({'detect': 'False'}) # firebase
        cv2.imshow("Violence detect result", im0)
        # if cv2.waitKey(1)==ord('c') : # 1 millisecond
        #     break
    else:
        break

Frame 1 Processing
No of Object in Current Frame: 1
shape:  (2, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (3, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (4, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (5, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (6, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (7, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (8, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (9, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (10, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (11, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (12, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (13, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (14, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (15, 51)
Frame 1 Processing
No of Object in Current

KeyboardInterrupt: ignored

##7. 영상 저장까지 하는 버전

In [15]:
violence_detected = False  # Variable to track violence detection
violence_color = (0, 0, 255)  # Color for indicating violence
nonviolence_color = (0, 255, 0)  # Color for indicating non-violence

while capture.isOpened:
    # 영상 read 시작함
    print(f"Frame {frame_count+1} Processing")
    ret, frame = capture.read()

    if ret:
        org_image = frame
        image = cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB)
        image = letterbox(image, (frame_width), stride=64, auto=True)[0]
        image = transforms.ToTensor()(image)
        image = torch.tensor(np.array([image.numpy()]))

        image = image.to(device)
        image = image.float()

        with torch.no_grad():  # get predictions
          output_data, _ = model(image)

        # if isinstance(output_data, torch.Tensor):
        #     output_data = [output_data]

        output_data = non_max_suppression_kpt(output_data,  # Apply non-max suppression
                                              0.70,  # Conf. Threshold.
                                              0.65,  # IoU Threshold.
                                              nc=model.yaml['nc'],  # Number of classes.
                                              nkpt=model.yaml['nkpt'],  # Number of keypoints.
                                              kpt_label=True)

        output = output_to_keypoint(output_data)
        im0 = image[0].permute(1, 2, 0) * 255  # Change format [b, c, h, w] to [h, w, c] for displaying the image.
        im0 = im0.cpu().numpy().astype(np.uint8)

        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR)  # reshape image format to (BGR)
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

        for i, pose in enumerate(output_data):
            # 객체 감지
            if len(output_data):  # 프레임이 있다면
                for c in pose[:, 5].unique():
                    # 객체 감지되는 동안
                    n = (pose[:, 5] == c).sum()
                    print(f"No of Object in Current Frame: {n}")

                for det_index, (*xyxy, conf, cls) in enumerate(reversed(pose[:, :6])):
                    c = int(cls)
                    kpts = pose[det_index, 6:]
                    label = None if hide_labels else (model.names[c] if hide_conf else f'{model.names[c]} {conf:.2f}')
                    plot_one_box_kpt(xyxy, im0, label=label, color=colors(c, True),
                                     line_thickness=line_thickness, kpt_label=True, kpts=kpts, steps=3,
                                     orig_shape=im0.shape[:2])

                    if j <= seq:
                        for idx in range(output.shape[0]):
                            kpts = output[idx, 7:].T
                            plot_skeleton_kpts(im0, kpts, 3)
                            sequence = np.vstack([sequence, kpts])
                            print("shape: ", sequence.shape)

                            if sequence.shape == (20, 51):
                                result = lstm_model.predict(np.expand_dims(sequence, axis=0))
                                print("result: ", result)
                                sequence = np.empty(51)
                                pose_name = actions[np.argmax(result)]
                                print(pose_name)

                                if pose_name == 'violence':
                                    print("폭력이 감지되었어요!")
                                    cv2.putText(im0, "폭력이 감지되었어요!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                                violence_color, 2)
                                    violence_detected = True
                                elif pose_name == 'nonviolence':
                                    print("폭력이 감지되지 않았아요!")
                                    cv2.putText(im0, "폭력이 감지되지 않았어요!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                                nonviolence_color, 2)
                                    violence_detected = False

                    output_video.write(im0)  # Write frame with detection to output video

    else:
        break

# Release the VideoWriter object
output_video.release()

Frame 1 Processing
No of Object in Current Frame: 1
shape:  (2, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (3, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (4, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (5, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (6, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (7, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (8, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (9, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (10, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (11, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (12, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (13, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (14, 51)
Frame 1 Processing
No of Object in Current Frame: 1
shape:  (15, 51)
Frame 1 Processing
No of Object in Current