"""Webcam demo for the Darknet model described by config/prune_yolov3-hand.cfg.

Frames are read from camera 0, zero-padded to a square, resized to the
network input size and run through the model.  Boxes whose confidence
exceeds DRAW_CONF_THRES are drawn on the frame, which is then appended to
./out.avi and shown in a window.  Press ``q`` or ``Esc`` in the window (or
Ctrl+C in the terminal) to stop; the camera and the video file are released
either way.
"""
from __future__ import division

# NOTE: the wildcard project imports stay first and in their original order so
# that the explicit imports below keep precedence over any same-named symbol
# the wildcards might export.
from models import *
from utils.utils import *
from utils.datasets import *
from utils.parse_config import *

import os
import sys
import time
import datetime
import argparse
import tqdm

import cv2
from PIL import Image

import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

DATA_CONFIG_PATH = 'config/oxfordhand.data'
MODEL_CONFIG_PATH = 'config/prune_yolov3-hand.cfg'
WEIGHTS_PATH = 'weights/prune_yolov3_ckpt.pth'
OUTPUT_VIDEO_PATH = './out.avi'

CAMERA_INDEX = 0
IMG_SIZE = 416            # network input side and output video side, in pixels
OUTPUT_FPS = 25
NMS_CONF_THRES = 0.01     # threshold handed to non_max_suppression
NMS_IOU_THRES = 0.3
DRAW_CONF_THRES = 0.6     # only boxes above this confidence are drawn
BOX_COLOR = (0, 0, 255)   # red, since the displayed frame is in BGR order
BOX_THICKNESS = 2
WAIT_KEY_MS = 5
QUIT_KEYS = (ord('q'), 27)  # 27 == Esc


def preprocess(img: np.ndarray) -> np.ndarray:
    """Zero-pad a (height, width, channels) image so that it becomes square.

    The shorter of the two spatial sides is padded on both ends (the extra
    pixel of an odd difference goes to the trailing end); the channel axis is
    never padded.

    Args:
        img: 3-D array laid out as (height, width, channels).

    Returns:
        A new array whose height and width both equal max(height, width).
    """
    h, w, _ = img.shape
    dim_diff = abs(h - w)
    pad1 = dim_diff // 2
    pad2 = dim_diff - pad1
    if h <= w:
        pad = ((pad1, pad2), (0, 0), (0, 0))
    else:
        pad = ((0, 0), (pad1, pad2), (0, 0))
    img = np.pad(img, pad, 'constant', constant_values=0)
    print(img.shape)
    return img


def _detect(model, image, to_tensor, device):
    """Run `model` plus non-max suppression on one padded RGB image.

    Returns whatever non_max_suppression returns for a batch of one image.
    """
    tensor = to_tensor(image)
    print(tensor.shape)
    tensor = F.interpolate(tensor.unsqueeze(0), size=IMG_SIZE, mode="nearest")
    print(tensor.shape)
    imgs = tensor.to(device=device, dtype=torch.float32)
    print(imgs.shape)
    with torch.no_grad():
        outputs = model(imgs)
        outputs = non_max_suppression(
            outputs, conf_thres=NMS_CONF_THRES, nms_thres=NMS_IOU_THRES)
    return outputs


def _draw_detections(frame, detections, conf_thres=DRAW_CONF_THRES):
    """Draw every detection whose confidence exceeds `conf_thres` onto `frame`.

    Each detection row is read as (x1, y1, x2, y2, conf, ...).
    NOTE(review): the coordinates are assumed to be in the IMG_SIZE x IMG_SIZE
    network-input space, which is why they are drawn unscaled on the resized
    frame -- confirm against non_max_suppression.
    """
    for det in detections:
        coor = det.tolist()
        x1, y1, x2, y2 = (int(v) for v in coor[:4])
        conf = coor[4]
        if conf > conf_thres:
            print(x1, y1, x2, y2, conf)
            cv2.rectangle(frame, (x1, y1), (x2, y2), BOX_COLOR, BOX_THICKNESS)


def main():
    """Load the model, then detect on camera frames until the stream ends."""
    # Prefer the GPU but fall back to the CPU so the demo also runs without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The class names are loaded but not used by the drawing code below; the
    # load is kept so a missing/broken data config still fails at start-up.
    data_config = parse_data_config(DATA_CONFIG_PATH)
    class_names = load_classes(data_config["names"])

    model = Darknet(MODEL_CONFIG_PATH).to(device)
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
    model.eval()

    cap = cv2.VideoCapture(CAMERA_INDEX)
    if not cap.isOpened():
        cap.release()
        raise SystemExit('could not open camera %d' % CAMERA_INDEX)

    # Created only after the camera is known to work, so a failed start does
    # not leave an empty video file behind.
    w_cap = cv2.VideoWriter(OUTPUT_VIDEO_PATH, cv2.VideoWriter_fourcc(*'XVID'),
                            OUTPUT_FPS, (IMG_SIZE, IMG_SIZE))
    to_tensor = transforms.ToTensor()

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # A failed read would otherwise make this loop spin forever.
                break

            frame = frame[:, :, ::-1]  # BGR (OpenCV) -> RGB for the network

            start = time.time()
            dst_frame = preprocess(frame)
            print(dst_frame.shape)
            # Back to BGR for OpenCV drawing, writing and display.
            show_frame = cv2.resize(dst_frame[:, :, ::-1], (IMG_SIZE, IMG_SIZE))
            outputs = _detect(model, dst_frame, to_tensor, device)
            end = time.time()
            print('cost time: %.2f s' % (end - start))
            print(type(outputs))

            detections = outputs[0]
            if detections is None:
                print('empty')
            else:
                print(detections)
                _draw_detections(show_frame, detections)

            w_cap.write(show_frame)
            cv2.imshow('frame', show_frame)
            # waitKey also services the GUI event loop; -1 (no key) masks to 255.
            if (cv2.waitKey(WAIT_KEY_MS) & 0xFF) in QUIT_KEYS:
                break
    finally:
        # Release on every exit path (including Ctrl+C) so that the video
        # container is finalised and the camera is freed.
        cap.release()
        w_cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()