In [1]:
import numpy as np

def convertToPixels(roi, vid):
    """Scale an ROI given as frame fractions to integer pixel coordinates.

    Args:
        roi: Indexable holding at least four numbers ordered
            [xmin, ymin, xmax, ymax]. The x values are multiplied by the
            frame width, the y values by the frame height.
        vid: Object exposing ``get(prop_id)``. Property 3 is read as the
            frame width and property 4 as the frame height.

    Returns:
        A new list ``[xmin, ymin, xmax, ymax]`` of ints (truncated by int()).
    """
    width = int(vid.get(3))
    height = int(vid.get(4))

    # Even positions hold x coordinates, odd positions hold y coordinates.
    return [int(roi[i] * (height if i % 2 else width)) for i in range(4)]


def getMeanRoi(rois, vid):
    """Average a sequence of timed ROIs and return the mean box in pixels.

    Args:
        rois: 2-D sequence whose rows are [time, xmin, ymin, xmax, ymax];
            the first column is discarded before averaging.
        vid: Capture object forwarded to convertToPixels for the frame size.

    Returns:
        The column-wise mean ROI converted by convertToPixels, i.e. a list
        of four ints [xmin, ymin, xmax, ymax].
    """
    boxes = np.array(rois)[:, 1:]  # drop the leading time column
    return convertToPixels(np.mean(boxes, 0), vid)


def checkROI(roi, vid):
    """Report whether a pixel ROI lies completely inside the video frame.

    Args:
        roi: Indexable [xmin, ymin, xmax, ymax] in pixels.
        vid: Object exposing ``get(prop_id)``; properties 3 and 4 are read
            as the frame width and height.

    Returns:
        1 when the ROI is within the frame, 0 when any edge is outside it.
    """
    width = int(vid.get(3))
    height = int(vid.get(4))

    # Horizontal extent sticks out of the frame.
    if roi[0] < 0 or roi[2] > width:
        return 0
    # Vertical extent sticks out of the frame.
    if roi[1] < 0 or roi[3] > height:
        return 0
    return 1


def getROIPixelCenter(roi, vid):
    """Return the integer pixel centre (cx, cy) of a fractional ROI.

    The ROI is first converted to pixels with convertToPixels; the centre
    is the truncated midpoint of each axis.
    """
    xmin, ymin, xmax, ymax = convertToPixels(roi, vid)
    return int((xmin + xmax) / 2), int((ymin + ymax) / 2)


def getROIRelativeCenter(roi, vid):
    """Return the centre (cx, cy) of ``roi`` in the same units as ``roi``.

    Args:
        roi: Indexable [xmin, ymin, xmax, ymax].
        vid: Unused; kept so the signature matches getROIPixelCenter.

    Returns:
        Tuple of floats: the midpoint of the x extent and of the y extent.
    """
    mid_x = (roi[0] + roi[2]) / 2.0
    mid_y = (roi[1] + roi[3]) / 2.0
    return mid_x, mid_y


def resizeROI(roi, meanROI, smoothCenters, roi_scale, vid):
    """Build a fixed-size crop box centred on the smoothed face centre.

    Args:
        roi: This frame's ROI as frame fractions in [0, 1],
            ordered [xmin, ymin, xmax, ymax].
        meanROI: Mean ROI of the segment in pixels, same ordering.
        smoothCenters: Sequence of recent [cx, cy] pixel centres; their
            column-wise mean (truncated to int) is used as the box centre.
        roi_scale: Factor applied to the mean width and height.
        vid: Capture object used for the frame size.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` of the scaled mean-size box around the
        smoothed centre. If checkROI rejects that box, a message is printed
        and the frame's own ROI in pixels is returned instead; that
        fallback is NOT of the mean size.
    """
    mean_box = np.array(meanROI)
    box_w = int((mean_box[2] - mean_box[0]) * roi_scale)  # scaled mean width
    box_h = int((mean_box[3] - mean_box[1]) * roi_scale)  # scaled mean height

    # Pixel version of this frame's own ROI, used only as the fallback.
    fallback = convertToPixels(np.array(roi), vid)

    # Smoothed centre: mean over the window of recent centres.
    center = np.mean(np.array(smoothCenters), 0)
    cx, cy = int(center[0]), int(center[1])

    half_w = box_w / 2
    half_h = box_h / 2
    resizedROI = [cx - half_w, cy - half_h, cx + half_w, cy + half_h]

    if checkROI(resizedROI, vid) == 0:
        print("Dropping ROI:", resizedROI)
        return fallback  # caller has to resize this one

    return resizedROI

In [2]:
import cv2

def show(img, wait):
    """Display ``img`` in the window named 'image', then call cv2.waitKey(wait).

    Args:
        img: Image accepted by cv2.imshow.
        wait: Delay handed unchanged to cv2.waitKey.
    """
    window_name = 'image'
    cv2.imshow(window_name, img)
    cv2.waitKey(wait)

def PV(arr):
    """Print every element of the iterable ``arr`` on its own line."""
    for item in arr:
        print(item)

def PNN(ss):
    """Print ``ss`` followed by ', ' instead of a newline and flush stdout."""
    separator = ', '
    print(ss, end=separator, flush=True)

def getFrame(cap, i, show):
    """Seek ``cap`` to frame index ``i``, read one frame and optionally show it.

    Args:
        cap: Capture object exposing ``set(prop_id, value)`` and ``read()``
            (e.g. cv2.VideoCapture).
        i: Frame index to seek to before reading.
        show: When equal to 1 the frame is displayed in the 'frame' window
            and the call blocks in cv2.waitKey(0). Note that this parameter
            shadows the module-level ``show`` function inside this body.

    Returns:
        The decoded frame, or None when the read fails. Returning early on
        a failed read keeps cv2.imshow from being called with None.
    """
    cap.set(1, i)  # property 1 is cv2.CAP_PROP_POS_FRAMES
    ret, frame = cap.read()

    # A failed read (e.g. seeking past the end) yields no usable frame.
    if not ret or frame is None:
        return None

    if show == 1:
        cv2.imshow('frame', frame)
        cv2.waitKey(0)
    return frame

def readFile(fil):
    """Return the lines of the text file ``fil`` as a list.

    Line endings are kept on each element, exactly as ``readlines`` yields
    them. The file is opened with the platform default encoding.
    """
    with open(fil) as handle:
        return list(handle)

def viewVideo(vid):
    """Play ``vid`` frame by frame in the 'frame' window.

    Playback ends when ``vid.read()`` reports no more frames or when the
    user presses 'q' while the window has focus.
    """
    frames_shown = 0

    while True:
        ok, frame = vid.read()
        if not ok:
            # No frame could be read: end of stream or read failure.
            break

        frames_shown += 1
        cv2.imshow('frame', frame)

        # Only the low byte of the key code is compared against 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

def cropFrame(vid, time, roi):
    """Return the ``roi`` region of the frame located at ``time`` seconds.

    Args:
        vid: Capture object (e.g. cv2.VideoCapture) queried for frame size,
            frame count and FPS, and read through getFrame.
        time: Timestamp in seconds; mapped to frame ``int(time * FPS) + 1``.
        roi: ``[xmin, ymin, xmax, ymax]`` in pixels; values are truncated
            to int before use.

    Returns:
        ``frame[ymin:ymax, xmin:xmax, :]``, or None (after printing an
        error) when the frame index is out of range, the frame cannot be
        read, or the ROI is outside the frame or empty.
    """
    frame_width = int(vid.get(3))
    frame_height = int(vid.get(4))
    total_frames = int(vid.get(7))
    FPS = vid.get(cv2.CAP_PROP_FPS)

    frameNo = int(time * FPS) + 1

    # Frame positions are zero-based, so index == total_frames is already
    # one past the last frame and must be rejected as well.
    if frameNo < 0 or frameNo >= total_frames:
        print("ERROR : frameNo:", frameNo, " is invalid.")
        return None

    frame = getFrame(vid, frameNo, 0)
    if frame is None:
        # The container may report more frames than can actually be decoded.
        print("ERROR : frameNo:", frameNo, " could not be read.")
        return None

    roi = list(map(int, roi))
    xmin, ymin, xmax, ymax = roi[0], roi[1], roi[2], roi[3]

    out_of_frame = xmin < 0 or xmax > frame_width or ymin < 0 or ymax > frame_height
    # An empty or inverted box would produce a zero-sized crop, which
    # later breaks cv2.resize in the caller.
    degenerate = xmin >= xmax or ymin >= ymax
    if out_of_frame or degenerate:
        print("ERROR : ymin,ymax, xmin,xmax : ", ymin, ", ", ymax, ", ", xmin, ", ", xmax)
        return None

    return frame[ymin:ymax, xmin:xmax, :]

In [3]:
import cv2
import os
import numpy as np
import numpy as np

# ---- Tunable parameters ----
MIN_VIDEO_LENGTH = 2          # seconds; face segments shorter than this are ignored
TRACK_SMOOTH_NUMFRAMES = 10   # number of past frames averaged to smooth the tracked centre
ROI_SCALE = 1.3               # enlargement factor applied to the mean face box
SKIP_FRAMES_NUM = 5           # roughly this many detections are skipped at each end of a segment

# ---- START PROGRAM: input locations ----
folder = "C:/Users/dell/combined/videoSample"
videoFile = "/".join((folder, "abcd.mp4"))
trackFile = "/".join((folder, "tbbt.mp4.track.txt"))

print(videoFile, "\n", trackFile)

# ---- READ VIDEO ----
vid = cv2.VideoCapture(videoFile)

# The capture reports its properties as floats; the size and frame count
# are converted to int, FPS is kept as returned.
frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
FPS = vid.get(cv2.CAP_PROP_FPS)  # frames per second

print("height:", frame_height, ", width:", frame_width, ", total_frames:", total_frames, ", FPS:", FPS)
# READ TRACK
lines = readFile(trackFile)

# All detections grouped per face, keyed by the face number (kept as a
# float because every field is parsed with float()).
# Each stored detection is [time, xmin, ymin, xmax, ymax], taken from
# columns 0 and 2..5 of a track line; column 1 is the face number.
faces = {}

for x in lines:
    # A blank line (e.g. a trailing newline at the end of the file) has no
    # fields and would otherwise fail with IndexError below.
    if not x.strip():
        continue

    x = x.split(' ')
    # The last space-separated token is dropped before parsing —
    # presumably the newline left by a trailing space; confirm against the
    # track file format.
    x = list(map(float, x[0:-1]))

    faces.setdefault(x[1], []).append([x[0], x[2], x[3], x[4], x[5]])

croppedFacesPath = 'croppedFaces/'
# cv2.VideoWriter does not create missing directories; make sure the
# output folder exists before any writer is opened.
os.makedirs(croppedFacesPath, exist_ok=True)

# The frame size is constant for the capture, so read it once up front.
frame_width = int(vid.get(3))
frame_height = int(vid.get(4))

# NOTE(review): an MJPG fourcc is written into a '.mp4' container —
# confirm the local OpenCV backend accepts this pairing.
fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')

# One output video per tracked face: a crop of the (scaled) mean face size
# that follows the smoothed face centre.
for f in faces.items():
    faceId, detections = f  # detections: rows of [time, xmin, ymin, xmax, ymax]
    print("Face :", faceId, "Time:", detections[0][0], " to ", detections[-1][0])

    # Ignore faces that are visible for too short a time.
    if (detections[-1][0] - detections[0][0]) < MIN_VIDEO_LENGTH:
        continue

    # Name the clip after the source video's file name. Indexing the
    # '/'-split path picked a directory component ("Users") whenever
    # videoFile was an absolute path.
    videoName = os.path.join(
        croppedFacesPath,
        os.path.basename(videoFile) + "_" + str(faceId) + '.mp4',
    )

    meanROI = getMeanRoi(detections, vid)
    print("meanROI: ", meanROI)

    oMW = int(meanROI[2] - meanROI[0])  # mean width
    oMH = int(meanROI[3] - meanROI[1])  # mean height

    # Scale the ROI region; fall back to the unscaled mean size when the
    # scaled box cannot fit inside the frame.
    MW = int(oMW * ROI_SCALE)
    MH = int(oMH * ROI_SCALE)
    roi_scale = ROI_SCALE

    if MW < 0 or MH < 0 or MW > frame_width or MH > frame_height:
        roi_scale = 1.0
        MW = oMW
        MH = oMH

    outVideoSize = (MW, MH)
    out = cv2.VideoWriter(videoName, fourcc, FPS, outVideoSize)

    smoothCenters = []
    segmentFrames = len(detections)
    segmentFailed = False

    for frameNum, fr in enumerate(detections, start=1):
        # Skip the first few and last few detections of the segment.
        if frameNum < SKIP_FRAMES_NUM or frameNum > (segmentFrames - SKIP_FRAMES_NUM):
            continue

        # fr[1:] is the ROI, format [xmin, ymin, xmax, ymax] as in the track file.
        cx, cy = getROIPixelCenter(fr[1:], vid)

        # Seed the smoothing window with the first centre so the mean is
        # defined from the first processed frame on.
        if len(smoothCenters) < 1:
            smoothCenters = [[cx, cy] for _ in range(TRACK_SMOOTH_NUMFRAMES)]

        # Slide the window: drop the oldest centre, add the current one.
        smoothCenters.pop(0)
        smoothCenters.append([cx, cy])

        # Resize the ROI to a crop region of mean width and height.
        resizedROI = resizeROI(fr[1:], meanROI, smoothCenters, roi_scale, vid)
        cropped = cropFrame(vid, fr[0], resizedROI)

        # Invalid ROI or frame number: abandon this face and drop its file.
        if cropped is None:
            print("Face:", faceId, "bounds error(roi or frameNo).")
            print("deleting file:", videoName)
            segmentFailed = True
            break

        # resizeROI may fall back to the frame's own ROI, whose size differs
        # from outVideoSize, so every crop is resized before being written.
        cropped = cv2.resize(cropped, outVideoSize)
        out.write(cropped)

    # Release exactly once, then remove the partial file of a failed
    # segment. The writer may never have created a file, hence the
    # existence check.
    out.release()
    if segmentFailed and os.path.exists(videoName):
        os.remove(videoName)

# When everything is done, release the capture and close any OpenCV windows.
vid.release()
cv2.destroyAllWindows()

C:/Users/dell/combined/videoSample/abcd.mp4 
 C:/Users/dell/combined/videoSample/tbbt.mp4.track.txt
height: 2160 , width: 4096 , total_frames: 595 , FPS: 25.0
Face : 0.0 Time: 0.0  to  0.0
Face : 1.0 Time: 0.04  to  2.28
meanROI:  [1957, 427, 2573, 1007]
Face : 2.0 Time: 2.36  to  4.68
meanROI:  [1575, 402, 2177, 967]
Face : 3.0 Time: 4.72  to  6.68
Face : 4.0 Time: 5.04  to  7.96
meanROI:  [1455, 428, 1773, 727]
Face : 5.0 Time: 6.0  to  7.4
Face : 6.0 Time: 8.0  to  12.08
meanROI:  [566, 231, 768, 428]
Face : 7.0 Time: 8.6  to  13.72
meanROI:  [1975, 421, 2161, 598]
Face : 8.0 Time: 9.84  to  12.24
meanROI:  [1276, 262, 1449, 425]
Face : 9.0 Time: 11.52  to  13.72
meanROI:  [3271, 887, 3476, 1081]
Dropping ROI: [3835.0, 856.0, 4101.0, 1108.0]
Dropping ROI: [3831.0, 855.0, 4097.0, 1107.0]
Face : 10.0 Time: 11.56  to  13.72
meanROI:  [740, 304, 912, 465]
Face : 11.0 Time: 13.76  to  15.76
meanROI:  [1696, 614, 2122, 1015]
Face : 12.0 Time: 15.8  to  16.12
Face : 13.0 Time: 15.8  to  19