In [3]:
import cv2

# Open the video file
video_path = 'DeepSquat1 (1).avi'
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Cannot open camera")

# Define the keypoint mapping for this OpenPose body_25 model
keypoints_mapping = {
    0:  "Nose", 1:  "Neck", 2:  "RShoulder", 3:  "RElbow", 4:  "RWrist", 5:  "LShoulder", 6:  "LElbow",
    7:  "LWrist", 8:  "MidHip", 9:  "RHip", 10: "RKnee", 11: "RAnkle", 12: "LHip", 13: "LKnee",
    14: "LAnkle", 15: "REye", 16: "LEye", 17: "REar", 18: "LEar", 19: "LBigToe", 20: "LSmallToe",
    21: "LHeel", 22: "RBigToe", 23: "RSmallToe", 24: "RHeel", 25: "Background"
}

humanML_mapping = {
    0: "root", 1: "RH", 2: "LH", 3: "BP", 4: "RK", 5: "LK", 6: "BT", 
    7: "RMrot", 8: "LMrot", 9: "BLN", 10: "RF", 11: "LF", 12: "BMN", 13: "RSI", 
    14: "LSI", 15: "BUN", 16: "RS", 17: "LS", 18: "RE", 19: "LE", 20: "RW", 21: "LW"
}

#compatible_mapping = [12, 9, 16, 18, 20, 17, 19, 21, 0, 1, 4, x, 2, 5, x, X, X, X, X, X, X, 8, x, x, 7, x]

humanml_from_openpose = [8, 4, 7, 8, 10, 13, 8, 24, 21, 1, 22, 19, 0, 2, 5, 0, 2, 5, 3, 6, 4, 7] #check BMN, hand 

humanmlframes = []

#load the model
net = cv2.dnn.readNetFromCaffe('pose_deploy.prototxt', 'pose_iter_584000.caffemodel')

def framestep(cap):
    
    # Get video properties
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    print("fps: %d" %(fps))
    print()
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    new_vid = []
    if (fps % 10 == 0): #we can easily do 10 fps
        stepsize = fps/1 # reset to 10
        print("total frames to be analyzed: ", frame_count / stepsize)
        curr_frame = 0
        i = 0
        while curr_frame < frame_count:
            ret, frame = cap.read()
            if curr_frame % stepsize == 0:
                print("starting frame: ", i)
                new_vid.append(pose(frame))
                i+=1
            curr_frame+=1
    
    else:
        print("NOT BUILT YET")
        pass
    
    return new_vid
    
def squarify(frame):
    height, width, _ = frame.shape
    min_dim = min(height, width)

    # Calculate the cropping dimensions
    crop_height = (height - min_dim) // 2
    crop_width = (width - min_dim) // 2

    # Crop the image equally from both sides to make it a square
    return frame[crop_height:crop_height+min_dim, crop_width:crop_width+min_dim] , min_dim

def pose(frame):
    
    frame, size = squarify(frame)
    blob = cv2.dnn.blobFromImage(frame, 1/255, (size, size),
                             (0, 0, 0), swapRB=False, crop=True)

    # run forward pass to get the pose estimation
    net.setInput(blob)
    output = net.forward()
    
    # Extract joint locations
    joint_locations = []

    for i in range(len(keypoints_mapping) - 1): #-1 bc we dont want point for the background
        keypoint = output[0, i, :, :]
        min_val, confidence, min_loc, point = cv2.minMaxLoc(keypoint)

        if confidence > 0.1:  # can adjust the confidence threshold if needed ???
            joint_locations.append((8 * int(point[0]), 8 * int(point[1]), i))
        else:
            joint_locations.append(None)

    #joint_locations contains the locations of the detected joints and corresponding index

    for location in joint_locations:
        if location:
            x, y, index = location
            cv2.circle(frame, (x, y), 5, (0, 0, 255), -1)
            #cv2.putText(image, str(index), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    #final mapping array
    humanml = []

    #map openpose joint location in every frame to humanml3d locations
    for i in range(0, 22):
        # for 15: BUN (chin), average the nose and neck y positions
        if i == 15:
            humanml.append((((joint_locations[0][0] + joint_locations[1][0]) / 2, (joint_locations[0][1] + joint_locations[1][1]) / 2, 15)))
        elif i == 6:
            # for BT
            humanml.append((((joint_locations[1][0] * 3 + joint_locations[8][0] * 7) / 10, (joint_locations[1][1] * 3 + joint_locations[8][1] * 7) / 10, 6)))
        else:
            humanml.append((joint_locations[humanml_from_openpose[i]][0], joint_locations[humanml_from_openpose[i]][1], i))

    humanmlframes.append(humanml)

    return frame

my_video = framestep(cap)

print("HumanML3d joint positions: ")

for i in range(0, len(humanml_from_openpose)):
    print("%s <= %s" % (humanML_mapping[i], keypoints_mapping[humanml_from_openpose[i]]))

print(humanmlframes)

size = my_video[0].shape[1], my_video[0].shape[0]
print(size)
out = cv2.VideoWriter("real.avi", cv2.VideoWriter_fourcc(*'DIVX'), 30, size)

for i in range(len(my_video)):
    out.write(my_video[i])
    cv2.imshow("video", my_video[i])
    cv2.waitKey(0)

cv2.destroyAllWindows()
out.release()
cap.release()

exit(1)

HumanML3d joint positions: 
root <= MidHip
RH <= RWrist
LH <= LWrist
BP <= MidHip
RK <= RKnee
LK <= LKnee
BT <= MidHip
RMrot <= RHeel
LMrot <= LHeel
BLN <= Neck
RF <= RBigToe
LF <= LBigToe
BMN <= Nose
RSI <= RShoulder
LSI <= LShoulder
BUN <= Nose
RS <= RShoulder
LS <= LShoulder
RE <= RElbow
LE <= LElbow
RW <= RWrist
LW <= LWrist
[]


NameError: name 'my_video' is not defined