In [1]:
import cv2
import os
import random
import math
import datetime as dt
from collections import deque
import pandas as pd
import pickle
import statistics
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
from numpy.lib.scimath import sqrt # used for hoF
from numpy import arctan2 # used for hoF

from scipy import pi, cos, sin # used for HoF
from scipy.ndimage import uniform_filter # used for hoF

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import sklearn.metrics as metrics

from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,CSVLogger
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
print("Tensorflow version: ", tf.__version__)
print(tf.test.gpu_device_name())

#for i3d extraction
from models.i3d.extract_i3d import ExtractI3D
from utils.utils import build_cfg_path
from omegaconf import OmegaConf
import torch

# Pick the torch device once; fall back to the CPU when CUDA is unavailable.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Only ask for the GPU name when a GPU exists: get_device_name(0) raises on CPU-only machines.
torch.cuda.get_device_name(0) if device == 'cuda' else 'cpu'

Tensorflow version:  2.6.0
/device:GPU:0


  from .autonotebook import tqdm as notebook_tqdm


'NVIDIA GeForce GTX 1650'

In [2]:
# Notebook-wide state shared by the loader and prediction functions.

# Class names indexed by the argmax of the temporal segmentation model's output.
dive_action_labels = ['Entry', 'Flight', 'Takeoff']

# Model handles: None until a load_* function assigns one (several loaders are still placeholders).
temp_segment_model = autoscore_model = ss_twist_classifier_model = None
somersault_model = twist_model = None
angle_of_entry_model = splash_model = linear_regression_model = None

# Directory prefix (with trailing slash) for the saved model checkpoints.
folderpath = 'modelcheckpoints/'

# I3D feature extraction: lazily created extractor plus its window length and stride.
i3dextractor = None
stack_size, step_size = 12, 4

In [3]:
def load_temporal_segment_model():
    """Build the per-frame dive-phase classifier, load its weights and publish it.

    The network takes 64x64 three-channel images and ends in a 3-way softmax.
    Weights are read from ``folderpath + "model_2D.h5"`` and the finished model is
    stored in the global ``temp_segment_model``.
    """
    global temp_segment_model
    print('loading temporal segment model')
    frame_shape = (64, 64, 3)
    classifier = Sequential([
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=frame_shape),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dense(3, activation='softmax'),
    ])
    classifier.compile(loss=categorical_crossentropy, optimizer=Adam(), metrics=['accuracy'])
    classifier.load_weights(folderpath + "model_2D.h5")
    temp_segment_model = classifier

In [4]:
def load_autoscore_model():
    """Load the autoscore regression model into the global ``autoscore_model``.

    The checkpoint is read from ``folderpath`` (the same directory the other
    loaders use). The model is loaded only on the first call; later calls keep
    the already-loaded instance.
    """
    global autoscore_model
    print('loading autoscore model')
    # Identity test: a model object may define its own __eq__, so never compare it to None with ==.
    if autoscore_model is None:
        autoscore_model = load_model(folderpath + 'fullyconnected_ID128D96D1_0.1.h5')

In [5]:
def load_ss_twist_classifier_model():
    """Build the two-class flight-frame classifier, load its weights and publish it.

    The network takes 64x64 three-channel images and ends in a 2-way softmax.
    Weights are read from ``folderpath + "model_flightClassify_2D_3.h5"`` and the
    model is stored in the global ``ss_twist_classifier_model``.
    """
    global ss_twist_classifier_model
    print('loading ss_twist_classifier model')
    frame_shape = (64, 64, 3)
    classifier = Sequential([
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=frame_shape),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dense(2, activation='softmax'),
    ])
    classifier.compile(
        loss=categorical_crossentropy,
        optimizer=tf.keras.optimizers.Adam(0.0001),
        metrics=['accuracy'],
    )
    classifier.load_weights(folderpath + "model_flightClassify_2D_3.h5")
    ss_twist_classifier_model = classifier
    

In [6]:
def load_somersault_model():
    """Placeholder loader: only announces itself; ``somersault_model`` is not assigned yet."""
    global somersault_model
    print('loading somersault model')

In [7]:
def load_twist_model():
    """Placeholder loader: only announces itself; ``twist_model`` is not assigned yet."""
    global twist_model
    print('loading twist model')

In [8]:
def load_angle_of_entry_model():
    """Placeholder loader: only announces itself; ``angle_of_entry_model`` is not assigned yet."""
    global angle_of_entry_model
    print('loading angle of entry model')

In [9]:
def load_splash_model():
    """Placeholder loader: only announces itself; ``splash_model`` is not assigned yet."""
    global splash_model
    print('loading splash model')

In [10]:
def load_linear_regression_model():
    """Placeholder loader: only announces itself; ``linear_regression_model`` is not assigned yet."""
    global linear_regression_model
    print('loading linear regression model')

In [11]:
def ensureDirectoryClean(dirpath):
    """Make ``dirpath`` exist and contain no files.

    An existing directory has every direct entry removed with ``os.remove``
    (so a sub-directory inside it makes the call raise); a missing directory is
    created together with any missing parents.
    """
    if os.path.exists(dirpath):
        for entry in os.listdir(dirpath):
            os.remove(os.path.join(dirpath, entry))
        return
    os.makedirs(dirpath)

def extractFolderAndFileNameFromAbsPath(absFilePath):
    """Split a file path into ``(folder, name_without_extension, extension)``.

    Both ``\\`` and ``/`` are accepted as separators. The extension is looked
    for in the file name only, so a dot inside the folder part (for example the
    leading ``.`` of ``.\\uploads\\clip``) is never mistaken for the extension
    separator.

    Returns:
        folder: everything before the last separator ('' when there is none).
        shortfilename: the file name up to its last dot.
        ext: the text after the last dot of the file name ('' when there is none).
    """
    last_sep = max(absFilePath.rfind('\\'), absFilePath.rfind('/'))
    folder = absFilePath[:last_sep] if last_sep >= 0 else ''
    basename = absFilePath[last_sep + 1:]
    dot = basename.rfind('.')
    if dot < 0:
        return folder, basename, ''
    return folder, basename[:dot], basename[dot + 1:]

def extractEventNoAndDiveNo(folderPath):
    """Return ``(eventno, diveno)``: the second-to-last and last components of ``folderPath``.

    Components may be separated by ``\\`` or ``/``. When the path has a single
    component, that component is returned for both values.
    """
    tokens = folderPath.replace("/", "\\").split("\\")
    diveno = tokens[-1]
    # With a single token the index wraps to -1, so eventno equals diveno.
    eventno = tokens[len(tokens) - 2]
    return eventno, diveno

In [12]:
def predict_temporal_segmentation(vidpath, imgOutputDir):
    """Classify every frame of a video into a dive phase and save it under that phase.

    Each frame is resized to 64x64, scaled to [0, 1] and passed to the global
    ``temp_segment_model``. Once three predictions are available the label is the
    argmax of the mean of the last three probability vectors; before that the
    raw per-frame argmax is used. The full-size frame is written to
    ``<imgOutputDir>\\<video name>\\<label>\\frameNNNN.jpg`` (numbering starts at 1);
    the three label directories are emptied (or created) first.

    Args:
        vidpath: path of the video to segment.
        imgOutputDir: root directory for the extracted frames.

    Returns:
        List with one predicted label index (into ``dive_action_labels``) per frame.
    """
    window_size = 3
    frame_width, frame_height = 64, 64  # cv2.resize expects dsize as (width, height)
    predicted_label_list = []
    recent_probabilities = deque(maxlen=window_size)

    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    video_dir = imgOutputDir + "\\" + shortfilename
    for dive_action_label in dive_action_labels:
        ensureDirectoryClean(video_dir + "\\" + dive_action_label)

    video_reader = cv2.VideoCapture(vidpath)
    try:
        frame_no = 0
        while True:
            status, frame = video_reader.read()
            if not status:
                break
            frame_no += 1

            # predict frame type (Takeoff/Flight/Entry)
            normalized_frame = cv2.resize(frame, (frame_width, frame_height)) / 255
            probabilities = temp_segment_model.predict(np.expand_dims(normalized_frame, axis=0))[0]
            recent_probabilities.append(probabilities)

            if len(recent_probabilities) == window_size:
                # Smooth over the sliding window to suppress single-frame flicker.
                predicted_label = np.argmax(np.array(recent_probabilities).mean(axis=0))
            else:
                predicted_label = np.argmax(probabilities)

            predicted_label_list.append(predicted_label)
            predicted_class_name = dive_action_labels[predicted_label]
            imagename = video_dir + "\\" + predicted_class_name + "\\" + ("frame%04d.jpg" % frame_no)
            cv2.imwrite(imagename, frame)
    finally:
        # Release the capture even when prediction or writing fails.
        video_reader.release()
    return predicted_label_list

In [13]:
#resizeFrameDim [width, height]
def createVideo(image_folder, video_folder, divephase, vidname, resizeFrame=False, resizeFrameDim=[64,64]):
    """Assemble the ``.jpg`` files of ``image_folder`` (sorted by name) into an mp4 at 25 fps.

    Args:
        image_folder: directory holding the frames.
        video_folder: directory the video is written to.
        divephase: suffix used in the output file name.
        vidname: prefix used in the output file name.
        resizeFrame: when true, every frame is resized to ``resizeFrameDim``.
        resizeFrameDim: target size as ``[width, height]`` (never modified here).

    Returns:
        ``<video_folder>\\<vidname>_<divephase>.mp4``, or ``None`` when the folder
        contains no ``.jpg`` file (nothing is written in that case).
    """
    images = [image_folder + "\\" + name
              for name in sorted(os.listdir(image_folder))
              if name.endswith(".jpg")]
    if not images:
        return None

    height, width, _ = cv2.imread(images[0]).shape
    if resizeFrame:
        width, height = resizeFrameDim[0], resizeFrameDim[1]
    vidFullName = video_folder+'\\'+vidname+"_"+divephase+".mp4"
    print('writing video to ', vidFullName , ' framewidth ', width, ' frameheight ', height)
    fps = 25
    video = cv2.VideoWriter(vidFullName, cv2.VideoWriter_fourcc('M', 'P', '4', 'V'), fps, (width, height))
    try:
        for image in images:
            frame = cv2.imread(image)
            if resizeFrame:
                # dsize is (width, height) and must match the size given to the writer.
                frame = cv2.resize(frame, (width, height))
            video.write(frame)
    finally:
        # Finalise the file even if a frame cannot be read or resized.
        video.release()
    cv2.destroyAllWindows()
    return vidFullName

In [14]:
#cleanUp flag to delete temp folder created for normalizing number of images
def createNormalizedVideos(vidpath, imgOutputDir, numImages, cleanUp=True):
    """Build one video in which every dive phase contributes ``numImages`` frames.

    For each phase (Takeoff, Flight, Entry, in that order) the frames stored in
    ``<imgOutputDir>\\<video name>\\<phase>`` are resampled: randomly chosen
    frames are repeated when there are too few and randomly chosen frames are
    dropped when there are too many. Frame order within a phase is kept. The
    result is written to a temporary ``N_all`` folder and turned into a 64x64
    video with ``createVideo``. A phase without frames contributes nothing.

    Args:
        vidpath: path of the source video (only its file name is used).
        imgOutputDir: root directory that holds the per-phase frame folders.
        numImages: number of frames each non-empty phase contributes.
        cleanUp: when true, the temporary ``N_all`` folder is deleted afterwards.

    Returns:
        List with the path of the written video; empty when no video was written.
    """
    video_list = []
    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    video_dir = imgOutputDir+"\\"+shortfilename
    subdirNorm = video_dir+"\\N_all"
    ensureDirectoryClean(subdirNorm)
    count_norm = 0
    # Chronological order; dive_action_labels is ordered by model output index instead.
    for dive_action_label in ['Takeoff', 'Flight', 'Entry']:
        subdir = video_dir+"\\"+ dive_action_label
        files = sorted(os.listdir(subdir))
        numFiles = len(files)
        if numFiles == 0:
            # Keep N_all in place: other phases still write into it and cleanup lists it later.
            continue
        repeats = dict.fromkeys(files, 1)
        if numFiles < numImages:
            # Too few frames: duplicate randomly chosen ones (with replacement).
            for _ in range(numImages - numFiles):
                repeats[random.choice(files)] += 1
        elif numFiles > numImages:
            # Too many frames: drop a random subset in one draw instead of retrying on repeats.
            for dropped in random.sample(files, numFiles - numImages):
                repeats[dropped] = 0

        for key in files:
            if repeats[key] == 0:
                continue
            frame = cv2.imread(subdir+"\\"+key)
            for _ in range(repeats[key]):
                count_norm += 1
                imagename = subdirNorm+"\\"+("frame%04d.jpg" % count_norm)
                cv2.imwrite(imagename, frame)

    #write video to directory
    vidFullName = createVideo(subdirNorm, video_dir, "N_all", shortfilename,
                              resizeFrame=True, resizeFrameDim=[64,64])
    # createVideo returns None when there was nothing to write; do not hand that to callers.
    if vidFullName is not None:
        video_list.append(vidFullName)

    #cleanup
    if cleanUp and os.path.isdir(subdirNorm):
        for f in os.listdir(subdirNorm):
            os.remove(subdirNorm+"\\"+f)
        os.rmdir(subdirNorm)

    return video_list
#createNormalizedVideos(".\\uploads\\01_10_all.mp4", '.\\images', 32)

In [15]:
def videosAreComplete(vidpath, imgOutputDir):
    """Report whether the Takeoff, Flight and Entry folders of a video all hold frames.

    The folders are ``<imgOutputDir>\\<video name>\\<phase>``; each one is counted
    once with ``getNumFiles``. The counts are printed, followed by an error line
    for every phase that has no frames.

    Returns:
        True when all three phases have at least one frame, otherwise False.
    """
    phases = ('Takeoff', 'Flight', 'Entry')
    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    print (imgOutputDir, ' === ' , shortfilename)

    phase_root = imgOutputDir + "\\" + shortfilename + "\\"
    counts = {phase: getNumFiles(phase_root + phase) for phase in phases}
    print('tkoff files ', counts['Takeoff'])
    print('Flight files ', counts['Flight'])
    print('Entry files ', counts['Entry'])

    complete = True
    for phase in phases:
        if counts[phase] > 0:
            continue
        print('Error: No ' + phase + ' files!')
        complete = False
    return complete

def getNumFiles(folderpath):
    """Count the regular files directly inside ``folderpath`` whose name ends with '.jpg'."""
    jpg_count = 0
    for entry in os.listdir(folderpath):
        if not entry.endswith('.jpg'):
            continue
        if os.path.isfile(os.path.join(folderpath, entry)):
            jpg_count += 1
    return jpg_count


#videosAreComplete = videosAreComplete(".\\uploads\\01_10_all.mp4", ".\\images")


In [16]:
def extractI3DFeatures(vidpaths):
    """Extract I3D RGB features for the given videos as a single row vector.

    The extractor is created on first use (CPU, RAFT flow, 25 fps, windows of
    ``stack_size`` frames every ``step_size`` frames) and cached in the global
    ``i3dextractor``. The 'rgb' features of all videos are flattened and
    concatenated.

    Args:
        vidpaths: iterable of video paths.

    Returns:
        Array of shape ``(1, numWindows * 1024)`` with
        ``numWindows = (96 - stack_size) / step_size``; the reshape raises when
        the extracted feature count does not match.
    """
    global i3dextractor
    if i3dextractor is None:
        args = OmegaConf.load(build_cfg_path('i3d'))
        args.stack_size = stack_size
        args.step_size = step_size
        args.extraction_fps = 25
        args.device='cpu'#force to cpu to prevent OOM error
        args.flow_type = 'raft' # 'pwc' is not supported on Google Colab (cupy version mismatch)
        i3dextractor = ExtractI3D(args)

    rgb_features = []
    for vidpath in vidpaths:
        print(f'I3D Extracting for {vidpath}')
        feature_dict = i3dextractor.extract(vidpath)
        # Only the RGB stream is used; other entries of the result are ignored.
        if 'rgb' in feature_dict:
            rgb_features.append(feature_dict['rgb'])
    i3d_features = np.concatenate(rgb_features, axis=None)
    # NOTE(review): 96 presumably is 3 phases x 32 normalized frames (see predict_autoscore) — confirm.
    numWindows = (96-stack_size)/step_size
    return i3d_features.reshape(1, int(numWindows*1024))

In [17]:
def predict_autoscore(vidpath, imgOutputDir):
    """Predict the automatic score of a dive from its segmented frames.

    Requires frames for all three phases under ``imgOutputDir`` (checked with
    ``videosAreComplete``). A normalized video with 32 frames per phase is
    created, its I3D features are fed to the global ``autoscore_model`` and the
    temporary video is deleted again, also when extraction or prediction fails.

    Returns:
        The predicted score, or -1 when a phase is missing.
    """
    print('predict autoscore')
    # check that there are 3 videos, else decline to proceed
    if not videosAreComplete(vidpath, imgOutputDir):
        print('videos are incomplete! missing either entry, flight or takeoff phase')
        return -1
    numImages = 32
    norm_vidpaths = createNormalizedVideos(vidpath, imgOutputDir, numImages)
    try:
        videos_features = extractI3DFeatures(norm_vidpaths)
        score = autoscore_model.predict(videos_features)[0][0]
    finally:
        # Always remove the temporary videos; skip entries that were never written.
        for video in norm_vidpaths:
            if video is not None and os.path.exists(video):
                os.remove(video)
    return score

In [18]:
def predict_ss_or_twist(imgFolder):
    """Classify every flight frame as somersault ('SS') or twist ('TW').

    The images of ``imgFolder`` are read in sorted order, resized to 64x64,
    scaled to [0, 1] and passed to the global ``ss_twist_classifier_model``.
    Frames with label 0 are appended to an ``..._ss.mp4`` video, frames with
    label 1 to an ``..._tw.mp4`` video (25 fps, original frame size). The frame
    counts and the per-frame labels are printed. Nothing is returned.

    An empty folder is reported and skipped.
    """
    print('predict ss or twist')
    window_size = 1
    images = sorted(os.listdir(imgFolder))
    if not images:
        # Without a first frame the video size is unknown, so there is nothing to do.
        print('no frames found in ', imgFolder)
        return

    predicted_label_list = []
    recent_probabilities = deque(maxlen=window_size)
    eventno, diveno = extractEventNoAndDiveNo(imgFolder)
    numTwistFrames = 0
    numSSFrames = 0
    fps = 25
    first_frame = cv2.imread(imgFolder+"\\"+images[0])
    image_height, image_width = first_frame.shape[0], first_frame.shape[1]
    frame_size = (image_width, image_height)
    # NOTE(review): no separator between imgFolder and the file name, so both videos are
    # created next to imgFolder (not inside it) with the folder name as prefix — confirm intent.
    videoSS = cv2.VideoWriter(imgFolder+eventno+"_"+diveno+"_ss.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)
    videoTW = cv2.VideoWriter(imgFolder+eventno+"_"+diveno+"_tw.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)
    try:
        for image in images:
            frame = cv2.imread(imgFolder+"\\"+image)

            # Same preprocessing as for the temporal model: fixed size, values in [0, 1].
            normalized_frame = cv2.resize(frame, (64,64)) / 255
            probabilities = ss_twist_classifier_model.predict(np.expand_dims(normalized_frame, axis = 0))[0]
            recent_probabilities.append(probabilities)

            if len(recent_probabilities) == window_size:
                # Average over the window (with window_size = 1 this is the frame itself).
                predicted_label = np.argmax(np.array(recent_probabilities).mean(axis = 0))
            else: # window not filled yet
                predicted_label = np.argmax(probabilities)

            if (predicted_label == 0): #SS
                numSSFrames += 1
                videoSS.write(frame)
                predicted_label_list.append('SS')
            elif (predicted_label == 1):
                numTwistFrames += 1
                videoTW.write(frame)
                predicted_label_list.append('TW')
        print('numSSFrames ', numSSFrames)
        print('numTwistFrames ', numTwistFrames)
        print(predicted_label_list)
        cv2.destroyAllWindows()
    finally:
        # Finalise both files even when reading or prediction fails part-way.
        videoSS.release()
        videoTW.release()
        
    

In [19]:
def predict_num_somersaults(imgFolder):
    """Placeholder: somersault counting is not implemented; always returns -1."""
    not_implemented = -1
    return not_implemented

In [20]:
def predict_num_twists(imgFolder):
    """Placeholder: twist counting is not implemented; always returns -1."""
    not_implemented = -1
    return not_implemented

In [21]:
# Separate entry and splash

def calculate_mean_color_value(frames):
    """Return a list with the mean of all values of each frame (``np.mean`` per frame)."""
    return [np.mean(frame) for frame in frames]


def blue_color_proportion(frame, blur_kernel_size=(5, 5)):
    """Fraction of pixels whose channel 0 exceeds 210 after Gaussian blurring.

    Channel 0 is blue for BGR frames such as those returned by ``cv2.imread``.
    The pixel count is the array size divided by 3, i.e. three channels are assumed.
    """
    smoothed = cv2.GaussianBlur(frame, blur_kernel_size, 0)
    pixel_count = smoothed.size // 3
    strong_blue = smoothed[:, :, 0] > 210  # intensity threshold on the 0-255 scale
    return np.sum(strong_blue) / pixel_count

def get_blue_proportions(frames, blur_kernel_size=(5, 5)):
    """Apply ``blue_color_proportion`` to every frame and return the results as a list."""
    return [blue_color_proportion(frame, blur_kernel_size) for frame in frames]

def moving_average(data, window_size):
    """Means of all full windows of ``window_size`` consecutive values.

    Returns ``len(data) - window_size + 1`` values, or an empty list when the
    data is shorter than the window.
    """
    averages = []
    last_start = len(data) - window_size
    for start in range(last_start + 1):
        window = data[start:start + window_size]
        averages.append(sum(window) / window_size)
    return averages

def find_splash_frame_index(data, window_size=5, threshold = 0.01):
    """Pick the frame index that separates the entry from the splash.

    The series is smoothed with ``moving_average``. The first position where two
    neighbouring smoothed values differ by less than ``threshold - stdev/2``
    (``stdev`` being the sample standard deviation of ``data``) is returned,
    shifted by half a window. When no such position exists the fallback is
    ``len(data) // 5``.

    Args:
        data: one value per frame (the caller passes blue-pixel proportions).
        window_size: smoothing window length.
        threshold: upper bound on the change that counts as "flat".

    Returns:
        Index into ``data``.
    """
    smoothed_data = moving_average(data, window_size)
    # statistics.stdev needs at least two samples; shorter input has no spread.
    stdev = statistics.stdev(data) if len(data) >= 2 else 0.0
    limit = threshold - stdev / 2

    for i in range(len(smoothed_data) - 1):
        if abs(smoothed_data[i+1] - smoothed_data[i]) < limit:
            return i + window_size // 2

    # No flat stretch found: fall back to the first fifth of the clip.
    return len(data)//5

def get_entry_splash_frames(frames):
    """Split ``frames`` into ``(entry_frames, splash_frames)`` at the detected splash index.

    The index comes from ``find_splash_frame_index`` applied to the blue
    proportions of the frames. Should that index be ``None``, a message is
    printed and ``None`` is returned.
    """
    split_at = find_splash_frame_index(get_blue_proportions(frames))
    if split_at is None:
        print("No splash frame detected")
        return None
    return frames[:split_at], frames[split_at:]

def crop_image(img,  crop_r_w = 0.5, crop_r_h = 0):
    """Return a centred crop taken from a copy of ``img``.

    ``crop_r_w`` and ``crop_r_h`` are the fractions of the width and height to
    remove, split evenly between the two opposite sides. ``img`` must have three
    dimensions (rows, columns, channels).
    """
    rows, cols, _ = img.shape
    left = int(cols * crop_r_w / 2)
    top = int(rows * crop_r_h / 2)
    right = int(cols * (1 - crop_r_w / 2))
    bottom = int(rows * (1 - crop_r_h / 2))
    duplicate = img.copy()
    return duplicate[top:bottom, left:right]

In [22]:
# find entry key frame
def remove_bg(img):
    """Return a binary mask (0 / 255) of the pixels that are NOT in the blue HSV range.

    The blue range is H 90-120, S 50-255, V 50-255. The inverted range mask is
    blurred, opened and closed with a 5x5 elliptical kernel, binarised (every
    non-zero value becomes 255) and opened once more.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Pixels inside the blue range, then inverted so that everything else is kept.
    blue_low = np.array([90, 50, 50])
    blue_high = np.array([120, 255, 255])
    foreground = cv2.bitwise_not(cv2.inRange(hsv, blue_low, blue_high))
    foreground = cv2.GaussianBlur(foreground, (5, 5), 0)

    # Opening removes small specks, closing fills small holes.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    foreground = cv2.morphologyEx(foreground, cv2.MORPH_OPEN, ellipse)
    foreground = cv2.morphologyEx(foreground, cv2.MORPH_CLOSE, ellipse)

    # The blur left grey values behind; force the mask back to pure black and white.
    _, foreground = cv2.threshold(foreground, 0, 255, cv2.THRESH_BINARY)
    return cv2.morphologyEx(foreground, cv2.MORPH_OPEN, ellipse)

def find_human_contour(img):
    """Pick the largest plausible contour from a binary mask.

    Only the five largest external contours are considered. A contour qualifies
    when its bounding box does not touch the left, right or bottom border of the
    mask and its height/width ratio is above 0.8.

    Args:
        img: two-dimensional mask.

    Returns:
        The first qualifying contour in order of decreasing area, or ``None``.
    """
    height, width = img.shape
    found = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3.x returns (image, contours, hierarchy), 4.x returns (contours, hierarchy):
    # the contours are the second-to-last item in both.
    ctrs = found[-2]

    for c in sorted(ctrs, key=cv2.contourArea, reverse=True)[:5]:
        (x, y, w, h) = cv2.boundingRect(c)
        aspr = h/float(w)
        if x != 0 and x+w != width and y+h != height and aspr > 0.8:
            return c
    return None

def get_consecutive_numbers(input_list):
    """Return the LAST run of values in which each value is its predecessor plus one.

    An empty input gives an empty list.
    """
    trailing_run = []
    for pos, value in enumerate(input_list):
        if pos > 0 and value == input_list[pos - 1] + 1:
            trailing_run.append(value)
        else:
            # First element, or the sequence broke: start a new run here.
            trailing_run = [value]
    return trailing_run

def get_consecutive_numbers_front(input_list):
    """Return the FIRST run of values in which each value is its predecessor plus one.

    An empty input gives an empty list.
    """
    leading_run = []
    for pos, value in enumerate(input_list):
        if pos and value != input_list[pos - 1] + 1:
            break
        leading_run.append(value)
    return leading_run

def find_decreasing_point_smoothed(data, window_size=2):
    """Find the first local minimum of the smoothed series.

    ``data`` is smoothed with ``moving_average``. The first interior position
    whose value is below its predecessor and not above its successor is
    returned.

    Returns:
        Index into the SMOOTHED series (not shifted back to ``data``), or -1
        when there is no such point, which includes input too short to smooth.
    """
    smoothed_data = moving_average(data, window_size)
    for i in range(1, len(smoothed_data) - 1):
        if smoothed_data[i] < smoothed_data[i - 1] and smoothed_data[i] <= smoothed_data[i + 1]:
            return i

    return -1  # If the curve does not decrease, return -1

def find_key_frame_index(ctr_areas):
    """Choose the index of the key frame from per-frame contour areas.

    Frames whose area exceeds 80% of the overall mean are candidates; only the
    last consecutive run of candidates (``get_consecutive_numbers``) is kept.
    Within that run the last frame whose area lies in ``[run_mean, 1.5 * run_mean)``
    is returned. The fallback is the middle index ``len(ctr_areas) // 2``.
    An empty ``ctr_areas`` raises ZeroDivisionError.
    """
    overall_mean = sum(ctr_areas) / len(ctr_areas)
    candidates = [pos for pos, area in enumerate(ctr_areas) if area > overall_mean * 0.8]
    candidates = get_consecutive_numbers(candidates)

    if len(candidates) > 0:
        run_mean = sum([ctr_areas[pos] for pos in candidates]) / len(candidates) * 1
        # Scan backwards: the first hit from the end is the last matching frame.
        for pos in reversed(candidates):
            if run_mean * 1.5 > ctr_areas[pos] >= run_mean:
                return pos

    return len(ctr_areas)//2

# return the key frame and the key frame ctr
def find_key_frame(frames):
    """Select the key frame of the entry and the contour found in it.

    Every frame is masked with ``remove_bg`` and searched with
    ``find_human_contour``; frames without a contour count as area 0. The key
    frame index comes from ``find_key_frame_index``.

    Returns:
        ``(frame, contour, contour_area)`` of the key frame. The contour is
        ``None`` when nothing was found in that frame. For an empty ``frames``
        the result is ``(None, None, 0)``.
    """
    # No frames means no key frame; callers test for a None key frame.
    if len(frames) == 0:
        return None, None, 0

    ctr_areas = []
    ctr_list = []
    for frame in frames:
        ctr = find_human_contour(remove_bg(frame))
        ctr_list.append(ctr)
        ctr_areas.append(cv2.contourArea(ctr) if ctr is not None else 0)

    # find key frame
    key_frame_index = find_key_frame_index(ctr_areas)

    return frames[key_frame_index], ctr_list[key_frame_index], ctr_areas[key_frame_index]

def analyse_entry_action(frames):
    """Outline the detected contour on a copy of every frame and mark the key frame.

    Contours are found with ``remove_bg`` + ``find_human_contour``; frames
    without a contour count as area 0. The key frame index comes from
    ``find_key_frame_index``. Contours are drawn in green, the key frame's
    contour in red (BGR colours).

    Returns:
        ``(annotated_frames, key_frame_index, contours, contour_areas)``.
    """
    ctr_areas = []
    ctr_list = []
    ctr_images = []
    for frame in frames:
        ctr = find_human_contour(remove_bg(frame))
        ctr_list.append(ctr)
        ctr_areas.append(cv2.contourArea(ctr) if ctr is not None else 0)

    # An index is needed here, so use the index helper rather than find_key_frame.
    key_frame_index = find_key_frame_index(ctr_areas)
    for i, frame in enumerate(frames):
        img = frame.copy()
        ctr = ctr_list[i]
        if ctr is not None:
            color = (0, 0, 255) if i == key_frame_index else (0, 255, 0)
            cv2.drawContours(img, [ctr], -1, color, 2)
        ctr_images.append(img)

    return ctr_images, key_frame_index, ctr_list, ctr_areas


In [23]:
# Analysis angle

# Calculate the angle of the line segment against the horizontal line
def calculate_angle(line1, line2 = None):
    """Angle in degrees of a segment against the horizontal, or between two segments.

    Args:
        line1: ``((x0, y0), (x1, y1))``.
        line2: optional second segment in the same format.

    Returns:
        Without ``line2``: the inclination of ``line1`` in [0, 90]; a segment
        with no horizontal extent gives 90.0.
        With ``line2``: the acute (or right) angle between the two lines in
        [0, 90]. Vertical and perpendicular lines are handled; a zero-length
        segment gives 0.0.
    """
    if line2:
        dx1, dy1 = line1[1][0] - line1[0][0], line1[1][1] - line1[0][1]
        dx2, dy2 = line2[1][0] - line2[0][0], line2[1][1] - line2[0][1]
        # tan(angle) = |cross| / |dot|. atan2 avoids the slope formula, which divides by
        # zero for perpendicular lines and has no finite slope for vertical ones.
        cross = dx1 * dy2 - dy1 * dx2
        dot = dx1 * dx2 + dy1 * dy2
        angle = math.atan2(abs(cross), abs(dot)) * 180 / math.pi
    else:
        h = line1[1][1] - line1[0][1]
        w = line1[1][0] - line1[0][0]
        if w:
            angle = abs(math.atan(abs(h/w)) * 180 / math.pi)
        else:
            angle = 90.0

    return angle

def compute_bend_angle(line1, line2):
    # Calculate the vectors of the lines
    vector1 = [line1[1][0] - line1[0][0], line1[1][1] - line1[0][1]]
    vector2 = [line2[1][0] - line2[0][0], line2[1][1] - line2[0][1]]

    # Calculate the dot product of the vectors
    dot_product = vector1[0] * vector2[0] + vector1[1] * vector2[1]

    # Calculate the magnitudes of the vectors
    magnitude1 = math.sqrt(vector1[0] ** 2 + vector1[1] ** 2)
    magnitude2 = math.sqrt(vector2[0] ** 2 + vector2[1] ** 2)

    # Compute the cosine of the angle
    cos_angle = dot_product / (magnitude1 * magnitude2)

    # Clamp the cosine value to the valid range of -1 to 1
    cos_angle = max(min(cos_angle, 1), -1)

    # Calculate the angle in radians
    angle_rad = math.acos(cos_angle)

    # Convert the angle to degrees
    angle_deg = math.degrees(angle_rad)

    # Calculate the intersection angle, which is the smaller angle
    intersection_angle = min(angle_deg, 360 - angle_deg)

    return intersection_angle

# plot the annotation
def plot_angle_annotation(image, top_point, center_point, bottom_point):
    """Draw the top-center-bottom polyline and a dot at the center on a copy of ``image``.

    Both are drawn in red (BGR ``(0, 0, 255)``); the input image is left untouched.
    """
    red = (0, 0, 255)
    annotated = image.copy()
    polyline = np.array([top_point, center_point, bottom_point], np.int32).reshape((-1, 1, 2))
    cv2.polylines(annotated, [polyline], False, red, thickness=2)
    cv2.circle(annotated, center_point, 3, red, -1)
    return annotated

def compute_angle(img, ctr):
    """Measure the bend and entry angles of a contour and draw them on the image.

    The center is the contour centroid from ``cv2.moments``; for a contour with
    zero area (m00 == 0) the mean of the contour points is used instead. The
    entry angle is the inclination of the center-to-bottom segment
    (``calculate_angle``); the bend angle is taken between the top-to-center
    and bottom-to-center segments (``compute_bend_angle``).

    Args:
        img: image to annotate (a copy is drawn on).
        ctr: contour array indexed as ``[point, 0, (x, y)]``.

    Returns:
        ``(bend_angle, entry_angle, annotated_image)``.
    """
    # Extreme points of the contour in y (top = smallest y, bottom = largest y).
    top_point = tuple(ctr[ctr[:,:,1].argmin()][0])
    bottom_point = tuple(ctr[ctr[:,:,1].argmax()][0])

    # find the center part
    M = cv2.moments(ctr)
    if M['m00'] != 0:
        center_point = (int(M['m10']/M['m00']), int(M['m01']/M['m00']))
    else:
        # Degenerate contour (a line or a single point): the centroid is undefined.
        mean_x, mean_y = ctr.reshape(-1, 2).mean(axis=0)
        center_point = (int(mean_x), int(mean_y))

    # analyse the angle
    entry_angle = calculate_angle((center_point,bottom_point))
    bend_angle = compute_bend_angle([top_point, center_point], [bottom_point, center_point])

    human_img = plot_angle_annotation(img, top_point, center_point, bottom_point)

    return bend_angle, entry_angle, human_img

In [24]:
# Splash analysis
def increase_contrast(frame, min_val=0, max_val=255):
    """Convert a BGR frame to grayscale and stretch it linearly to ``[min_val, max_val]``.

    The darkest pixel maps to ``min_val`` and the brightest to ``max_val``. A
    frame in which all pixels are equal cannot be stretched and is returned as
    an image filled with ``min_val``.

    Returns:
        uint8 array with the shape of the grayscale frame.
    """
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    min_pixel = np.min(gray_frame)
    max_pixel = np.max(gray_frame)
    # A uniform frame would make the scale factor a division by zero.
    if max_pixel == min_pixel:
        return np.full(gray_frame.shape, min_val, dtype=np.uint8)

    scale = (max_val - min_val) / (max_pixel - min_pixel)
    contrast_frame = (gray_frame - min_pixel) * scale + min_val
    return contrast_frame.astype(np.uint8)


def get_brightness_mask(image, threshold_factor=1.2):
    """Binary mask (0 / 255) of the pixels brighter than ``threshold_factor`` times the mean."""
    cutoff = threshold_factor * np.mean(image)
    # cv2.threshold returns (used_threshold, mask); only the mask is of interest.
    return cv2.threshold(image, cutoff, 255, cv2.THRESH_BINARY)[1]


def splash_size_pixel(frames, flow_threshold=2, skip_frames=2):
    """Estimate the splash size in pixels from motion and brightness.

    Starting at frame 1, every ``skip_frames + 1``-th frame is contrast-stretched
    and compared with the previously processed frame using Farneback optical
    flow. Pixels that move faster than ``flow_threshold`` AND are brighter than
    the frame's brightness threshold count as splash.

    Args:
        frames: BGR frames of the splash phase.
        flow_threshold: minimum flow magnitude that counts as motion.
        skip_frames: number of frames skipped between two measurements.

    Returns:
        ``(mean_splash_size, max_splash_size)`` over all measurements, or
        ``(0.0, 0)`` when fewer than two frames are given (no frame pair to compare).
    """
    if len(frames) < 2:
        return 0.0, 0

    prev_frame = increase_contrast(frames[0])
    splash_sizes = []

    for i in range(1, len(frames), skip_frames + 1):
        curr_frame = increase_contrast(frames[i])

        # Calculate the optical flow using Farneback method
        flow = cv2.calcOpticalFlowFarneback(prev_frame, curr_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)

        # Only the magnitude of the 2D flow vectors matters here.
        magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])

        # 1 where the motion is significant, 0 elsewhere.
        motion_mask = np.where(magnitude > flow_threshold, 1, 0)

        # 255 where the frame is brighter than its threshold, 0 elsewhere.
        bright_mask = get_brightness_mask(curr_frame)

        # The product is 255 exactly where a pixel is both moving and bright.
        splash_mask = motion_mask * bright_mask
        splash_sizes.append(np.sum(splash_mask == 255))

        prev_frame = curr_frame

    return np.mean(splash_sizes), max(splash_sizes)

In [25]:
# Reference contour area used to normalize splash sizes.
CTR_MEAN = 1463.82755

def compute_ratio(size, area):
    """Return ``size / CTR_MEAN``, capped at 10.

    Args:
        size: splash size in pixels.
        area: contour area of the key frame. Accepted for interface
            compatibility but not used.

    Returns:
        The ratio, or 10 when the ratio exceeds 10.
    """
    # NOTE(review): the previous code computed size/area and immediately overwrote it with
    # size/CTR_MEAN, so results never depended on `area`. That behavior is kept here;
    # confirm which normalization the pickled scalers were fitted on before changing it.
    ratio = size / CTR_MEAN
    return 10 if ratio > 10 else ratio
    
def analyze_entry(imgFolder):
    """Analyse the entry phase: body angles in the key frame and splash indices.

    All images of ``imgFolder`` are read in sorted order and cropped with
    ``crop_image``, then split into entry and splash frames. The key frame of
    the entry gives the bend and entry angles; the splash frames give the mean
    and maximum splash size, which are normalized with ``compute_ratio`` and
    transformed by the two pickled scalers.

    Returns:
        ``(bend_angle, entry_angle, mean_splash_index, max_splash_index, human_img)``.
        When no contour is found in the key frame both angles are 0 and
        ``human_img`` is the unannotated key frame.
    """
    print('analyze entry')
    images = sorted(os.listdir(imgFolder))
    frames = [crop_image(cv2.imread(imgFolder+"\\"+image)) for image in images]

    # NOTE(review): pickle.load runs arbitrary code from the file; only use scaler
    # files that come from a trusted source.
    with open("mean_splash_index_scaler.pkl", "rb") as f:
        mean_splash_index_scaler = pickle.load(f)

    with open("max_splash_index_scaler.pkl", "rb") as f:
        max_splash_index_scaler = pickle.load(f)

    # separate the action and splash
    entry_frames, splash_frames = get_entry_splash_frames(frames)

    # find key frame
    key_frame, ctr, ctr_area = find_key_frame(entry_frames)
    bend_angle = 0
    entry_angle = 0
    # Default result image, used when no contour is available for annotation.
    human_img = key_frame

    # if key frame not found, return angle = 0
    if (key_frame is None or ctr is None):
        print('no human found')
    else:
        bend_angle, entry_angle, human_img = compute_angle(key_frame, ctr)

    mean_splash_size, max_splash_size = splash_size_pixel(splash_frames)

    mean_splash_ratio = compute_ratio(mean_splash_size, ctr_area)
    max_splash_ratio = compute_ratio(max_splash_size, ctr_area)

    # The scalers expect a 2D column; take the single transformed value back out.
    mean_splash_index = mean_splash_index_scaler.transform(np.array([mean_splash_ratio]).reshape(-1, 1))[0][0]
    max_splash_index = max_splash_index_scaler.transform(np.array([max_splash_ratio]).reshape(-1, 1))[0][0]

    return bend_angle, entry_angle, mean_splash_index, max_splash_index, human_img
        


In [26]:
def predict_final_score(autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex):
    """Placeholder: combining the partial results is not implemented; always returns -1."""
    not_implemented = -1
    return not_implemented

In [27]:
def processVideo(vidpath):
    """Run the whole analysis pipeline on one dive video.

    Calls, in order: temporal segmentation, autoscore prediction,
    somersault/twist classification, somersault counting, twist counting and
    entry analysis. The per-phase frame folders are looked up under
    ``.\\images\\<short file name>``.

    Args:
        vidpath: Path of the video file to analyse.

    Returns:
        Tuple ``(final_score, autoscore, numSomersaults, numTwists,
        bend_angle, entry_angle, mean_splash_index, max_splash_index,
        human_img)``. ``final_score`` is always ``-1`` because the final-score
        predictor is not wired in yet.
    """
    print('processing ', vidpath)
    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)

    images_root = '.\\images'
    predict_temporal_segmentation(vidpath, images_root)
    autoscore = predict_autoscore(vidpath, images_root)

    # Flight-phase frames drive the somersault / twist predictions.
    flight_dir = ".\\images\\" + shortfilename + "\\Flight"
    predict_ss_or_twist(flight_dir)
    numSomersaults = predict_num_somersaults(flight_dir)
    numTwists = predict_num_twists(flight_dir)

    # Entry-phase frames drive the angle and splash measurements.
    entry_dir = ".\\images\\" + shortfilename + "\\Entry"
    (bend_angle, entry_angle,
     mean_splash_index, max_splash_index,
     human_img) = analyze_entry(entry_dir)

    # predict_final_score is not used yet; report the sentinel value.
    final_score = -1

    angle_pair = (bend_angle, entry_angle)
    splash_pair = (mean_splash_index, max_splash_index)
    print('autoscore: ', autoscore,
          ', numSomersaults: ', numSomersaults,
          ', numTwists: ', numTwists,
          ', angleOfEntry: ', angle_pair,
          ', splashIndex: ', splash_pair,
          ', final_score: ', final_score)

    return (final_score, autoscore, numSomersaults, numTwists,
            bend_angle, entry_angle, mean_splash_index, max_splash_index,
            human_img)
    
    

In [28]:
def main():
    """Call every model loader once, in a fixed order.

    No video is processed here; ``processVideo`` is invoked separately.
    """
    loaders = (
        load_temporal_segment_model,
        load_autoscore_model,
        load_ss_twist_classifier_model,
        load_somersault_model,
        load_twist_model,
        load_angle_of_entry_model,
        load_splash_model,
        load_linear_regression_model,
    )
    for load_model_fn in loaders:
        load_model_fn()

In [29]:
# Run all model loaders once for this kernel session.
main()

loading temporal segment model
loading autoscore model
loading ss_twist_classifier model
loading somersault model
loading twist model
loading angle of entry model
loading splash model
loading linear regression model


In [None]:
import os
import urllib.request
from flask import Flask, flash, request, redirect, url_for, render_template, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename
from werkzeug.wrappers import Request, Response
import json
import base64


# Flask application that exposes the analysis pipeline over HTTP.
app = Flask(__name__)
CORS(app)

# Do not rely on the hard-coded key outside local experiments: set the
# FLASK_SECRET_KEY environment variable instead. The literal is kept only as a
# backward-compatible fallback.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "secret key")

# Uploaded videos are stored here; the trailing separator is kept because
# other code may concatenate file names directly onto this value.
app.config["UPLOAD_FOLDER"] = ".\\uploads\\"

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

@app.route('/videoupload', methods=['POST'])
def upload_video():
    """Handle ``POST /videoupload``: store the uploaded video and analyse it.

    The request must carry the file in the multipart field ``video``. The
    file is saved into ``app.config["UPLOAD_FOLDER"]`` under a sanitised name
    and passed to ``processVideo``.

    Returns:
        ``(json, 200)`` with every metric rendered as a string and
        ``EntryImage`` holding the base64-encoded JPEG of the entry image
        (an empty string when no image is available or encoding fails);
        ``(json, 400)`` when the file is missing or its name is unusable;
        ``(json, 500)`` when processing raises.
    """
    try:
        if "video" not in request.files:
            print("No video in request.files")
            return jsonify({"error": "No video file"}), 400

        video = request.files["video"]

        # secure_filename() can return "" (e.g. for names made only of
        # unsafe characters); treat that like a missing file name instead of
        # trying to save onto the upload directory itself.
        filename = secure_filename(video.filename) if video and video.filename else ""
        if not filename:
            print("File not allowed")
            return jsonify({"error": "File not allowed"}), 400

        # Build the path once so the saved file and the processed file are
        # guaranteed to be the same path.
        video_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
        video.save(video_path)

        (final_score, auto_score, numSomersaults, numTwists,
         bending_angle, entry_angle,
         mean_splash_index, max_splash_index,
         human_img) = processVideo(video_path)

        # The entry image may be missing (no diver detected); in that case
        # still return the numeric results rather than failing the request.
        img_str = ""
        if human_img is not None:
            encoded_ok, img_encoded = cv2.imencode('.jpg', human_img)
            if encoded_ok:
                img_str = base64.b64encode(img_encoded).decode('utf-8')

        result = {
            "final_score": str(final_score),
            "auto_score": str(auto_score),
            "numSomersaults": str(numSomersaults),
            "numTwists": str(numTwists),
            "EntryAngle": str(entry_angle),
            "BendingAngle": str(bending_angle),
            "MeanSplashIndex": str(mean_splash_index),
            "MaxSplashIndex": str(max_splash_index),
            "EntryImage": img_str
        }
        return jsonify(result), 200

    except Exception as e:
        # Log the detail server-side only; the client gets a generic message.
        print("An exception occurred: " + str(e))
        return jsonify({"error": "An internal server error occurred"}), 500

if __name__ == "__main__":
    # Serve the Flask app with Werkzeug's development server; this call
    # blocks until the server is stopped.
    from werkzeug.serving import run_simple

    run_simple(hostname='localhost', port=5000, application=app)
    

 * Running on http://localhost:5000
Press CTRL+C to quit


processing  .\uploads\uploaded_video.mp4
predict autoscore
.\images  ===  uploaded_video
tkoff files  34
Flight files  37
Entry files  35
writing video to  .\images\uploaded_video\uploaded_video_N_all.mp4  framewidth  64  frameheight  64
using device  cpu
I3D Extracting for .\images\uploaded_video\uploaded_video_N_all.mp4


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


predict ss or twist


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
127.0.0.1 - - [19/May/2023 19:41:17] "POST /videoupload HTTP/1.1" 200 -


numSSFrames  34
numTwistFrames  3
['SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'TW', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS']
analyze entry
autoscore:  0.701786 , numSomersaults:  -1 , numTwists:  -1 , angleOfEntry:  (156.2276511886353, 74.67848992513521) , splashIndex:  (0.19949546440049049, 0.24830287191520367) , final_score:  -1
processing  .\uploads\uploaded_video.mp4
predict autoscore
.\images  ===  uploaded_video
tkoff files  34
Flight files  37
Entry files  35
writing video to  .\images\uploaded_video\uploaded_video_N_all.mp4  framewidth  64  frameheight  64
I3D Extracting for .\images\uploaded_video\uploaded_video_N_all.mp4
predict ss or twist


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
127.0.0.1 - - [19/May/2023 19:48:12] "POST /videoupload HTTP/1.1" 200 -


numSSFrames  34
numTwistFrames  3
['SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'TW', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS']
analyze entry
autoscore:  0.7116236 , numSomersaults:  -1 , numTwists:  -1 , angleOfEntry:  (156.2276511886353, 74.67848992513521) , splashIndex:  (0.19949546440049049, 0.24830287191520367) , final_score:  -1
processing  .\uploads\uploaded_video.mp4
predict autoscore
.\images  ===  uploaded_video
tkoff files  21
Flight files  34
Entry files  31
writing video to  .\images\uploaded_video\uploaded_video_N_all.mp4  framewidth  64  frameheight  64
I3D Extracting for .\images\uploaded_video\uploaded_video_N_all.mp4
predict ss or twist
numSSFrames  13
numTwistFrames  21
['TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'SS', 'SS', 'TW', 'TW', 'TW', 'TW', 'TW', 'SS', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS'

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
127.0.0.1 - - [19/May/2023 19:55:30] "POST /videoupload HTTP/1.1" 200 -


autoscore:  0.763241 , numSomersaults:  -1 , numTwists:  -1 , angleOfEntry:  (151.4901209022046, 78.02386755579664) , splashIndex:  (0.1546075464616179, 0.1849111937871365) , final_score:  -1
processing  .\uploads\uploaded_video.mp4
predict autoscore
.\images  ===  uploaded_video
tkoff files  21
Flight files  34
Entry files  31
writing video to  .\images\uploaded_video\uploaded_video_N_all.mp4  framewidth  64  frameheight  64
I3D Extracting for .\images\uploaded_video\uploaded_video_N_all.mp4
predict ss or twist


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
127.0.0.1 - - [19/May/2023 20:02:17] "POST /videoupload HTTP/1.1" 200 -


numSSFrames  13
numTwistFrames  21
['TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'SS', 'SS', 'TW', 'TW', 'TW', 'TW', 'TW', 'SS', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS']
analyze entry
autoscore:  0.8049362 , numSomersaults:  -1 , numTwists:  -1 , angleOfEntry:  (151.4901209022046, 78.02386755579664) , splashIndex:  (0.1546075464616179, 0.1849111937871365) , final_score:  -1
