In [1]:
import cv2
import os
import random
import math
import datetime as dt
from collections import deque
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
from numpy.lib.scimath import sqrt # used for hoF
from numpy import arctan2 # used for hoF

from scipy import pi, cos, sin # used for HoF
from scipy.ndimage import uniform_filter # used for hoF

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import sklearn.metrics as metrics

from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,CSVLogger
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
print("Tensorflow version: ", tf.__version__)
print(tf.test.gpu_device_name())

#for i3d extraction
from models.i3d.extract_i3d import ExtractI3D
from utils.utils import build_cfg_path
from omegaconf import OmegaConf
import torch

# Pick the torch device once; fall back to the CPU when no CUDA runtime is usable.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Only ask for the GPU name when CUDA is available: get_device_name(0) raises otherwise.
torch.cuda.get_device_name(0) if device == 'cuda' else 'cpu (no CUDA device available)'

Tensorflow version:  2.6.0
/device:GPU:0


  from .autonotebook import tqdm as notebook_tqdm


'NVIDIA GeForce GTX 1650'

In [2]:
# Global variables shared by the loader and prediction cells below.
# Class names for the temporal-segmentation model: predict_temporal_segmentation
# indexes this list with the argmax of the model output, so the order matters.
dive_action_labels  = ['Entry', 'Flight', 'Takeoff']
# Model handles. They start as None; the load_* functions assign the ones that
# are implemented (the remaining loaders are still stubs and leave them as None).
temp_segment_model = None
autoscore_model = None
ss_twist_classifier_model = None
somersault_model = None
twist_model = None
angle_of_entry_model = None
splash_model = None
linear_regression_model = None
# Prefix prepended to weight-file names by the loaders (note the trailing slash).
folderpath      = 'modelcheckpoints/'

# I3D settings: extractI3DFeatures caches its extractor in i3dextractor and
# copies stack_size / step_size into the extractor configuration.
i3dextractor = None
stack_size = 12
step_size = 4

In [3]:
def load_temporal_segment_model():
    """Build the per-frame dive-phase CNN, load its weights and publish it.

    The network takes 64x64 3-channel frames and ends in a 3-way softmax. The
    weights are read from ``folderpath + "model_2D.h5"`` and the compiled model
    is stored in the module-level ``temp_segment_model``.
    """
    global temp_segment_model
    print('loading temporal segment model')
    frame_shape = (64, 64, 3)
    network = Sequential([
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=frame_shape),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dense(3, activation='softmax'),
    ])
    network.compile(loss=categorical_crossentropy, optimizer=Adam(), metrics=['accuracy'])
    network.load_weights(folderpath + "model_2D.h5")
    temp_segment_model = network

In [4]:
def load_autoscore_model():
    """Load the autoscore regression model once and cache it globally.

    The model is read from the shared checkpoint folder (``folderpath``) and
    stored in the module-level ``autoscore_model``. Calling this again after a
    successful load is a no-op apart from the log line.
    """
    global autoscore_model
    print('loading autoscore model')
    # Identity check: `== None` would go through the object's __eq__.
    if autoscore_model is None:
        # Use the same checkpoint prefix as the other loaders instead of a
        # second hard-coded copy of the directory name.
        autoscore_model = load_model(folderpath + 'fullyconnected_ID256D48D1_0.1.hdf5')

In [5]:
def load_ss_twist_classifier_model():
    """Build the somersault-vs-twist frame classifier and load its weights.

    The network takes 64x64 3-channel frames and ends in a 2-way softmax. It is
    compiled with Adam at learning rate 0.0001, loaded from
    ``folderpath + "model_flightClassify_2D_3.h5"`` and stored in the
    module-level ``ss_twist_classifier_model``.
    """
    global ss_twist_classifier_model
    print('loading ss_twist_classifier model')
    frame_shape = (64, 64, 3)
    optimizer = tf.keras.optimizers.Adam(0.0001)
    network = Sequential([
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=frame_shape),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dense(2, activation='softmax'),
    ])
    network.compile(loss=categorical_crossentropy, optimizer=optimizer, metrics=['accuracy'])
    network.load_weights(folderpath + "model_flightClassify_2D_3.h5")
    ss_twist_classifier_model = network
    

In [6]:
def load_somersault_model():
    """Placeholder loader: logs a message; no model is loaded yet."""
    global somersault_model  # declared for a future assignment
    print('loading somersault model')

In [7]:
def load_twist_model():
    """Placeholder loader: logs a message; no model is loaded yet."""
    global twist_model  # declared for a future assignment
    print('loading twist model')

In [8]:
def load_angle_of_entry_model():
    """Placeholder loader: logs a message; no model is loaded yet."""
    global angle_of_entry_model  # declared for a future assignment
    print('loading angle of entry model')

In [9]:
def load_splash_model():
    """Placeholder loader: logs a message; no model is loaded yet."""
    global splash_model  # declared for a future assignment
    print('loading splash model')

In [10]:
def load_linear_regression_model():
    """Placeholder loader: logs a message; no model is loaded yet."""
    global linear_regression_model  # declared for a future assignment
    print('loading linear regression model')

In [11]:
def ensureDirectoryClean(dirpath):
    """Make sure ``dirpath`` exists and is empty.

    Creates the directory (and any missing parents) when it is absent;
    otherwise removes everything inside it, including nested directories.

    Args:
        dirpath: directory to create or empty.

    Raises:
        OSError: if ``dirpath`` exists but is not a directory, or an entry
            cannot be removed.
    """
    import shutil  # local import: only needed here, for nested directories

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dirpath, exist_ok=True)
    for entry in os.listdir(dirpath):
        entry_path = os.path.join(dirpath, entry)
        # os.remove() fails on directories, so real sub-directories are removed
        # recursively; links to directories are unlinked like plain files.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)

def extractFolderAndFileNameFromAbsPath(absFilePath):
    """Split a file path into ``(folder, name_without_extension, extension)``.

    Both ``\\`` and ``/`` are accepted as separators. The extension is the text
    after the last dot of the file name (without the dot), or ``''`` when the
    file name has no dot. ``folder`` is ``''`` when the path has no separator.

    Example: ``'.\\uploads\\01_10_all.mp4'`` -> ``('.\\uploads', '01_10_all', 'mp4')``.
    """
    # rfind (not rindex) so a bare file name does not raise ValueError.
    sep_at = max(absFilePath.rfind('\\'), absFilePath.rfind('/'))
    folder = absFilePath[:sep_at] if sep_at >= 0 else ''
    basename = absFilePath[sep_at + 1:]
    # Look for the dot inside the file name only, so a dot in a folder name
    # (e.g. '.\\uploads\\clip') is never mistaken for the extension separator.
    dot_at = basename.rfind('.')
    if dot_at < 0:
        return folder, basename, ''
    return folder, basename[:dot_at], basename[dot_at + 1:]

def extractEventNoAndDiveNo(folderPath):
    """Return the last two backslash-separated components of ``folderPath``.

    The result is ``(second_to_last, last)``, used as ``(eventno, diveno)``.
    """
    parts = folderPath.split("\\")
    last = len(parts) - 1
    # Length-based indices on purpose: with a single component both values
    # resolve to that same component.
    return parts[last - 1], parts[last]

In [12]:
def predict_temporal_segmentation(vidpath, imgOutputDir):
    """Classify every frame of a video by dive phase and sort frames into folders.

    For each label in ``dive_action_labels`` the folder
    ``imgOutputDir\\<video name>\\<label>`` is created or emptied. Each frame is
    then resized to 64x64, scaled by 1/255 and passed to ``temp_segment_model``.
    Once 3 predictions are available the class probabilities are averaged over
    the last 3 frames; before that the current frame's prediction is used alone.
    The original (unresized) frame is written as ``frameNNNN.jpg`` (numbered
    from 1) into the folder of its predicted label.

    Args:
        vidpath: path of the input video.
        imgOutputDir: root folder for the per-phase image folders.

    Returns:
        list of predicted label indices (into ``dive_action_labels``), one per
        frame read. Empty when no frame could be read.

    Raises:
        RuntimeError: if ``temp_segment_model`` has not been loaded.
    """
    if temp_segment_model is None:
        raise RuntimeError('temp_segment_model is not loaded; call load_temporal_segment_model() first')

    window_size = 3
    image_height, image_width = 64, 64
    predicted_label_list = []
    recent_probabilities = deque(maxlen=window_size)

    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    video_root = imgOutputDir + "\\" + shortfilename
    for dive_action_label in dive_action_labels:
        ensureDirectoryClean(video_root + "\\" + dive_action_label)

    video_reader = cv2.VideoCapture(vidpath)
    try:
        frame_no = 0
        while True:
            status, frame = video_reader.read()
            if not status:
                break
            frame_no += 1

            # cv2.resize takes dsize as (width, height).
            resized_frame = cv2.resize(frame, (image_width, image_height))
            normalized_frame = resized_frame / 255
            probabilities = temp_segment_model.predict(np.expand_dims(normalized_frame, axis=0))[0]
            recent_probabilities.append(probabilities)

            if len(recent_probabilities) == window_size:
                # Smooth over the last `window_size` frames to reduce label flicker.
                predicted_label = np.argmax(np.array(recent_probabilities).mean(axis=0))
            else:
                # Window not full yet: use this frame's prediction on its own.
                predicted_label = np.argmax(probabilities)

            predicted_label_list.append(predicted_label)
            predicted_class_name = dive_action_labels[predicted_label]
            imagename = video_root + "\\" + predicted_class_name + "\\" + ("frame%04d.jpg" % frame_no)
            cv2.imwrite(imagename, frame)
    finally:
        # Release the capture even when prediction or writing fails.
        video_reader.release()
    return predicted_label_list

In [13]:
def createVideo(image_folder, video_folder, divephase, vidname, resizeFrame=False, resizeFrameDim=[64,64]):
    """Assemble the ``.jpg`` files of a folder into a 25 fps MP4.

    Images are taken in sorted file-name order and written to
    ``video_folder\\<vidname>_<divephase>.mp4``.

    Args:
        image_folder: folder holding the source ``.jpg`` frames.
        video_folder: folder the video is written to.
        divephase: suffix used in the video file name.
        vidname: prefix used in the video file name.
        resizeFrame: when true, every frame is resized to ``resizeFrameDim``;
            otherwise the size of the first image is used for the video.
        resizeFrameDim: target size as ``[width, height]``.

    Returns:
        The full path of the written video, or ``None`` when the folder has no
        ``.jpg`` files.

    Raises:
        ValueError: if ``resizeFrame`` is false and the first image cannot be
            read (its size is needed to open the writer).
    """
    images = [image_folder + "\\" + name
              for name in sorted(os.listdir(image_folder))
              if name.endswith(".jpg")]
    if not images:
        return None

    if resizeFrame:
        width, height = resizeFrameDim[0], resizeFrameDim[1]
    else:
        first_frame = cv2.imread(images[0])
        if first_frame is None:
            raise ValueError('cannot read first image: ' + images[0])
        height, width = first_frame.shape[:2]

    vidFullName = video_folder+'\\'+vidname+"_"+divephase+".mp4"
    print('writing video to ', vidFullName , ' framewidth ', width, ' frameheight ', height)
    fps = 25
    video = cv2.VideoWriter(vidFullName, cv2.VideoWriter_fourcc('M', 'P', '4', 'V'), fps, (width, height))
    try:
        for image in images:
            frame = cv2.imread(image)
            if frame is None:
                # Unreadable file: skip it rather than crash the whole video.
                continue
            if resizeFrame:
                # dsize is (width, height) and must match the writer's frame
                # size; the swapped order only worked for square targets.
                frame = cv2.resize(frame, (width, height))
            video.write(frame)
    finally:
        # Finalize the file even if a frame fails.
        video.release()
    return vidFullName

In [14]:
def createNormalizedVideos(vidpath, imgOutputDir, numImages, cleanUp=True):
    """Build one video with ``numImages`` frames per dive phase.

    For each of the Takeoff, Flight and Entry folders under
    ``imgOutputDir\\<video name>`` (in that order) the ``.jpg`` frames are
    brought to exactly ``numImages``: randomly chosen frames are duplicated
    when there are too few and randomly chosen frames are dropped when there
    are too many. Frame order is preserved. The frames are renumbered into a
    temporary ``N_all`` folder and passed to ``createVideo`` with a 64x64
    resize. A phase folder with no frames is skipped.

    Args:
        vidpath: path of the original video (only its file name is used).
        imgOutputDir: root folder holding the per-phase image folders.
        numImages: target number of frames per phase.
        cleanUp: when true, delete the temporary ``N_all`` folder afterwards.

    Returns:
        list with the path of the written video; empty when no frames existed.
    """
    video_list = []
    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    video_root = imgOutputDir + "\\" + shortfilename
    subdirNorm = video_root + "\\N_all"
    ensureDirectoryClean(subdirNorm)
    count_norm = 0
    # Explicit chronological order; dive_action_labels is in model-class order.
    for dive_action_label in ['Takeoff', 'Flight', 'Entry']:
        subdir = video_root + "\\" + dive_action_label
        files = sorted(name for name in os.listdir(subdir) if name.endswith('.jpg'))
        if not files:
            # Skip the empty phase but keep N_all in place: removing it here
            # broke the writes for later phases and the cleanup step.
            continue

        repeats = {name: 1 for name in files}
        shortfall = numImages - len(files)
        if shortfall > 0:
            # Too few frames: duplicate randomly chosen ones (with replacement).
            for name in random.choices(files, k=shortfall):
                repeats[name] += 1
        elif shortfall < 0:
            # Too many frames: drop a random subset. Clamped so a non-positive
            # numImages cannot request more drops than there are frames.
            for name in random.sample(files, min(-shortfall, len(files))):
                repeats[name] = 0

        for name in files:
            if repeats[name] == 0:
                continue
            frame = cv2.imread(subdir + "\\" + name)
            if frame is None:
                continue  # unreadable image
            for _ in range(repeats[name]):
                count_norm += 1
                imagename = subdirNorm + "\\" + ("frame%04d.jpg" % count_norm)
                cv2.imwrite(imagename, frame)

    # Write the video next to the phase folders.
    vidFullName = createVideo(subdirNorm, video_root, "N_all", shortfilename,
                              resizeFrame=True, resizeFrameDim=[64,64])
    if vidFullName is not None:
        video_list.append(vidFullName)

    if cleanUp:
        for f in os.listdir(subdirNorm):
            os.remove(subdirNorm + "\\" + f)
        os.rmdir(subdirNorm)

    return video_list
#createNormalizedVideos(".\\uploads\\01_10_all.mp4", '.\\images', 32)

In [15]:
def videosAreComplete(vidpath, imgOutputDir):
    """Check that the Takeoff, Flight and Entry folders each hold a frame.

    Looks under ``imgOutputDir\\<video name>``, prints the ``.jpg`` count of
    every phase folder and an error line for each empty phase.

    Args:
        vidpath: path of the original video (only its file name is used).
        imgOutputDir: root folder holding the per-phase image folders.

    Returns:
        True when all three phase folders contain at least one ``.jpg`` file.
    """
    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    print (imgOutputDir, ' === ' , shortfilename)

    # (phase folder, caption used in the count log line)
    phases = (('Takeoff', 'tkoff files '), ('Flight', 'Flight files '), ('Entry', 'Entry files '))

    # Count each folder once so the printed number is the number that is checked.
    counts = {}
    for phase, caption in phases:
        counts[phase] = getNumFiles(imgOutputDir + "\\" + shortfilename + "\\" + phase)
        print(caption, counts[phase])

    complete = True
    for phase, _ in phases:
        if counts[phase] <= 0:
            print('Error: No ' + phase + ' files!')
            complete = False
    return complete

def getNumFiles(folderpath):
    """Count the regular files in ``folderpath`` whose name ends with ``.jpg``.

    Returns 0 when ``folderpath`` does not exist or is not a directory, so
    callers can treat a missing folder the same as an empty one.
    """
    if not os.path.isdir(folderpath):
        return 0
    return sum(
        1
        for name in os.listdir(folderpath)
        if name.endswith('.jpg') and os.path.isfile(os.path.join(folderpath, name))
    )


#videosAreComplete = videosAreComplete(".\\uploads\\01_10_all.mp4", ".\\images")


In [16]:
def extractI3DFeatures(vidpaths):
    """Extract I3D RGB features for the given videos as one flat row vector.

    The extractor is created on first use (CPU, RAFT flow, 25 fps, with the
    module-level ``stack_size`` / ``step_size``) and cached in ``i3dextractor``.

    Args:
        vidpaths: iterable of video paths.

    Returns:
        numpy array of shape ``(1, num_windows * 1024)`` where
        ``num_windows = (96 - stack_size) // step_size``.

    Raises:
        ValueError: if no RGB features were produced, or the extracted feature
            count does not match the expected shape.
    """
    global i3dextractor
    if i3dextractor is None:
        args = OmegaConf.load(build_cfg_path('i3d'))
        args.stack_size = stack_size
        args.step_size = step_size
        args.extraction_fps = 25
        args.device='cpu'#force to cpu to prevent OOM error
        args.flow_type = 'raft' # 'pwc' is not supported on Google Colab (cupy version mismatch)
        i3dextractor = ExtractI3D(args)

    rgb_features = []
    for vidpath in vidpaths:
        print(f'I3D Extracting for {vidpath}')
        feature_dict = i3dextractor.extract(vidpath)
        # Only the RGB stream is used; other entries are ignored.
        if 'rgb' in feature_dict:
            rgb_features.append(feature_dict['rgb'])
    if not rgb_features:
        raise ValueError('no RGB I3D features were extracted; check the input video list')

    i3d_features = np.concatenate(rgb_features, axis=None)
    # 96 presumably is 3 phases x 32 normalized frames (see predict_autoscore),
    # and 1024 the per-window feature size -- TODO confirm against the extractor.
    total_frames = 96
    feature_dim = 1024
    # Integer division: the window count is a whole number by definition.
    num_windows = (total_frames - stack_size) // step_size
    return i3d_features.reshape(1, num_windows * feature_dim)

In [17]:
def predict_autoscore(vidpath, imgOutputDir):
    """Predict the automatic score of a dive from its segmented frames.

    Requires the Takeoff, Flight and Entry folders under
    ``imgOutputDir\\<video name>`` to be non-empty. Builds a normalized video
    with 32 frames per phase, extracts its I3D features and feeds them to
    ``autoscore_model``. The temporary video is removed afterwards.

    Args:
        vidpath: path of the original video.
        imgOutputDir: root folder holding the per-phase image folders.

    Returns:
        The predicted score (first element of the model output), or ``-1``
        when a phase has no frames.
    """
    print('predict autoscore')
    # All three phases are needed; decline to proceed otherwise.
    if not videosAreComplete(vidpath, imgOutputDir):
        print('videos are incomplete! missing either entry, flight or takeoff phase')
        return -1
    numImages = 32
    norm_vidpaths = createNormalizedVideos(vidpath, imgOutputDir, numImages)
    try:
        videos_features = extractI3DFeatures(norm_vidpaths)
        score = autoscore_model.predict(videos_features)[0][0]
    finally:
        # Remove the temporary videos even when extraction or prediction fails.
        for video in norm_vidpaths:
            if video and os.path.exists(video):
                os.remove(video)
    return score

In [18]:
def predict_ss_or_twist(imgFolder):
    """Label each frame in ``imgFolder`` as somersault (SS) or twist (TW).

    The ``.jpg`` frames are processed in sorted file-name order. Each one is
    resized to 64x64, scaled by 1/255 and passed to
    ``ss_twist_classifier_model``. Once 3 predictions are available the class
    probabilities are averaged over the last 3 frames. Frames predicted as
    class 0 go to the ``_ss.mp4`` video and frames predicted as class 1 go to
    the ``_tw.mp4`` video. The frame counts and the label list are printed.

    Args:
        imgFolder: folder holding the flight-phase frames.

    Returns:
        None. When the folder has no readable frame, a message and zero counts
        are printed and no video is written.

    Raises:
        RuntimeError: if ``ss_twist_classifier_model`` has not been loaded.
    """
    print('predict ss or twist')
    if ss_twist_classifier_model is None:
        raise RuntimeError('ss_twist_classifier_model is not loaded; call load_ss_twist_classifier_model() first')

    window_size = 3
    fps = 25
    images = sorted(name for name in os.listdir(imgFolder) if name.endswith('.jpg'))
    predicted_label_list = []
    recent_probabilities = deque(maxlen=window_size)
    eventno, diveno = extractEventNoAndDiveNo(imgFolder)
    numTwistFrames = 0
    numSSFrames = 0

    # The video size comes from the first frame that can actually be read.
    first_frame = None
    for name in images:
        first_frame = cv2.imread(imgFolder + "\\" + name)
        if first_frame is not None:
            break
    if first_frame is None:
        print('no readable frames in ', imgFolder)
        print('numSSFrames ', numSSFrames)
        print('numTwistFrames ', numTwistFrames)
        print(predicted_label_list)
        return

    image_height, image_width = first_frame.shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # NOTE(review): no separator between imgFolder and the file name, so the
    # videos land beside the folder (e.g. '...\\Flight<event>_<dive>_ss.mp4').
    # Kept as-is so existing output locations do not move -- confirm intent.
    videoSS = cv2.VideoWriter(imgFolder+eventno+"_"+diveno+"_ss.mp4", fourcc, fps, (image_width,image_height))
    videoTW = cv2.VideoWriter(imgFolder+eventno+"_"+diveno+"_tw.mp4", fourcc, fps, (image_width,image_height))
    try:
        for name in images:
            frame = cv2.imread(imgFolder + "\\" + name)
            if frame is None:
                continue  # unreadable image

            resized_frame = cv2.resize(frame, (64,64))
            normalized_frame = resized_frame / 255
            probabilities = ss_twist_classifier_model.predict(np.expand_dims(normalized_frame, axis=0))[0]
            recent_probabilities.append(probabilities)

            if len(recent_probabilities) == window_size:
                # Smooth over the last `window_size` frames to reduce label flicker.
                predicted_label = np.argmax(np.array(recent_probabilities).mean(axis=0))
            else:
                # Window not full yet: use this frame's prediction on its own.
                predicted_label = np.argmax(probabilities)

            if (predicted_label == 0): #SS
                numSSFrames += 1
                videoSS.write(frame)
                predicted_label_list.append('SS')
            elif (predicted_label == 1):
                numTwistFrames += 1
                videoTW.write(frame)
                predicted_label_list.append('TW')

        print('numSSFrames ', numSSFrames)
        print('numTwistFrames ', numTwistFrames)
        print(predicted_label_list)
    finally:
        # Finalize both files even when a frame fails.
        videoSS.release()
        videoTW.release()
        
    

In [19]:
def predict_num_somersaults(imgFolder):
    """Placeholder: not implemented yet; ignores ``imgFolder`` and returns -1."""
    return -1

In [20]:
def predict_num_twists(imgFolder):
    """Placeholder: not implemented yet; ignores ``imgFolder`` and returns -1."""
    return -1

In [21]:
def predict_angle_of_entry(imgFolder):
    """Placeholder: not implemented yet; ignores ``imgFolder`` and returns -1."""
    return -1

In [22]:
def predict_splash_index(imgFolder):
    """Placeholder: not implemented yet; ignores ``imgFolder`` and returns -1."""
    return -1

In [23]:
def predict_final_score(autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex):
    """Placeholder: not implemented yet; ignores all arguments and returns -1."""
    return -1

In [24]:
def processVideo(vidpath):
    """Run the whole scoring pipeline on one video.

    Segments the video into per-phase frame folders under ``.\\images``,
    predicts the autoscore, runs the somersault/twist frame classifier on the
    Flight frames and calls the remaining predictors.

    Args:
        vidpath: path of the video to process.

    Returns:
        Tuple ``(final_score, autoscore, numSomersaults, numTwists,
        angleOfEntry, splashIndex)``.
    """
    print('processing ', vidpath)
    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    image_root = '.\\images'
    flight_dir = image_root + "\\" + shortfilename + "\\Flight"
    entry_dir = image_root + "\\" + shortfilename + "\\Entry"

    predict_temporal_segmentation(vidpath, image_root)
    autoscore = predict_autoscore(vidpath, image_root)
    # With no Flight frames there is nothing to classify, and the classifier
    # needs at least one frame to size its output videos.
    if os.listdir(flight_dir):
        predict_ss_or_twist(flight_dir)
    else:
        print('skipping ss/twist classification: no Flight frames in ', flight_dir)
    numSomersaults = predict_num_somersaults(flight_dir)
    numTwists = predict_num_twists(flight_dir)
    angleOfEntry = predict_angle_of_entry(entry_dir)
    splashIndex = predict_splash_index(entry_dir)
    final_score = predict_final_score(autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex)
    print('autoscore: ', autoscore, 
          ', numSomersaults: ', numSomersaults, 
          ', numTwists: ', numTwists,
          ', angleOfEntry: ', angleOfEntry,
          ', splashIndex: ', splashIndex,
          ', final_score: ', final_score)
    return final_score, autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex
    
    

In [25]:
def main():
    """Call every model loader used by the scoring pipeline, in a fixed order."""
    loaders = (
        load_temporal_segment_model,
        load_autoscore_model,
        load_ss_twist_classifier_model,
        load_somersault_model,
        load_twist_model,
        load_angle_of_entry_model,
        load_splash_model,
        load_linear_regression_model,
    )
    for loader in loaders:
        loader()
    # Example end-to-end run:
    #final_score, autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex = processVideo(".\\testdive5.mp4")

In [26]:
# Load all models once, before the web endpoint below starts serving requests.
main()

loading temporal segment model
loading autoscore model
loading ss_twist_classifier model
loading somersault model
loading twist model
loading angle of entry model
loading splash model
loading linear regression model


In [None]:
import os
import urllib.request
from flask import Flask, flash, request, redirect, url_for, render_template
from werkzeug.utils import secure_filename
from werkzeug.wrappers import Request, Response
import json

app = Flask(__name__)
# Never ship a hard-coded session secret: read it from the environment and
# fall back to a random per-process key when FLASK_SECRET_KEY is not set.
app.secret_key = os.environ.get("FLASK_SECRET_KEY") or os.urandom(24)
# Uploaded videos are saved here; the trailing separator is relied on by
# upload_video, which concatenates the file name directly.
app.config['UPLOAD_FOLDER'] = '.\\uploads\\'

@app.route('/videoupload', methods=['POST'])
def upload_video():
    """Receive a video upload, score it and return the results as JSON.

    Expects a multipart POST with the video in the ``file`` field. The file is
    saved under ``UPLOAD_FOLDER`` using a sanitized name and passed to
    ``processVideo``.

    Returns:
        A JSON object with the original file name and all scores (as strings).
        A JSON ``error`` object with HTTP 400 is returned when the request has
        no ``file`` field or no usable file name.
    """
    if 'file' not in request.files:
        return {"error" : "no file in request"}, 400
    file = request.files['file']
    # secure_filename() can return '' (no file chosen, or a name made only of
    # unsafe characters); saving that would target the folder itself.
    filename = secure_filename(file.filename or '')
    if not filename:
        return {"error" : "empty or invalid filename"}, 400

    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    # Build the path once so the saved file and the processed file are the same.
    saved_path = app.config['UPLOAD_FOLDER'] + filename
    file.save(saved_path)
    final_score, autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex = processVideo(saved_path)
    
    result = {
        "file" : file.filename,
        "final_score" : str(final_score),
        "autoscore" : str(autoscore),
        "numSomersaults" : str(numSomersaults),
        "numTwists" : str(numTwists),
        "angleOfEntry" : str(angleOfEntry),
        "splashIndex" : str(splashIndex)
    }
    result_json = json.dumps(result)
    print('result', result_json)
    # Same JSON body as before, now labelled with the JSON content type.
    return result_json, 200, {'Content-Type': 'application/json'}

# Start the werkzeug server on localhost:5000 when this code runs as __main__.
# run_simple() blocks until the server is stopped.
if __name__ == "__main__":
    from werkzeug.serving import run_simple
    run_simple('localhost', 5000, app)
    

 * Running on http://localhost:5000
Press CTRL+C to quit


processing  .\uploads\01_100_all.mp4
predict autoscore
.\images  ===  01_100_all
tkoff files  33
Flight files  30
Entry files  33
writing video to  .\images\01_100_all\01_100_all_N_all.mp4  framewidth  64  frameheight  64
using device  cpu
I3D Extracting for .\images\01_100_all\01_100_all_N_all.mp4


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


predict ss or twist


127.0.0.1 - - [30/Apr/2023 23:04:23] "POST /videoupload HTTP/1.1" 200 -


numSSFrames  30
numTwistFrames  0
['SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS']
autoscore:  0.6137163 , numSomersaults:  -1 , numTwists:  -1 , angleOfEntry:  -1 , splashIndex:  -1 , final_score:  -1
result {"file": "01_100_all.mp4", "final_score": "-1", "autoscore": "0.6137163", "numSomersaults": "-1", "numTwists": "-1", "angleOfEntry": "-1", "splashIndex": "-1"}
processing  .\uploads\testdive5.mp4
predict autoscore
.\images  ===  testdive5
tkoff files  29
Flight files  82
Entry files  35
writing video to  .\images\testdive5\testdive5_N_all.mp4  framewidth  64  frameheight  64
I3D Extracting for .\images\testdive5\testdive5_N_all.mp4
predict ss or twist


127.0.0.1 - - [30/Apr/2023 23:16:11] "POST /videoupload HTTP/1.1" 200 -


numSSFrames  53
numTwistFrames  29
['TW', 'TW', 'TW', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'TW', 'TW', 'TW', 'SS', 'SS', 'SS', 'TW', 'SS', 'SS', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'TW', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS', 'SS']
autoscore:  0.62222165 , numSomersaults:  -1 , numTwists:  -1 , angleOfEntry:  -1 , splashIndex:  -1 , final_score:  -1
result {"file": "testdive5.mp4", "final_score": "-1", "autoscore": "0.62222165", "numSomersaults": "-1", "numTwists": "-1", "angleOfEntry": "-1", "splashIndex": "-1"}
