In [1]:
import cv2
import os
import random
import math
import datetime as dt
from collections import deque
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
from numpy.lib.scimath import sqrt # used for hoF
from numpy import arctan2 # used for hoF

from scipy import pi, cos, sin # used for HoF
from scipy.ndimage import uniform_filter # used for hoF

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import sklearn.metrics as metrics

from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,CSVLogger
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
print("Tensorflow version: ", tf.__version__)
print(tf.test.gpu_device_name())

#for i3d extraction
from models.i3d.extract_i3d import ExtractI3D
from utils.utils import build_cfg_path
from omegaconf import OmegaConf
import torch

# Select the torch device: use the GPU when CUDA is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Show the GPU name as the cell output. get_device_name(0) raises on machines
# without CUDA, so it is only queried when a GPU was actually detected.
torch.cuda.get_device_name(0) if device == 'cuda' else 'no CUDA device available'

Tensorflow version:  2.6.0
/device:GPU:0


  from .autonotebook import tqdm as notebook_tqdm


'NVIDIA GeForce GTX 1650'

In [2]:
# Global state shared by the loader and predictor functions in this notebook.

# Class names of the temporal-segmentation model: the predicted class index is
# looked up in this list, and each name is also used as a per-phase folder name.
dive_action_labels  = ['Entry', 'Flight', 'Takeoff']
# Model handles start out as None. temp_segment_model is assigned by
# load_temporal_segment_model(); autoscore_model is loaded lazily inside
# predict_autoscore().
temp_segment_model = None
autoscore_model = None
# The handles below are declared `global` by their loader stubs but are not
# assigned anywhere in this notebook yet.
somersault_model = None
twist_model = None
angle_of_entry_model = None
splash_model = None
linear_regression_model = None
# Path prefix (with trailing slash) prepended to model weight file names.
folderpath      = 'modelcheckpoints/'
# I3D feature extractor; created on first use by extractI3DFeatures().
i3dextractor = None

In [3]:
def load_temporal_segment_model():
    """Build the 2D-CNN frame classifier and restore its trained weights.

    The network takes 64x64 three-channel frames and ends in a 3-way softmax.
    Weights are read from ``folderpath + "model_2D.h5"`` and the finished
    model is stored in the module-level ``temp_segment_model``.
    """
    global temp_segment_model
    print('loading temporal segment model')

    frame_shape = (64, 64, 3)  # (height, width, channels)
    classifier = Sequential([
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=frame_shape),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dense(3, activation='softmax'),
    ])
    classifier.compile(loss=categorical_crossentropy, optimizer=Adam(), metrics=['accuracy'])
    classifier.load_weights(folderpath + "model_2D.h5")

    # Publish the model only once it has been fully built and loaded.
    temp_segment_model = classifier

In [4]:
def load_autoscore_model():
    """Placeholder loader for the autoscore model.

    Only announces itself on stdout; nothing is assigned to the global
    ``autoscore_model`` here.
    """
    global autoscore_model
    print('loading autoscore model')

In [5]:
def load_somersault_model():
    """Placeholder loader for the somersault model.

    Only announces itself on stdout; nothing is assigned to the global
    ``somersault_model`` here.
    """
    global somersault_model
    print('loading somersault model')

In [6]:
def load_twist_model():
    """Placeholder loader for the twist model.

    Only announces itself on stdout; nothing is assigned to the global
    ``twist_model`` here.
    """
    global twist_model
    print('loading twist model')

In [7]:
def load_angle_of_entry_model():
    """Placeholder loader for the angle-of-entry model.

    Only announces itself on stdout; nothing is assigned to the global
    ``angle_of_entry_model`` here.
    """
    global angle_of_entry_model
    print('loading angle of entry model')

In [8]:
def load_splash_model():
    """Placeholder loader for the splash model.

    Only announces itself on stdout; nothing is assigned to the global
    ``splash_model`` here.
    """
    global splash_model
    print('loading splash model')

In [9]:
def load_linear_regression_model():
    """Placeholder loader for the linear-regression model.

    Only announces itself on stdout; nothing is assigned to the global
    ``linear_regression_model`` here.
    """
    global linear_regression_model
    print('loading linear regression model')

In [10]:
def ensureDirectoryClean(dirpath):
    """Make sure ``dirpath`` exists and is empty.

    Missing directories (including parents) are created. If the directory
    already exists, everything inside it is deleted: files and symlinks are
    unlinked, sub-directories are removed recursively.

    Args:
        dirpath: Directory to create or empty.

    Raises:
        OSError: If ``dirpath`` exists but is not a directory, or an entry
            cannot be removed.
    """
    import shutil  # local import: only this helper needs it

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dirpath, exist_ok=True)
    for entry in os.listdir(dirpath):
        entry_path = os.path.join(dirpath, entry)
        # os.remove cannot delete directories; rmtree must not be given a symlink.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)

def extractFolderAndFileNameFromAbsPath(absFilePath):
    """Split a file path into ``(folder, name_without_extension, extension)``.

    Both ``\\`` and ``/`` are accepted as directory separators. The extension
    is whatever follows the last dot of the final path component, so a dot in
    a folder name is never mistaken for the extension separator.

    Args:
        absFilePath: Path string, e.g. ``".\\testdive5.mp4"``.

    Returns:
        A 3-tuple ``(folder, shortfilename, ext)``. ``folder`` is ``''`` when
        the path has no separator and ``ext`` is ``''`` when the file name has
        no dot. Neither the trailing separator nor the dot is included.
    """
    # rfind returns -1 instead of raising when the character is absent.
    filename_sep = max(absFilePath.rfind('\\'), absFilePath.rfind('/'))
    folder = absFilePath[:filename_sep] if filename_sep >= 0 else ''
    basename = absFilePath[filename_sep + 1:]

    # Search for the extension in the file name only, not in the folder part.
    extension_sep = basename.rfind('.')
    if extension_sep < 0:
        return folder, basename, ''
    return folder, basename[:extension_sep], basename[extension_sep + 1:]

In [11]:
def predict_temporal_segmentation(vidpath, imgOutputDir):
    """Classify every frame of a dive video and sort the frames by phase.

    Each frame is resized to 64x64, scaled by 1/255 and passed to
    ``temp_segment_model``. Once three predictions are available, the class
    probabilities are averaged over the last three frames before the argmax
    is taken; earlier frames use their own prediction. The original
    full-resolution frame is written to
    ``<imgOutputDir>\\<video name>\\<phase>\\frameNNNN.jpg`` (1-based frame
    number). The per-phase folders are emptied before processing.

    Args:
        vidpath: Path of the video to segment.
        imgOutputDir: Root folder under which the per-video folder is created.

    Returns:
        List with one predicted class index (into ``dive_action_labels``) per
        decoded frame, in frame order. Empty if no frame could be read.
    """
    global temp_segment_model
    if temp_segment_model is None:
        # Build the classifier on demand rather than failing later with an
        # opaque "NoneType has no attribute 'predict'".
        load_temporal_segment_model()

    window_size = 3
    image_height, image_width = 64, 64
    predicted_label_list = []
    predicted_labels_probabilities_deque = deque(maxlen=window_size)

    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    phase_dirs = {}
    for dive_action_label in dive_action_labels:
        phase_dirs[dive_action_label] = imgOutputDir+"\\"+shortfilename+"\\"+ dive_action_label
        ensureDirectoryClean(phase_dirs[dive_action_label])

    video_reader = cv2.VideoCapture(vidpath)
    try:
        count = 0
        while True:
            count += 1
            status, frame = video_reader.read()
            if not status:
                break

            # predict frame type (Takeoff/Flight/Entry)
            # cv2.resize takes dsize as (width, height).
            resized_frame = cv2.resize(frame, (image_width, image_height))
            normalized_frame = resized_frame / 255
            predicted_labels_probabilities = temp_segment_model.predict(
                np.expand_dims(normalized_frame, axis=0))[0]
            predicted_labels_probabilities_deque.append(predicted_labels_probabilities)

            if len(predicted_labels_probabilities_deque) == window_size:
                # Smooth the decision over the last `window_size` frames.
                averaged = np.array(predicted_labels_probabilities_deque).mean(axis=0)
                predicted_label = np.argmax(averaged)
            else:
                # Window not full yet: use this frame's own prediction.
                predicted_label = np.argmax(predicted_labels_probabilities)

            predicted_label_list.append(predicted_label)
            predicted_class_name = dive_action_labels[predicted_label]

            imagename = phase_dirs[predicted_class_name]+"\\"+("frame%04d.jpg" % count)
            cv2.imwrite(imagename, frame)
    finally:
        # Release the capture even if prediction or writing fails.
        video_reader.release()
    return predicted_label_list

In [12]:
#resizeFrameDim [width, height]
def createVideo(image_folder, video_folder, divephase, vidname, resizeFrame=False, resizeFrameDim=[64,64], fps=25):
    """Assemble the ``.jpg`` frames of a folder into an MP4 video.

    Frames are written in sorted file-name order, so zero-padded names such as
    ``frame0001.jpg`` end up in chronological order.

    Args:
        image_folder: Folder containing the ``.jpg`` frames.
        video_folder: Folder the video is written to.
        divephase: Phase tag appended to the video name.
        vidname: Base name of the video.
        resizeFrame: When true, every frame is resized to ``resizeFrameDim``.
        resizeFrameDim: Target size as ``[width, height]``. The default list
            is only read, never mutated.
        fps: Frame rate of the output video.

    Returns:
        Path of the written video
        (``<video_folder>\\<vidname>_<divephase>.mp4``), or ``None`` when the
        folder contains no ``.jpg`` file.

    Raises:
        ValueError: If ``resizeFrame`` is false and the first image cannot be
            decoded, so the frame size is unknown.
    """
    # os.listdir returns entries in arbitrary order; sort to keep frame order.
    images = sorted(img for img in os.listdir(image_folder) if img.endswith(".jpg"))
    if not images:
        return None

    if resizeFrame:
        width, height = resizeFrameDim[0], resizeFrameDim[1]
    else:
        first_frame = cv2.imread(os.path.join(image_folder, images[0]))
        if first_frame is None:
            raise ValueError('cannot read frame ' + os.path.join(image_folder, images[0]))
        height, width = first_frame.shape[:2]

    vidFullName = video_folder+'\\'+vidname+"_"+divephase+".mp4"
    print('writing video to ', vidFullName , ' framewidth ', width, ' frameheight ', height)
    video = cv2.VideoWriter(vidFullName, cv2.VideoWriter_fourcc('M', 'P', '4', 'V'), fps, (width,height))
    try:
        for image in images:
            frame = cv2.imread(os.path.join(image_folder, image))
            if frame is None:
                # Skip files that cannot be decoded instead of crashing mid-video.
                continue
            if resizeFrame:
                # dsize is (width, height) and must match the writer's frame size.
                frame = cv2.resize(frame, (width, height))
            video.write(frame)
    finally:
        # Always finalise the container. No GUI windows are opened here, so
        # there is nothing for cv2.destroyAllWindows() to close.
        video.release()
    return vidFullName

In [13]:
#cleanUp flag to delete temp folder created for normalizing number of images
def createNormalizedVideos(vidpath, imgOutputDir, numImages, cleanUp=True):
    """Create one fixed-length 64x64 video per dive phase.

    For every label in ``dive_action_labels`` the frames in
    ``<imgOutputDir>\\<video name>\\<label>`` are normalised to exactly
    ``numImages`` frames: randomly chosen frames are duplicated when there are
    too few, and randomly chosen frames are dropped when there are too many.
    The normalised frames are written to a temporary ``N<label>`` folder in
    their original order and turned into a video with ``createVideo``. Phases
    without any ``.jpg`` frame are skipped.

    Args:
        vidpath: Path of the source video; only its base name is used.
        imgOutputDir: Root folder holding the per-video phase folders.
        numImages: Number of frames each normalised video should contain.
        cleanUp: When true, the temporary ``N<label>`` folder is deleted
            after the video has been written.

    Returns:
        List of paths of the videos that were written.

    Raises:
        ValueError: If ``numImages`` is negative.
    """
    if numImages < 0:
        raise ValueError('numImages must not be negative')

    video_list = []
    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    for dive_action_label in dive_action_labels:
        subdir = imgOutputDir+"\\"+shortfilename+"\\"+ dive_action_label
        subdirNorm = imgOutputDir+"\\"+shortfilename+"\\N"+ dive_action_label
        ensureDirectoryClean(subdirNorm)

        # Only frame images count; stray files would decode to None.
        files = sorted(f for f in os.listdir(subdir) if f.endswith(".jpg"))
        numFiles = len(files)
        if numFiles == 0:
            os.rmdir(subdirNorm)
            continue

        # How many times each source frame appears in the normalised clip.
        repeats = dict.fromkeys(files, 1)
        if numFiles < numImages:
            # Too few frames: duplicate randomly chosen frames (with replacement).
            for name in random.choices(files, k=numImages - numFiles):
                repeats[name] += 1
        elif numFiles > numImages:
            # Too many frames: drop distinct randomly chosen frames.
            for name in random.sample(files, numFiles - numImages):
                repeats[name] = 0

        count = 0
        for name in files:
            if repeats[name] == 0:
                continue  # dropped frame: no need to decode it
            frame = cv2.imread(subdir+"\\"+name)
            for _ in range(repeats[name]):
                count += 1
                imagename = subdirNorm+"\\"+("frame%04d.jpg" % count)
                cv2.imwrite(imagename, frame)

        #write video to directory
        vidFullName = createVideo(subdirNorm, imgOutputDir+"\\"+shortfilename, "N"+dive_action_label, shortfilename,
                                 resizeFrame=True, resizeFrameDim=[64,64])
        # createVideo returns None when it found no frames to write.
        if vidFullName is not None:
            video_list.append(vidFullName)

        #cleanup
        if cleanUp:
            for f in os.listdir(subdirNorm):
                os.remove(subdirNorm+"\\"+f)
            os.rmdir(subdirNorm)

    return video_list
#createNormalizedVideos("C:\\Users\\Grace\\MTech Jupyter\\Intelligent Sensing Systems\\PracticeMod\\testdive.mp4", 
#                       '.\\images', 33)

In [14]:
def videosAreComplete(list_videos):
    """Check that the list contains a Takeoff, a Flight and an Entry video.

    The phase is read from the part of the file name after its last
    underscore (e.g. ``NEntry.mp4`` in ``testdive5_NEntry.mp4``). Folder names
    and the source video's own name are ignored, so a clip called
    ``Takeoff_practice`` is not mistaken for a Takeoff video. Empty or
    ``None`` entries are skipped.

    Args:
        list_videos: Iterable of video path strings.

    Returns:
        ``True`` only if all three phases are present, else ``False``.
    """
    required_phases = ('Takeoff', 'Flight', 'Entry')
    found_phases = set()
    for video in list_videos:
        if not video:
            continue
        # Keep only the file name, accepting either path separator.
        basename = video.replace('\\', '/').rsplit('/', 1)[-1]
        phase_tag = basename.rsplit('_', 1)[-1]
        for phase in required_phases:
            if phase in phase_tag:
                found_phases.add(phase)
    return len(found_phases) == len(required_phases)

In [15]:
def extractI3DFeatures(vidpaths):
    """Extract and concatenate the I3D RGB features of several videos.

    The extractor is created on first use (stack and step size 32, RAFT flow,
    forced onto the CPU) and cached in the global ``i3dextractor``. For each
    video the ``'rgb'`` entry of the extractor's result is collected; all
    collected arrays are flattened and concatenated in input order.

    Args:
        vidpaths: Iterable of video paths.

    Returns:
        ``numpy`` array of shape ``(1, 3072)``.
        NOTE(review): presumably 3 videos x 1024 RGB features each; confirm.

    Raises:
        ValueError: If no RGB features were produced, or the total number of
            features is not 3072.
    """
    global i3dextractor
    expected_size = 3072
    if i3dextractor is None:
        args = OmegaConf.load(build_cfg_path('i3d'))
        args.stack_size = 32
        args.step_size = 32
        # args.extraction_fps = 25
        args.device = 'cpu'  # force to cpu to prevent OOM error
        args.flow_type = 'raft'  # 'pwc' is not supported on Google Colab (cupy version mismatch)
        i3dextractor = ExtractI3D(args)

    rgb_features = []
    for vidpath in vidpaths:
        print(f'Extracting for {vidpath}')
        feature_dict = i3dextractor.extract(vidpath)
        # Only the RGB stream is used; other entries (e.g. flow) are ignored.
        if 'rgb' in feature_dict:
            rgb_features.append(feature_dict['rgb'])

    i3d_features = np.concatenate(rgb_features, axis=None)
    if i3d_features.size != expected_size:
        # Explain the mismatch instead of surfacing a bare reshape error.
        raise ValueError('expected %d I3D features in total but got %d'
                         % (expected_size, i3d_features.size))
    return i3d_features.reshape(1, expected_size)
#extractI3DFeatures(['.\\images\\testdive\\testdive_NFlight.mp4',
#                     '.\\images\\testdive\\testdive_NTakeoff.mp4',
#                       '.\\images\\testdive\\testdive_NTakeoff.mp4'])

In [16]:
def predict_autoscore(vidpath, imgOutputDir):
    """Predict the automatic score of a dive from its phase videos.

    Builds the 33-frame normalised phase videos, extracts their I3D features
    and feeds them to the autoscore network, which is loaded on first use.
    The temporary normalised videos are always deleted afterwards, including
    when a phase is missing or a later step raises.

    Args:
        vidpath: Path of the source dive video.
        imgOutputDir: Root folder holding the per-video phase folders.

    Returns:
        The model's prediction array, or ``-1`` when the takeoff, flight or
        entry video is missing.
    """
    global autoscore_model
    print('predict autoscore')
    numImages = 33
    norm_vidpaths = createNormalizedVideos(vidpath, imgOutputDir, numImages)
    try:
        # check that there are 3 videos, else decline to proceed
        if not videosAreComplete(norm_vidpaths):
            print('videos are incomplete! missing either entry, flight or takeoff phase')
            return -1
        videos_features = extractI3DFeatures(norm_vidpaths)
        if autoscore_model is None:
            # Same checkpoint folder as the other models in this notebook.
            autoscore_model = load_model(folderpath + 'fullyconnected_ID1024D512D1_0.01.hdf5')
        return autoscore_model.predict(videos_features)
    finally:
        # The normalised videos are temporary artefacts; never leave them behind.
        for video in norm_vidpaths:
            if video and os.path.exists(video):
                os.remove(video)


In [17]:
def predict_num_somersaults(imgFolder):
    """Placeholder: somersault counting is not implemented yet.

    Args:
        imgFolder: Folder of frames to analyse (currently ignored).

    Returns:
        Always ``-1``.
    """
    placeholder_result = -1
    return placeholder_result

In [18]:
def predict_num_twists(imgFolder):
    """Placeholder: twist counting is not implemented yet.

    Args:
        imgFolder: Folder of frames to analyse (currently ignored).

    Returns:
        Always ``-1``.
    """
    placeholder_result = -1
    return placeholder_result

In [19]:
def predict_angle_of_entry(imgFolder):
    """Placeholder: angle-of-entry estimation is not implemented yet.

    Args:
        imgFolder: Folder of frames to analyse (currently ignored).

    Returns:
        Always ``-1``.
    """
    placeholder_result = -1
    return placeholder_result

In [20]:
def predict_splash_index(imgFolder):
    """Placeholder: splash-index estimation is not implemented yet.

    Args:
        imgFolder: Folder of frames to analyse (currently ignored).

    Returns:
        Always ``-1``.
    """
    placeholder_result = -1
    return placeholder_result

In [21]:
def predict_final_score(autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex):
    """Placeholder: combining the sub-scores is not implemented yet.

    All five arguments are currently ignored.

    Returns:
        Always ``-1``.
    """
    placeholder_result = -1
    return placeholder_result

In [22]:
def processVideo(vidpath, imgOutputDir='.\\images'):
    """Run the whole scoring pipeline on one dive video.

    Segments the video into phase folders, predicts the autoscore, calls the
    per-attribute predictors on the Flight and Entry folders, combines
    everything into a final score and prints a summary line.

    Args:
        vidpath: Path of the dive video.
        imgOutputDir: Root folder for the extracted frames. Defaults to
            ``'.\\images'``.

    Returns:
        Tuple ``(final_score, autoscore, numSomersaults, numTwists,
        angleOfEntry, splashIndex)``.
    """
    print('processing ', vidpath)
    _, shortfilename, _ = extractFolderAndFileNameFromAbsPath(vidpath)
    predict_temporal_segmentation(vidpath, imgOutputDir)
    autoscore = predict_autoscore(vidpath, imgOutputDir)

    # Phase folders filled by predict_temporal_segmentation.
    flight_dir = imgOutputDir+"\\"+shortfilename+"\\Flight"
    entry_dir = imgOutputDir+"\\"+shortfilename+"\\Entry"
    numSomersaults = predict_num_somersaults(flight_dir)
    numTwists = predict_num_twists(flight_dir)
    angleOfEntry = predict_angle_of_entry(entry_dir)
    splashIndex = predict_splash_index(entry_dir)

    final_score = predict_final_score(autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex)
    print('autoscore: ', autoscore, 
          ', numSomersaults: ', numSomersaults, 
          ', numTwists: ', numTwists,
          ', angleOfEntry: ', angleOfEntry,
          ', splashIndex: ', splashIndex,
          ', final_score: ', final_score)
    return final_score, autoscore, numSomersaults, numTwists, angleOfEntry, splashIndex
    
    

In [23]:
def main():
    """Load every model, then score the sample video ``.\\testdive5.mp4``."""
    model_loaders = (
        load_temporal_segment_model,
        load_autoscore_model,
        load_somersault_model,
        load_twist_model,
        load_angle_of_entry_model,
        load_splash_model,
        load_linear_regression_model,
    )
    # Loaders run in the listed order.
    for load in model_loaders:
        load()

    # The six results are unpacked but not used further here.
    (final_score, autoscore, somersaults,
     twists, entry_angle, splash) = processVideo(".\\testdive5.mp4")

In [24]:
# Run the pipeline end to end: load the models and score the sample video.
main()

loading temporal segment model
loading autoscore model
loading somersault model
loading twist model
loading angle of entry model
loading splash model
loading linear regression model
processing  .\testdive5.mp4
predict autoscore
writing video to  .\images\testdive5\testdive5_NEntry.mp4  framewidth  64  frameheight  64
writing video to  .\images\testdive5\testdive5_NFlight.mp4  framewidth  64  frameheight  64
writing video to  .\images\testdive5\testdive5_NTakeoff.mp4  framewidth  64  frameheight  64
using device  cpu
Extracting for .\images\testdive5\testdive5_NEntry.mp4


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Extracting for .\images\testdive5\testdive5_NFlight.mp4
Extracting for .\images\testdive5\testdive5_NTakeoff.mp4
autoscore:  [[0.48972556]] , numSomersaults:  -1 , numTwists:  -1 , angleOfEntry:  -1 , splashIndex:  -1 , final_score:  -1
