In [2]:
import os
import cv2
import math
import pafy
import random
import numpy as np
import datetime as dt
import tensorflow as tf
import tensorflow_hub as hub

from moviepy.editor import *
from collections import deque
import matplotlib.pyplot as plt

%matplotlib inline

from sklearn.model_selection import train_test_split
 
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [4]:
model = hub.load('https://tfhub.dev/google/movenet/multipose/lightning/1')
movenet = model.signatures['serving_default']

In [5]:
# image_height, image_width = 64,64
sequence_length = 20
 
dataset_directory = os.path.join('UCF50')
classes_list = ["Punch", "Biking", "SoccerJuggling", "HorseRace"]
 
model_output_size = len(classes_list)

In [6]:
def frames_extraction(video_path):
    # Empty List declared to store video frames
    frames_list = []
     
    # Reading the Video File Using the VideoCapture
    video_reader = cv2.VideoCapture(video_path)
    
    #get the total number of frames in the video
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    skip_frames_window = max(int(video_frames_count/sequence_length),1)


    for frame_counter in range(sequence_length):
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
        success, frame = video_reader.read()

        if not success:
            break
        
        img = frame.copy()
        img = cv2.resize(img,(320,240))
        img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 192,256)
        input_img = tf.cast(img, dtype=tf.int32)

        results = movenet(input_img)
        ROIs = results['output_0'].numpy()[:,:,51:]
        keypoints_with_scores = results['output_0'].numpy()[:,:,:51].reshape((6,17,3))
        normalized_points = keypoints_with_scores.copy()

        # print(ROIs)
        k=0
        while True:
            if ROIs[0][k][-1]>0.1:
                normalized_points[k,:,0] = normalized_points[k,:,0]-ROIs[0][k][0]
                normalized_points[k,:,1] = normalized_points[k,:,1]-ROIs[0][k][1]
            else:
                normalized_points[k] = normalized_points[np.random.randint(k+1)]
            k+= 1
            if k==6:
                break
        # print('methanata')
        frames_list.append(normalized_points.reshape(6*17*3))
     
    video_reader.release()
 
    return frames_list

In [7]:
def create_dataset():
 
    # Declaring Empty Lists to store the features and labels values.
 
    features = []
    labels = []
    video_file_paths = []
     
    # Iterating through all the classes mentioned in the classes list
    for class_index, class_name in enumerate(classes_list):
        print(f'Extracting Data of Class: {class_name}')
         
        # Getting the list of video files present in the specific class name directory
        files_list = os.listdir(os.path.join(dataset_directory, class_name))
 
        # Iterating through all the files present in the files list
        for file_name in files_list:
 
            # Construct the complete video path
            video_file_path = os.path.join(dataset_directory, class_name, file_name)
 
            # Calling the frame_extraction method for every video file path
            frames = frames_extraction(video_file_path)
 
            
            if len(frames) == sequence_length:
                features.append(frames)
                labels.append(class_index)
                video_file_paths.append(video_file_path)
    features = np.array(features)
    labels = np.array(labels)
 
    return features, labels, video_file_paths

In [9]:
features, labels, video_file_paths = create_dataset() 


Extracting Data of Class: Punch
Extracting Data of Class: Biking
Extracting Data of Class: SoccerJuggling
Extracting Data of Class: HorseRace


In [10]:
one_hot_encoded_labels = to_categorical(labels)
seed_constant = 23


In [11]:
features_train, features_test, labels_train, labels_test = train_test_split(features, one_hot_encoded_labels, test_size = 0.25, shuffle = True, random_state = seed_constant)

In [12]:
print(features_train.shape)
labels_train.shape

(441, 20, 306)


(441, 4)

In [13]:
train_set_X,test_set_X=features_train,features_test
def get_lnumber_of_data(train_set_Y,test_set_Y,):
    number_of_train_samples=train_set_Y.shape[1] #Assuming one coloumn equals to one data set
    number_of_test_samples=test_set_Y.shape[1]
    
    return(number_of_train_samples,number_of_test_samples)

def flatten_X_data(train_set_X,test_set_X):
    train_X_flatten=train_set_X.reshape(train_set_X.shape[0],-1).T
    test_X_flatten=test_set_X.reshape(test_set_X.shape[0],-1).T
    
    return(train_X_flatten,test_X_flatten)

In [14]:
train_X_flatten,test_X_flatten=flatten_X_data(train_set_X,test_set_X)


faltten input data shape= (n,m)
m= number of data sets
n=number of features in one data set


In [19]:
def laye_size():
    input_layer_size=train_X_flatten.shape[0]
    #hidden_layer_1_Size=
    #hidden_layer_2_size=
    #.....
    output_layer_size=2 #number of final outputs

    return(input_layer_size,output_layer_size)


In [15]:
def init_parameters(layers_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims) - 1 # integer representing the number of layers
     
    for l in range(1, L + 1):
       
        parameters['W'+str(l)]=np.random.randn(layers_dims[l],layers_dims[l-1])*np.sqrt((2./layers_dims[l-1]))
        parameters['b'+str(l)]=np.zeros((layers_dims[l],1))
   
    return parameters

In [16]:
def Linear_forward(A,W,b):
    Z=np.dot(W,A)+b
    cache=(A,W,b)
    return Z,cache

In [17]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z)) ,Z

def relu(Z):
    return np.maximum(0,Z),Z

def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True) 
    dZ[Z <= 0] = 0
    assert (dZ.shape == Z.shape)
    return dZ
def sigmoid_backward(dA, cache):
    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    assert (dZ.shape == Z.shape)
    return dZ


In [18]:
def linear_activation_forward(A_prev,W,b,Activation):
    if Activation=="sigmoid":
        Z,Linear_cache=Linear_forward(A_prev,W,b)
        A_new ,activation_cache=sigmoid(Z)
    elif Activation=="relu":
        Z,Linear_cache=Linear_forward(A_prev,W,b)
        A_new ,activation_cache=relu(Z)
    cache=(Linear_cache,activation_cache)
    return A_new,cache

In [19]:
def Forward_propagation(X,parameters):
    caches=[]
    A=X
    L=len(parameters)//2
    for l in range(1,L):
        A_prev=A
        A,cache=linear_activation_forward(A_prev,parameters["W"+str(l)],parameters["b"+str(l)],Activation="relu")
        caches.append(cache)
    AL,cache=linear_activation_forward(A,parameters["W"+str(L)],parameters["b"+str(L)],Activation="sigmoid")
    caches.append(cache)
    assert(AL.shape==(4,X.shape[1]))
    return AL,caches

In [20]:
def compute_cost(AL,Y,parameters,lambd):
    m=Y.shape[1]
    L=len(parameters)//2
    reg_value=0
    for l in range(1,L+1):
        reg_value=reg_value+np.sum(np.square(parameters["W"+str(l)]))
    
    cost=(-1/m)*np.sum(np.multiply(Y,np.log(AL))+np.multiply((1-Y),np.log(1-AL)))+reg_value*(lambd/(2*m))
    cost=np.squeeze(cost)
    return cost

In [65]:
def linear_backward(dZ,cache,lambd):
    A_prev,W,b=cache
    m=A_prev.shape[1]

    dW=(dZ@A_prev.T)/m+(lambd/m)*W
    db=np.sum(dZ,axis=1,keepdims=True)/m
    dA_prev=np.dot(W.T,dZ)
    return dA_prev,dW,db

def linear_activation_backward(dA,cache,activation,lambd):
    linear_cache,activation_cache=cache
    if activation=="relu":
        dZ=relu_backward(dA,activation_cache)
    elif activation=="sigmoid":
        dZ=sigmoid_backward(dA,activation_cache)
    dA_prev,dW,db=linear_backward(dZ,linear_cache,lambd)

    return dA_prev,dW,db

In [66]:
def Backward_propagation(AL,Y,caches,lambd):
    grads={}
    L=len(caches)
    m=AL.shape[1]
    #print(Y.shape)
    #Y=Y.rashape(AL.shape)
    dAL=-(np.divide(Y,AL)-np.divide(1-Y,1-AL))
    #print(AL.shape)
    current_cache=caches[-1]
    grads["dA"+str(L-1)],grads["dW"+str(L)],grads["db"+str(L)]=linear_activation_backward(dAL,current_cache,"sigmoid",lambd)
                            
    for l in reversed(range(L-1)):
        current_cache=caches[l]
        da_prev_temp,dW_temp,db_temp=linear_activation_backward(grads["dA"+str(l+1)],current_cache,"relu",lambd)
        grads["dA"+str(l)]=da_prev_temp
        grads["dW"+str(l+1)]=dW_temp                             
        grads["db"+str(l+1)]=db_temp                                                                      
                                                                   
    return grads            
                                                    

In [67]:
def update_params(params,grads,learning_rate):
    parameters=params.copy()
    L=len(parameters)//2
    for l in range(1,L):
        parameters["W"+str(l)]=parameters["W"+str(l)]-learning_rate*grads["dW"+str(l)]
        parameters["b"+str(l)]=parameters["b"+str(l)]-learning_rate*grads["db"+str(l)]
    return parameters

In [68]:
Layer_dims=[train_X_flatten.shape[0],1000,100,20,10,labels_train.T.shape[0]]#should change the values


In [69]:
def model(X,Y,Layer_dims,learning_rate,number_of_iterations,lambd):
    costs=1
    params=init_parameters(Layer_dims)
    for i in range(0,number_of_iterations):
        AL,caches= Forward_propagation(X,params)
        cost=compute_cost(AL,Y,params,lambd)
        grads=Backward_propagation(AL,Y,caches,lambd)
        params=update_params(params,grads,learning_rate)
        if i%100==0:
            print(cost)
    return params

In [71]:
parameters1=model(train_X_flatten,labels_train.T,Layer_dims,0.075,5000,0.7)

4.743341777119012
4.012086169784755
3.233412948623884
2.443088614944114
2.383186757535256
2.711255315439671
1.7324315463309723
1.6223788845708957
1.5727783445490071
1.5346216665112773
1.49877096928897
1.4641821567018918
1.4305744191469716
1.3978348954183935
1.365922691146283
1.3347800321039314
1.3044002769854013
1.274743834549128
1.24579055881869
1.2175252485795964
1.1899255578013368
1.1629797956524144
1.13667344042926
1.1110072919373075
1.085940915547013
1.061483740754761
1.0376051996578821
1.0142906077051814
0.9915287572841978
0.9693154647608666
0.9476132981374179
0.926432713030166
0.9057514839520295
0.8855627623591611
0.8658522922593382
0.8465989964427388
0.8278131544313101
0.8094586761035352
0.7915430936865728
0.7740434146341824
0.7569593685087354
0.740279364113638
0.7239952223795042
0.7081033672037845
0.6925739652684033
0.6774139962913038
0.662615372093345
0.6481643137460655
0.634053363550355
0.6202770536400265


In [None]:
def predict(X, y, parameters):
    """
    This function is used to predict the results of a  L-layer neural network.

    Arguments:
    X -- data set of examples you would like to label
    parameters -- parameters of the trained model

    Returns:
    p -- predictions for the given dataset X
    """

    m = X.shape[1]
    n = len(parameters) // 2 # number of layers in the neural network
    p = np.zeros((y.shape[0],y.shape[1]))

    # Forward propagation
    probas, caches = Forward_propagation(X, parameters)


    # convert probas to 0/1 predictions
    for r in range(0,m):
        max_number=np.max(probas[:,r])
        pos=np.where(probas[:,r]==max_number)[0][0]
        p[pos,r]=1
    print("Accuracy: "  + str(np.sum((p == y)/m)))

    return p