# Importing Libraries

In [16]:
import tensorflow as tf
import numpy as np
import cv2
import os
from google.colab.patches import cv2_imshow
import random
import keras
import sys
from keras.layers import Lambda, Flatten, Dense, Layer, ConvLSTM2D, Input, Dropout
from keras.models import Sequential, Model
import matplotlib.pyplot as plt
%matplotlib inline

# Video Generator

In [17]:
class VideoGenerator:
  '''
    for generating triplets required for loss function

    Parameters:
      'path': str
        path to main folder containing subfolders of videos of each class in their separate folder
      
      'seq_len': int
        number of frames to be picked from each video. total frames = seq_len + 1
      
      'resize' : tuple (width, height)
        frame will be resized to this shape

      'batch': int
        number of videos in each batch

      'split_ratio' : tuple of int or float (train_size_percent, valid_size_percent, test_size_percent) Default = (60,20,20) (optional)
        ratio in which data will be split. The sum of the values must be equal to 100. 

    '''
  def __init__(self, path, seq_len, resize, split_ratio=(60,20,20)):
    self.path = path
    self.classes = [c for c in os.listdir(self.path) if c[0]!='.']
    self.classes.sort()
    
    self.noOfClasses = -1
    self.class_label =[]
    self.seq_len =seq_len
    self.resize =resize
    self.noOfTriplets = 0
    self.data = []
    self.generate_paths_triplet()

    #Splits the dataset
    if len(split_ratio) != 3:
      sys.exit("Invalid Values provided for parameter 'split_ratio'. It can take can only three values as tuple (train, valid, test)")
    
    elif np.sum(split_ratio) != 100:
      sys.exit("Invalid Values provided for parameter 'split_ratio'. train, valid and test must add up to 100.")
    
    else:
      self.train_data, self.valid_data, self.test_data = self.SplitData(split_ratio)

  def SplitData(self, split_ratio):
      '''
      Splits the dataset and returns training, validation and test dataset, each as tuple of (data, labels):
      
      Parameters:
      
      'split_ratio' : tuple of int or float (train_size_percent, valid_size_percent, test_size_percent)
            ratio in which data will be split. The sum of the values must be equal to 100. 
      
      '''
      #Shuffle data    
      np.random.shuffle(self.data)
      
      #Split data
      train_data, valid_data, test_data = np.split(self.data, [int((split_ratio[0]/100)*len(self.data)), int(((split_ratio[0]+split_ratio[1])/100)*len(self.data))])
        
      return train_data, valid_data, test_data

  def generate_random_number (self, low, high):
    '''
    retrun random integer number between low and high.
    
    Parameters:
      'low': int (inclusive)
      'high':int (exlcusive)
    '''
    return np.random.randint(low, high)

 
  def generate_paths_triplet(self):
    ''' 
    Generator : generates paths to triplets as (positive, anchor, positive) 
    '''
    for index, label in enumerate(self.classes):
      n=os.path.join (self.path, label)

      videos = [vid for vid in os.listdir(n) if vid[0]!='.']

      while len(videos) >=2:

        #select anchor and positive video path
        anchor = os.path.join(n, videos.pop(self.generate_random_number(0, len(videos))))
        positive = os.path.join(n, videos.pop(self.generate_random_number(0, len(videos))))

        #select negative video path
        temp = self.classes [:]
        temp.pop(index)

        neg_label =temp[self.generate_random_number(0, len(temp))]

        temp2 = [t for t in os.listdir(os.path.join(self.path, neg_label)) if t[0] != '.']

        negative = os.path.join(os.path.join(self.path, neg_label), temp2[self.generate_random_number(0, len(temp2))])

        self.data.append( [(positive, index), (anchor, index), (negative, self.classes.index(neg_label))])
      
      self.total_videos = len(self.data)
  
  def load_video (self, video_path):
    '''
      selects specific frames using 'seq_len' in a video, resizes it, maps it between 0 and 1 
      and retruns the result as (seq_len, height, width, channel)


    '''
    video = np.zeros((self.seq_len+1, self.resize[1], self.resize[0], 3))
    cap = cv2.VideoCapture(video_path)
      
    if (cap.isOpened()== False):
      sys.exit("Error opening video")
    
    #determine step size 
    noOfFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = int(noOfFrames/self.seq_len)


    if noOfFrames == 0:
      self.noFrame_counter +=1
    else:
    
      count =0 
      while 1:

        #read specific frames
        cap.set(cv2.CAP_PROP_POS_FRAMES, step*count)
        ret, frame =cap.read()
        
        if ret == False or count > self.seq_len:   
          break
        
        else:
          frame =cv2.resize(frame, self.resize)
          video[count, :,:,:]=frame/255 #map between 0 and 1 values

          count +=1  
    return video

  def generate_video(self, flag, batch):
    '''
    Generator: for batch of videos with shape (batch, 3, seq_len+1, height, width, channels)
    and one hot encoded labels with shape(batch, total number of classes)
    
    Parameters:

       'batch': int
        number of videos in each batch
      
      'flag' : str = 'train', 'valid', or 'test'
        Specifies from which dataset to load the images
  '''

    self.class_label =  [[c, i] for i, c in enumerate(self.classes)]
    self.noOfClasses = len(self.classes)

    self.noOfTriplets = 0
    X_pos = np.zeros((batch, self.seq_len+1, self.resize[1], self.resize[0], 3))
    X_anch = np.zeros((batch, self.seq_len+1, self.resize[1], self.resize[0], 3))
    X_neg = np.zeros((batch, self.seq_len+1, self.resize[1], self.resize[0], 3))
    Y = np.zeros((batch, 3))
    self.noFrame_counter=0
    
    if flag == 'train':
      triplets = self.train_data

    elif flag == 'valid':
      triplets = self.valid_data

    elif flag == 'test':
      triplets =self.test_data

    else:
      sys.exit("Invalid value provided for parameter 'flag'. It can either be 'train', 'valid' or 'test'")
    
    while 1:
      
      for (positive, positive_label), (anchor, anchor_label), (negative, negative_label) in triplets:

        X_pos[self.noOfTriplets, :,:,:,:]=self.load_video(positive)
      
        X_anch[self.noOfTriplets, :,:,:,:]=self.load_video(anchor)
        
        X_neg[self.noOfTriplets, :,:,:,:]=self.load_video(negative) 
        
        #If no frame exist in a video, 
        if self.noFrame_counter != 0:
          self.noOfTriplets -= 1
          self.noFrame_counter = 0

        
        #generate and return batch of videos
        if self.noOfTriplets == batch-1:
          yield (X_pos, X_anch, X_neg), Y
          self.noOfTriplets = 0
          X_pos = np.zeros((batch, self.seq_len+1, self.resize[1], self.resize[0], 3))
          X_anch = np.zeros((batch, self.seq_len+1, self.resize[1], self.resize[0], 3))
          X_neg = np.zeros((batch, self.seq_len+1, self.resize[1], self.resize[0], 3))

        else:
          self.noOfTriplets +=1

In [18]:
train = VideoGenerator('/content/drive/MyDrive/RefinedData', 10, (32,18))
valid = VideoGenerator('/content/drive/MyDrive/RefinedData', 10, (32,18))
test = VideoGenerator('/content/drive/MyDrive/RefinedData', 10, (32,18))


# Triplet Loss Function

In [20]:
class TripletLossLayer(Layer):
    def __init__(self, alpha, **kwargs):
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)
    
    def triplet_loss(self, inputs):
        positive, anchor, negative = inputs
        p_dist = tf.keras.backend.sum(tf.keras.backend.square(anchor-positive), axis=-1)
        n_dist = tf.keras.backend.sum(tf.keras.backend.square(anchor-negative), axis=-1)
        return tf.keras.backend.sum(tf.keras.backend.maximum(p_dist - n_dist + self.alpha, 0), axis=0)
    
    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

# Model

In [21]:
input_shape = (10, 18,32, 3)

In [22]:
positive = Input(input_shape, name='positive')
anchor = Input(input_shape, name='anchor')
negative = Input(input_shape, name='negative')

model = Sequential()
model.add(ConvLSTM2D(filters = 16, kernel_size = (3, 3), return_sequences = False, data_format = "channels_last", input_shape =input_shape))
model.add(Flatten())
model.add(Dense(1024, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(256, activation=None ))
model.add(Lambda(lambda x: tf.math.l2_normalize(x, axis=1)))

#generate embeddings
emb_pos = model(positive)
emb_anch = model(anchor)
emb_neg = model(negative)

loss_layer = TripletLossLayer(0.2, name='triplet_loss_layer')([emb_pos, emb_anch, emb_neg])
AdNet = Model(inputs=[positive, anchor, negative], outputs = loss_layer)

In [23]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d_2 (ConvLSTM2D)  (None, 16, 30, 16)        11008     
_________________________________________________________________
flatten_2 (Flatten)          (None, 7680)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 1024)              7865344   
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               262400    
_________________________________________________________________
lambda_1 (Lambda)            (None, 256)               0         
Total params: 8,138,752
Trainable params: 8,138,752
Non-trainable params: 0
____________________________________________

In [24]:
AdNet.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
positive (InputLayer)           [(None, 10, 18, 32,  0                                            
__________________________________________________________________________________________________
anchor (InputLayer)             [(None, 10, 18, 32,  0                                            
__________________________________________________________________________________________________
negative (InputLayer)           [(None, 10, 18, 32,  0                                            
__________________________________________________________________________________________________
sequential_2 (Sequential)       (None, 256)          8138752     positive[0][0]                   
                                                                 anchor[0][0]               

# Training

In [25]:
AdNet.compile(optimizer=keras.optimizers.Adam(0.001))

In [None]:
history = AdNet.fit(train.generate_video( 'train', 10 ), validation_data=valid.generate_video('valid', 10), steps_per_epoch=train.total_videos//10, epochs= 2, verbose=1 )

Epoch 1/2

# Save Model

In [None]:
AdNet.save('AdNet.h5')
model.save('model.h5')

# Prediciton


In [None]:
results = model.predict(test.generate_video('test', 10))
print(results)

# Learning Curves

In [19]:
plt.plot(history.history['loss'])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

NameError: ignored

# Database

In [None]:
  def load_video (seq_len, resize, video_path):
    '''
      selects specific frames using 'seq_len' in a video, resizes it, maps it between 0 and 1 
      and retruns the result as (seq_len, height, width, channel)


    '''
    video = np.zeros((seq_len+1, resize[1], resize[0], 3))
    cap = cv2.VideoCapture(video_path)
      
    if (cap.isOpened()== False):
      sys.exit("Error opening video")
    
    #determine step size 
    noOfFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = int(noOfFrames/seq_len)


    if noOfFrames == 0:
      continue
    else:
    
      count =0 
      while 1:

        #read specific frames
        cap.set(cv2.CAP_PROP_POS_FRAMES, step*count)
        ret, frame =cap.read()
        
        if ret == False or count > seq_len:   
          break
        
        else:
          frame =cv2.resize(frame, resize)
          video[count, :,:,:]=frame/255 #map between 0 and 1 values

          count +=1  
    return video

In [None]:
#make folder
!mkdir /content/drive/MyDrive/Database

#store paths
database = /content/drive/MyDrive/Database
path ='/content/drive/MyDrive/RefinedData'


for label in os.listdir(path):
  
  n=os.path.join(path, label)
  
  #pick random exmple from a class  
  f = [vid for vid in os.listdir(n)]
  x = np.random.randint(len(f))
  example = os.path.join(n, f[x])

  
  out = model.predict(load_video(example))
  
  filename = database + '/' + label + '.npy'  
  with open(filename, 'wb') as file:
 
    np.save(file, out) #save feature vector