<a href="https://colab.research.google.com/github/Suaif/Video_descriptor/blob/master/OurOwnFunctions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

To keep our code organized, we collected our helper functions in this notebook, `OurOwnFunctions.ipynb`.

This way, the notebook can be imported as a module from the main notebook, `main.ipynb`.

---

**List of implemented functions:**

* `slice_video`
* `padding`
* `int2str`
* `clean_text`
* `decode_sequence`




In [None]:
# Necessary modules to use the functions

import cv2
import os

import numpy as np
import re    as re

from keras.preprocessing.sequence import pad_sequences

In [None]:
def slice_video(video_id, pathIn, pathOut, height=240, width=240, n_frames=6, format='.avi'):

  '''
  Extract 'n_frames' evenly spaced frames from a video and save them as JPEG images.

  The video is read from pathIn + video_id + format. Every saved frame is
  resized to (height, width) and written to
  pathOut + video_id + '_' + <index> + '.jpg', where <index> starts at 1.
  The path of each saved image is printed.

  -
  Parameters:
    video_id         (string)  [VideoID]
    pathIn           (string)  [Videos filepath]
    pathOut          (string)  [Save filepath, created if it doesn't exist]
    height           (int)     [Frame size 1]
    width            (int)     [Frame size 2]
    n_frames         (int)     [Number of frames to obtain]
    format           (string)  [Video format '.*' ]

  Notes:
    Paths are joined by plain string concatenation, so 'pathIn' and 'pathOut'
    must end with a path separator.
    Nothing is saved when the video cannot be opened or has fewer than
    'n_frames' frames.
  '''

  vidcap = cv2.VideoCapture(pathIn + video_id + format)

  try:
    max_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Round the frame count down to a multiple of n_frames so that the
    # sampling step below is an exact integer.
    max_frames -= max_frames % n_frames
    step = max_frames // n_frames

    # cv2.resize expects the target size as (width, height)
    dim = (width, height)

    # Kept from the original implementation, but applied once instead of on
    # every iteration. NOTE(review): most file backends appear to ignore
    # this property - confirm whether it is needed at all.
    vidcap.set(cv2.CAP_PROP_FPS, 1)

    num_img, count = 1, 1

    while vidcap.isOpened() and count <= max_frames:

      success, image = vidcap.read()

      # The reported frame count can exceed what is actually decodable;
      # stop instead of passing a missing frame to cv2.resize.
      if not success:
        break

      if count % step == 0:

        # Only the frames that are kept get resized
        resized = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)

        # We create the route (including parent folders) if it doesn't exist
        os.makedirs(pathOut, exist_ok=True)

        # Save the resized image in the given route
        out_file = pathOut + video_id + '_' + str(num_img) + ".jpg"
        cv2.imwrite(out_file, resized)
        print(out_file)
        num_img += 1

      count += 1

  finally:
    # Always free the underlying video handle, even if a step above fails
    vidcap.release()

In [None]:
def padding(Y, maxlen=0, pad='post'):

  '''
  Pad tokenized descriptions with zeros to a common length.
  Also adds an extra 0 at the end of every row.

  -
  Parameters:
    Y       (list)    [Descriptions tokenized, one sequence of ints per description]
    maxlen  (int)     [Max length we want to fill with zeros, extra 0 will be added to this length.
                       0 means: use the length of the longest sequence in Y]
    pad     (string)  [Padding type, see: tf.keras.preprocessing.sequence.pad_sequences]

  Returns:
    (ndarray) Float array of shape (len(Y), maxlen + 1); the last column is always 0.
  '''

  n_seqs = len(Y)

  if maxlen == 0:
    # Measure the sequences directly: converting a ragged list of lists with
    # np.asarray raises on recent NumPy versions.
    maxlen = max((len(seq) for seq in Y), default=0)

  Ypad = np.zeros((n_seqs, maxlen + 1))

  if n_seqs:
    # One call pads (or truncates, following pad_sequences' defaults) the
    # whole batch; the last column is left untouched as the extra 0.
    Ypad[:, :-1] = pad_sequences(Y, maxlen=maxlen, padding=pad, value=0.0)

  return Ypad

In [None]:
def int2str(sec, tokenizer):

  '''
  Convert one or several sequences of integers to text.

  -
  Parameters:
    sec        (list / tuple / ndarray)  [A single sequence (1-D) or a batch of equal-length sequences (2-D)
                                          which will be converted to words]
    tokenizer  (tokenizer)               [Object with the words assignations; only its
                                          'sequences_to_texts' method is used]

  Returns:
    Whatever tokenizer.sequences_to_texts returns for the batch (one entry per sequence).

  Raises:
    ValueError if 'sec' is neither 1-D nor 2-D.
  '''

  # Accept any array-like input (list, tuple, ndarray)
  sec = np.asarray(sec)

  # If there is only one sequence we add another dimension
  if sec.ndim == 1:
    sec = sec[np.newaxis, :]

  if sec.ndim != 2:
    raise ValueError("sec must be a 1-D sequence or a 2-D batch of sequences")

  # Values may arrive as floats (e.g. from a padded array), so cast them to
  # int before handing plain Python lists to the tokenizer.
  sentences = sec.astype(int).tolist()

  return tokenizer.sequences_to_texts(sentences)

In [None]:
def clean_text(text):

  '''
  Clean the text: lowercase it, expand common English contractions and
  remove punctuation characters.

  -
  Parameters:
    text       (string) [Text to clean]

  Returns:
    (string) The cleaned text. Apostrophes that no rule covers
             (e.g. the possessive "'s") are kept.
  '''

  # Applied in order: the specific contractions must run before the generic
  # suffix rules (e.g. "won't" / "can't" before "n't").
  replacements = (
    (r"i'm", "i am"),
    (r"he's", "he is"),
    (r"she's", "she is"),
    (r"it's", "it is"),
    (r"that's", "that is"),
    (r"what's", "what is"),
    (r"where's", "where is"),
    (r"how's", "how is"),
    (r"'ll", " will"),
    (r"'ve", " have"),
    (r"'re", " are"),
    (r"'d", " would"),
    (r"won't", "will not"),
    (r"can't", "cannot"),
    (r"n't", " not"),
    # Dropped g ("goin'" -> "going"); the lookahead keeps possessives such as
    # "john's" from being rewritten.
    (r"n'(?!\w)", "ng"),
    (r"'bout", "about"),
    (r"'til", "until"),
    # Remaining punctuation is removed
    (r"[-()\"#/@;:<>{}`+=~|.!?,]", ""),
  )

  text = text.lower()

  for pattern, replacement in replacements:
    text = re.sub(pattern, replacement, text)

  return text

In [None]:
def decode_sequence(input_seq, encoder_model, decoder_model, max_sentence_len, bos_token=2, eos_token=3):

  '''
  Uses the encoder and the decoder models to get the full description sequence.
  Decoding is greedy: at every step the token with the highest score is kept.

  -
  Parameters:
    input_seq         (array) [Frames sequence; a batch of size 1 is assumed]
    encoder_model     (model) [Model to process frames and get states vector]
    decoder_model     (model) [Model to predict next word of the sequence]
    max_sentence_len  (int)   [Decoding stops once MORE than this many tokens have been generated]
    bos_token         (int)   [Token that corresponds to the BOS character, fed as first decoder input]
    eos_token         (int)   [Token that corresponds to the EOS character, stops the decoding]

  Returns:
    (list) Sampled token indices, in order. The EOS token is included when it
           is produced. At least one token is always decoded, and the list can
           hold up to max_sentence_len + 1 tokens.
  '''

  # The encoder model process the frames.
  # NOTE(review): assumes predict() returns the two decoder states [h, c] - confirm.
  states_value = list(encoder_model.predict(input_seq))

  # First input of the target sequence is the start token
  target_seq = np.asarray([[bos_token]])

  decoded_sentence = []

  while True:

    # Now, the decoder model predict the next word
    output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

    # Sample a token (greedy choice over the whole output)
    sampled_token_index = np.argmax(output_tokens)
    decoded_sentence.append(sampled_token_index)

    # Exit condition: either hit max length or find stop character.
    if sampled_token_index == eos_token or len(decoded_sentence) > max_sentence_len:
      break

    # The sampled token becomes the next decoder input (sequence of length 1)
    target_seq = np.asarray([[sampled_token_index]])

    # Update states
    states_value = [h, c]

  return decoded_sentence