In [1]:
# 1: Load the model weights and define the function that captions a single frame
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing import image
from keras.models import load_model, Model

import matplotlib.pyplot as plt
import pickle
import numpy as np

import warnings
warnings.filterwarnings("ignore")



# Load the trained captioning model from disk.
model = load_model("./model_weights/model_9.h5")
# NOTE(review): `_make_predict_function` is a private Keras method; presumably it builds the
# predict function ahead of the first call -- confirm it exists in the installed Keras version.
model._make_predict_function()

# ResNet50 with ImageNet weights, configured for 224x224 3-channel input.
model_temp = ResNet50(weights="imagenet", input_shape=(224,224,3))

# Create a new model, by removing the last layer (output layer of 1000 classes) from the resnet50
model_resnet = Model(model_temp.input, model_temp.layers[-2].output)
model_resnet._make_predict_function()


    
# Load the word_to_idx and idx_to_word from disk

# NOTE(review): pickle.load can execute arbitrary code; only load these files from a trusted source.
# word_to_idx is indexed by word when encoding the partial caption in predict_caption.
with open("./storage/word_to_idx.pkl", "rb") as w2i:
    word_to_idx = pickle.load(w2i)

# idx_to_word is indexed by the predicted index when decoding in predict_caption.
with open("./storage/idx_to_word.pkl", "rb") as i2w:
    idx_to_word = pickle.load(i2w)
    

# Length the input sequence is padded to in predict_caption, and the maximum number of decoding steps.
# NOTE(review): presumably this must match the sequence length the model was trained with -- confirm.
max_len = 35


def preprocess_image(img):
    """Load an image and turn it into a one-image batch ready for ResNet50.

    img: whatever ``image.load_img`` accepts (the notebook passes a file path).
    Returns the array produced by ``preprocess_input`` after the image was
    resized to 224x224, converted to an array and given a leading batch axis.
    """
    loaded = image.load_img(img, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    return preprocess_input(batch)

def encode_image(img):
    """Run the truncated ResNet50 on an image and return its feature vector.

    img: image reference forwarded to ``preprocess_image``.
    Returns the prediction of ``model_resnet`` reshaped to ``(1, n_features)``.
    """
    features = model_resnet.predict(preprocess_image(img))
    n_features = features.shape[1]
    return features.reshape(1, n_features)



def predict_caption(photo):
    """Generate a caption for an encoded image by greedy decoding.

    Starting from the ``startseq`` token, the captioning model is asked for the
    next word up to ``max_len`` times; at each step the highest-scoring index
    (``argmax``) is taken. Decoding stops early when ``endseq`` is produced.

    photo: feature vector as returned by ``encode_image``.
    Returns the caption as a single space-separated string, without the
    ``startseq`` / ``endseq`` markers.
    """
    words = ["startseq"]

    for _ in range(max_len):
        # Encode the caption so far; words missing from the vocabulary are skipped.
        sequence = [word_to_idx[w] for w in words if w in word_to_idx]
        sequence = pad_sequences([sequence], maxlen=max_len, padding='post')

        ypred = model.predict([photo, sequence])
        word = idx_to_word[ypred.argmax()]

        if word == 'endseq':
            break
        words.append(word)

    # Only the leading 'startseq' is stripped. 'endseq' is never appended, so when
    # decoding runs to max_len without an end marker the last real word is kept
    # (the previous [1:-1] slice silently dropped it).
    return ' '.join(words[1:])




def caption_this_image(input_img):
    """Return the generated caption for the image referenced by ``input_img``.

    The image is first encoded with ``encode_image`` and the resulting feature
    vector is decoded into text by ``predict_caption``.
    """
    return predict_caption(encode_image(input_img))

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




In [2]:
# Smoke test: caption one image. NOTE: absolute local path -- adjust it to your machine.
caption_this_image('E:/AI/Flicker8k_Dataset/1000268201_693b08cb0e.jpg')

'woman in red shirt is sitting on bench'

In [3]:
# importing the necessary libraries 
import cv2 
import os
import pyodbc

# Extract the video name from its path, e.g. D:/video/name.mp4 -> name
def get_video_name(input_video, suffix):
    """Return the file name of ``input_video`` without its directory and suffix.

    input_video: path to the video; '/' and '\\' separators are both accepted,
        and a bare file name without any directory part works too.
    suffix: extension to cut off, e.g. '.mp4'. The name is cut at the last
        occurrence of ``suffix`` inside the file name.
    Returns the bare name, e.g. 'D:/video/name.mp4' -> 'name'. If ``suffix``
    does not occur in the file name, the file name is returned unchanged.
    """
    # Normalise Windows separators so the last path component is found reliably.
    file_name = input_video.replace('\\', '/').rsplit('/', 1)[-1]
    cut = file_name.rfind(suffix)
    if cut < 0:
        return file_name
    return file_name[:cut]

# Compute the similarity rate between two strings, e.g. string1 = 'dog running in the grass', string2 = 'dog is running on the grass'
# >>> same_rate(string1, string2)  ->  0.8
def same_rate(prev, cur):
    """Return how many words of ``cur`` also occur in ``prev``, relative to ``prev``.

    prev: reference sentence; its word count is the denominator.
    cur: sentence whose words are looked up in ``prev``.
    Returns a float: (number of words in ``cur`` that are words of ``prev``)
    divided by (number of words in ``prev``). Returns 0.0 when ``prev`` has no
    words.

    Words are compared as whole tokens. A substring test would count 'in' as
    present in 'running', and would treat empty tokens as always matching.
    """
    prev_words = prev.split()
    if not prev_words:
        return 0.0
    known = set(prev_words)
    shared = sum(1 for word in cur.split() if word in known)
    return shared / len(prev_words)

#initialize the connection to SQL server
# Windows-authenticated (Trusted_Connection) connection to the CaptionImage database.
# The server part is a raw string: '\S' is not a valid escape sequence in a normal
# string literal and triggers an invalid-escape warning; the resulting text is unchanged.
# NOTE(review): server and database names are hard-coded to one machine.
conn = pyodbc.connect('Driver={SQL Server};'
                      r'Server=LAPTOP-OELBU86R\SQLEXPRESS;'
                      'Database=CaptionImage;'
                      'Trusted_Connection=yes;')
# Cursor shared by the functions below that read and write the Caption table.
cursor = conn.cursor()

#generate captions from video and optimize them, save them to server SQL
def caption_this_video(input_video, similar_rate):
    """Caption a video every 500 ms and store merged caption segments in SQL.

    One frame is sampled every 500 ms and captioned with ``caption_this_image``.
    Consecutive samples whose caption is similar to the current segment's
    caption (``same_rate(...) >= similar_rate``) are merged into one segment.
    Each finished segment is inserted into the ``Caption`` table as
    (Start, Stop, Decription), with times formatted like ``'1500ms'``. The
    last segment always ends at the video's duration.

    input_video: path to the video file (the '.mp4' suffix is stripped for the
        temporary frame file name).
    similar_rate: threshold in terms of ``same_rate``; at or above it two
        captions are treated as the same scene.

    Side effects: empties the ``Caption`` table before writing, uses the
    module-level ``cursor`` / ``conn``, writes a temporary JPEG under
    ``./data`` and prints progress information.

    Raises IOError if the video cannot be opened and ValueError if it reports
    no frames or no frame rate (both before the table is emptied).
    """
    step_ms = 500

    # Temporary folder for the frame image handed to caption_this_image.
    os.makedirs('data', exist_ok=True)

    cap = cv2.VideoCapture(input_video)
    try:
        if not cap.isOpened():
            raise IOError('Cannot open video: ' + str(input_video))

        frame_number = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # Keep the fractional frame rate (e.g. 29.97); truncating it to an int
        # skews the computed duration.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0 or frame_number <= 0:
            raise ValueError('Video reports no frames or no frame rate: ' + str(input_video))
        duration = int(frame_number / fps * 1000)
        print('frame num = ' + str(frame_number) + ' fps = ' + str(fps) + ' duration = ' + str(duration) + 'ms')
        video_name = get_video_name(input_video, '.mp4')
        print('Video name: ' + video_name + '.mp4')

        # Delete the previous data only once the video is known to be usable.
        delete_all_records = '''truncate table Caption'''
        cursor.execute(delete_all_records)
        conn.commit()

        insert_records = '''INSERT INTO Caption(Start, Stop, Decription) VALUES(?,?,?)'''
        temp_path = './data/' + video_name + '.jpg'
        segment_start = 0      # start of the currently open segment, in ms
        prev_caption = None    # caption of the currently open segment
        position = 0           # timestamp of the frame being sampled, in ms

        while position < duration:
            # Seek first, then read, so the frame really belongs to `position`
            # (seeking after the read labels every caption one step too late).
            cap.set(cv2.CAP_PROP_POS_MSEC, position)
            ret, frame = cap.read()
            if not ret:
                break

            try:
                cv2.imwrite(temp_path, frame)
                current_caption = caption_this_image(temp_path)
            finally:
                # Never leave the temporary frame behind, even if captioning fails.
                if os.path.exists(temp_path):
                    os.remove(temp_path)

            if prev_caption is None:
                prev_caption = current_caption
            elif same_rate(current_caption, prev_caption) < similar_rate:
                # Scene changed: close the open segment at this timestamp.
                cursor.execute(insert_records, str(segment_start) + 'ms', str(position) + 'ms', prev_caption)
                conn.commit()
                segment_start = position
                prev_caption = current_caption

            position += step_ms

        # Always flush the segment that is still open when the video ends.
        if prev_caption is not None:
            cursor.execute(insert_records, str(segment_start) + 'ms', str(duration) + 'ms', prev_caption)
            conn.commit()
            print('Done')
        print('DONE 100%')
    finally:
        cap.release()
        cv2.destroyAllWindows()

In [15]:
# Run the captioning pipeline to (re)fill the Caption table with segments of one video.
# NOTE: absolute local paths -- adjust them to your machine. `input_video` is reused by a later cell.
#input_video = 'D:/University/cat2.mp4'
input_video = 'D:/Adobe/Output/demo3.mp4'
caption_this_video(input_video,0.6)

frame num = 2868.0 fps = 30 duration = 95600ms
Video name: demo3.mp4
Done
DONE 100%


In [3]:
import pyodbc
#initialize the connection to SQL server
# The server part is a raw string: '\S' is not a valid escape sequence in a normal
# string literal and triggers an invalid-escape warning; the resulting text is unchanged.
conn = pyodbc.connect('Driver={SQL Server};'
                      r'Server=LAPTOP-OELBU86R\SQLEXPRESS;'
                      'Database=CaptionImage;'
                      'Trusted_Connection=yes;')
# Define the cursor in pyodbc.
cursor = conn.cursor()
# View all of the records in the Caption table.
select_record = '''SELECT * FROM Caption'''
cursor.execute(select_record)
for row in cursor:
    print(row)

('demo2_30fps15533mslong                            ', '0ms       ', '3000ms    ', 'dog jumping in the air')
('demo2_30fps15533mslong                            ', '3000ms    ', '6500ms    ', 'two dogs are playing in the snow')
('demo2_30fps15533mslong                            ', '6500ms    ', '7000ms    ', 'dog jumping in the air')
('demo2_30fps15533mslong                            ', '7000ms    ', '9000ms    ', 'two dogs are playing with tennis ball in the grass')
('demo2_30fps15533mslong                            ', '9000ms    ', '11000ms   ', 'dog is jumping up in the air')
('demo2_30fps15533mslong                            ', '11000ms   ', '15000ms   ', 'two dogs are playing with tennis ball in the grass')
('demo2_30fps15533mslong                            ', '15000ms   ', '15533ms   ', 'woman in red jacket is sitting on the street')
('demo_30fps59433mslong                             ', '0ms       ', '2500ms    ', 'woman with long hair and woman in black dress are sitting i

In [17]:
# Scratch check: replacing backslashes in a path string with forward slashes.
"\\dob.mp4".replace("\\", "/")

'/dob.mp4'

In [10]:
# [NOTICE] Variant of the video-captioning function that prints progress details
def caption_this_video_details(input_video, similar_rate):
    """Caption a video every 500 ms and print what the merging logic decides.

    Debug counterpart of the database-writing captioning function: it samples
    a frame every 500 ms, captions it with ``caption_this_image`` and prints
    whether the sample starts the video, is skipped as similar to the previous
    caption (``same_rate(...) >= similar_rate``) or starts a new caption.
    Nothing is written to the database.

    input_video: path to the video file (the '.mp4' suffix is stripped for the
        temporary frame file name).
    similar_rate: threshold in terms of ``same_rate``.
    """
    # Temporary folder for the frame image handed to caption_this_image.
    os.makedirs('data', exist_ok=True)

    video_to_capture = cv2.VideoCapture(input_video)
    try:
        video_name = get_video_name(input_video, '.mp4')
        print('Video name: ' + video_name + '.mp4')
        temp_path = './data/' + video_name + '.jpg'
        count = 0
        prev_caption = ''
        while True:
            # Seek first, then read, so the printed timestamp (count*500 ms)
            # matches the frame that was actually captioned.
            video_to_capture.set(cv2.CAP_PROP_POS_MSEC, (count*500))
            ret, frame = video_to_capture.read()
            if not ret:
                break

            try:
                cv2.imwrite(temp_path, frame)
                current_caption = caption_this_image(temp_path)
            finally:
                # Never leave the temporary frame behind, even if captioning fails.
                if os.path.exists(temp_path):
                    os.remove(temp_path)

            if count == 0:
                print('Start at: '  + str(count*500) + 'ms :' + current_caption)
                print('-------------------------------------------------------' + str(count))
                prev_caption = current_caption
            else:
                r = same_rate(current_caption, prev_caption)
                is_similar = r >= similar_rate
                if is_similar:
                    print('Skip ' + str(count*500) + ' ms : similar_rate = ' + str(r*100) +'%')
                    print('-------------------------------------------------------' + str(count))
                else:
                    print(is_similar)
                    print('current: ' + current_caption)
                    print('prev: ' + prev_caption)
                    print('similar_rate =' + str(r*100) + '%')
                    print(str(count*500) + 'ms :' + current_caption)
                    print('-------------------------------------------------------' + str(count))
                    prev_caption = current_caption

            count += 1
    finally:
        video_to_capture.release()
        cv2.destroyAllWindows()

#caption_this_video_details('D:/University/cat2.mp4', 0.6)
#caption_this_video_details('C:/\DeployImageCaptioning/cat.mp4', 0.6)

In [6]:
import re
from functools import reduce
# split: break a list into a list of sub-lists at every occurrence of a separator element
def split(iterable, where):
    """Cut ``iterable`` into sub-lists at every element equal to ``where``.

    The separator elements themselves are dropped. The result always contains
    at least one (possibly empty) list, and a separator at the start or end of
    the input produces an empty list on that side.
    """
    groups = [[]]
    for element in iterable:
        if element == where:
            # Separator reached: open a fresh group.
            groups.append([])
        else:
            groups[-1].append(element)
    return groups

# Get the timeline (grouped start/stop times) of the segments whose caption mentions a keyword
def get_timeline(keyword):
    """Collect the time marks of all captions containing ``keyword``, grouped.

    Queries the ``Caption`` table for rows whose ``Decription`` contains
    ``keyword`` and reads the digits of their ``Start`` / ``Stop`` values as
    integers. Marks are accumulated in one list that starts with 0; whenever a
    row starts 2000 or more after the last recorded mark, a new group begins.

    Returns a list of lists of integers (one list per group), as produced by
    ``split``; the first group always begins with the initial 0.
    """
    marks = [0]
    cursor.execute("SELECT * FROM Caption WHERE Decription like ?", '%{}%'.format(keyword))
    for record in cursor.fetchall():
        begin = int(re.sub("[^0-9]", "", record.Start))
        finish = int(re.sub("[^0-9]", "", record.Stop))
        if begin - marks[-1] >= 2000:
            # Large gap since the last mark: start a new group at this row.
            marks.extend(['break', begin])
        elif begin not in marks:
            marks.append(begin)
        marks.append(finish)
    return split(marks, 'break')

In [19]:
# Example: grouped time marks of every stored caption that mentions "dog".
time = get_timeline("dog")
print(time)

[[0], [6000, 6500, 7000, 8500, 10000, 11000, 12000, 12500, 14000, 14500, 16500, 17000, 18500, 19000, 19680, 20000]]


In [12]:
import shutil
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip, concatenate_videoclips

def process_video(path, timeline):
    """Cut one sub-clip out of a video for every multi-point timeline group.

    The ``video`` folder is created if necessary and emptied first. For each
    group in ``timeline`` with more than one entry, the part of ``path``
    between the group's first and last mark (milliseconds, converted to
    seconds) is extracted to ``video/<name><n>.mp4``, where ``n`` is the
    1-based position of the group in ``timeline``. The opened clips are
    printed and then closed.

    path: path of the source video ('.mp4' is stripped to build clip names).
    timeline: list of lists of millisecond marks, e.g. from ``get_timeline``.
    """
    folder = 'video/'
    # The folder has to exist before it can be listed; creating it only after
    # os.listdir made the very first run fail with FileNotFoundError.
    os.makedirs('video', exist_ok=True)
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            # Best effort: report the file that could not be removed and go on.
            print('Failed to delete %s. Reason: %s' % (file_path, e))

    video_name = get_video_name(path, '.mp4')
    clips = []
    try:
        # enumerate gives each group its own number; timeline.index(t) returns
        # the first equal group and so repeats numbers for duplicate groups.
        for number, t in enumerate(timeline, start=1):
            if len(t) > 1:
                starttime = t[0]/1000
                endtime = t[-1]/1000
                targetname = "video/" + video_name + str(number) + ".mp4"
                ffmpeg_extract_subclip(path, starttime, endtime, targetname)
                clips.append(VideoFileClip(targetname))
                #clips.append([starttime, endtime])
        print(clips)
        #final_clip = concatenate_videoclips(clips)
        #final_clip.write_videofile("video/"+video_name+"final.mp4")
    finally:
        # Release the file handles; clips left open keep the files locked, so
        # the clean-up above fails on the next run ("being used by another process").
        for clip in clips:
            clip.close()
# Example run: cut out every part of the video whose caption mentions "man".
# NOTE: relies on `input_video` having been defined by the cell that runs caption_this_video.
timeline = get_timeline("man")
process_video(input_video, timeline)

Failed to delete video/demo2.mp4. Reason: [WinError 32] The process cannot access the file because it is being used by another process: 'video/demo2.mp4'
Failed to delete video/demo3.mp4. Reason: [WinError 32] The process cannot access the file because it is being used by another process: 'video/demo3.mp4'
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
[<moviepy.video.io.VideoFileClip.VideoFileClip object at 0x0000018DBB21B7C8>, <moviepy.video.io.VideoFileClip.VideoFileClip object at 0x0000018DBF76FB48>, <moviepy.video.io.VideoFileClip.VideoFileClip object at 0x0000018DBF76C248>]
