In [1]:
from datetime import datetime, timedelta 
import pandas as pd
import datetime as dt

import os, glob, subprocess
from shutil import copyfile

In [2]:
def sanity_check_ffmpeg():

    try:
        check = subprocess.check_output("ffmpeg -version", shell=True)
    except:
        return 'Not installed.'
    return check[0:35]


In [19]:
sanity_check_ffmpeg()

b'ffmpeg version 2.7 Copyright (c) 20'

In [3]:
pip install ffmpeg


Note: you may need to restart the kernel to use updated packages.


In [3]:
class Extract_Prey_Frames:

    def __init__(self):
        pass
    
    def set_movie_name(self, movie_name):
        self.movie_name = movie_name
        
    def set_movie_number(self, movie_number):
        self.movie_number = movie_number
        
    def set_data_number(self, data_number):
        self.data_number = data_number
        
    def set_category_name(self, category_name):
        self.category_name = category_name
    
    def set_data_file(self, file_name):
        self.file_name = file_name
        
    def set_first_good_frame_time(self, time):
        self.first_frame_time = time
        
    def set_true_movie_start_time(self, time):
        self.movie_start_time = time
        
    def set_video_date(self, date):
        self.video_date = date
        
    def read_data_file(self, separator='\t'):
        self.df = pd.read_csv(self.file_name, sep=separator)
    
    def get_df_columns(self):
        return self.df.columns
    
    def apply_ffmpeg(self, fps):
        
        path = 'Video_Files/'+self.category_name+'/'+self.movie_name+'.avi'
        os.system("ffmpeg -i {0} -r {1} -f image2 {0}_image-%2d.png".format(path, fps))
            
    def create_directory_for_images(self):
        # Create the directory incase it doesn't exist
        try:
            os.makedirs('Images/'+self.category_name+'/'+self.movie_name)
        except:
            print('exists')

    def get_files(self, directory):
        files = []
        os.chdir(directory)
        for file in glob.glob("*.png"):
            files.append(file)

        os.chdir('../..')
        return files
    
    def get_dataframe_start_time(self, max_video_count=17):

        for i in range (2, max_video_count+1):
            video = self.df[self.df['Video']==i]
            print('Dataframe file', i,'movie:', i-1,'starts at :',video.iloc[0]['dateW'])
    
    def contains_feature(self, max_video_count=17, feature_column='Preycapture', feature_code=1):

        for i in range (1, max_video_count+1):
            video = self.df[self.df['Video']==i]
            print('Dataframe file', i,'contains:',len(video[video[feature_column] == feature_code]))
    
    def move_prey_frames(self, times):
    
    
        list_of_files = self.get_files('Images/'+self.category_name+'/'+self.movie_name)
        os.chdir('..')
        
        # Create the directory incase it doesn't exist
        try:
            os.makedirs('Extracted_frames'+'/'+str(self.category_name)+'/'+self.movie_name)
        except:
            print('exists')

        for timestamp in times:
            formatted_stamp=str(timestamp).replace(":","_")
            formatted_stamp=formatted_stamp.replace(" ","_")
            
            print (str(timestamp))
            # Get all the files that match the timestamp substring
            matching_files = [s for s in list_of_files if str(formatted_stamp) in s]

            for matching_file in matching_files:
                old_name = os.path.join('Images/'+self.category_name+'/'+self.movie_name, matching_file)
                new_file_name = self.category_name+'_'+str(self.data_number)+'_'+matching_file
                
                new_name = os.path.join('Extracted_frames'+'/'+str(self.category_name)+'/'+self.movie_name, new_file_name)
                print ('move ',  old_name , 'to', new_name)
                print(copyfile(old_name, new_name))
                
    
    def get_frames_from_data(self, feature_column, feature_code, df_time):
        
        # Find rows in the dataframe for which the video number match
        # Note that this is not the true movie number.
        video = self.df[self.df['Video']==self.data_number]

        # Get the rows that match the particular feature and value.
        # For example, Preycapture == 2 or Preycapture == 3
        prey_capture_df = video[video[feature_column] == feature_code]

        # If there are no rows there there are no frames to extract and
        # the function can terminate
        if len(prey_capture_df) == 0:
            print ('No prey captures found.')
            return None

        prey_capture_df['Date'] = pd.to_datetime(prey_capture_df['dateW'])
        prey_capture_df['Date'] = prey_capture_df['Date'].apply(lambda x: dt.datetime.strftime(x, '%d-%m-%Y_%H_%M_%S'))
        prey_capture_df = prey_capture_df.groupby('Date').first().reset_index().drop(['dateW','depth','aX','aY','aZ','Position'],axis=1)

        list_of_times = []
        df_time_start = datetime.strptime(df_time, '%d-%m-%Y_%H_%M_%S')
        for index, row in prey_capture_df.iterrows():
            print('Prey dataframe_time: ',datetime.strptime(row['Date'], '%d-%m-%Y_%H_%M_%S'))
            val = datetime.strptime(row['Date'], '%d-%m-%Y_%H_%M_%S') - df_time_start
            print('Relative change in time: ',datetime.strptime(row['Date'], '%d-%m-%Y_%H_%M_%S') - df_time_start)
            print('Correct time in movie file: ',datetime.strptime(self.movie_start_time, '%d-%m-%Y_%H_%M_%S') +  val)
            
            list_of_times.append(datetime.strptime(self.movie_start_time, '%d-%m-%Y_%H_%M_%S') +  val)
            print()

        return list_of_times
    
    def update_names_in_folder(self, start_index, frame_rate):
        '''
        This function iterates over each image and renames it based on the timestamp in the frame.
        The resulting file names will be in this format dd-mm-yyyy_hh-mm-ss_frame_number
        Depending on the frame rate there will be various frame numbers for a particular time stamp.
        `folder_name` is the folder with the original extracted frames
        `destination_folder_name` is the folder for which the re-named frames should be placed
        `start_index` is used to offset the renaming as it appears that the video is not completely synced in time.
        The idea here is to find the first frame in the video which corresponds to the next time stamp. E.g if the 
        first frame in the video is 10:04:22, then find the first frame for 10:04:23. If that frame was the 10th one
        then start_index = 10.
        `time` denotes the starting time in the first frame of the video eg: 10:02:09
        `date` denotes the date of the video eg: 26/12/2017
        '''

        files = self.get_files('Video_Files/'+self.category_name)
        time_stamp = datetime.strptime(self.video_date+str('-')+str(self.first_frame_time), '%d/%m/%Y-%H:%M:%S')
        current_time = time_stamp

        i = 0

        frame_counter = 0

        while i <= len(files)-frame_rate-start_index+1:
            current=str(current_time).replace(":","_")
            current=current.replace(" ", "_")

            for j in range (0, frame_rate):
                
                old_name = os.path.join('Video_Files/'+self.category_name, self.movie_name+'.avi_'+'image-{:02d}.png'.format(i+j+start_index))
                new_name = os.path.join('Images/'+self.category_name+'/'+self.movie_name, self.movie_name+'.avi_'+'image_{}_{}.png'.format(current,frame_counter))
                os.rename(old_name,new_name) 
                frame_counter = frame_counter + 1

            i += frame_rate

            current_time = current_time + timedelta(seconds = 1)
    
    

Initialise the class

In [74]:
extract = Extract_Prey_Frames()

Set the name of the spreadsheet

In [75]:
extract.set_data_file('GI-LP0242-(1)-aligned-corrected.txt')
extract.read_data_file()

View the columns in the spreadsheet

In [76]:
extract.get_df_columns()

Index(['dateW', 'aX', 'aY', 'aZ', 'latitude', 'longitude', 'depth', 'Position',
       'Preytype', 'Preyabundance', 'Preycapture', 'Cons.het',
       'Cons_hetabundance', 'Video', 'Comments'],
      dtype='object')

The `contains_feature` column let's you see how many rows of data are found based on the column and value. In this case the 'preycapture' column is used along with a value of 3 which corresponds to 'Handling (H)'. Different values can be explored. This is useful as it gives us information as to which video file we should process. Files which have no row data means that there might be no useful frame in that particular video.

In [73]:
extract.contains_feature(feature_column='Preytype',feature_code=5)

Dataframe file 1 contains: 0
Dataframe file 2 contains: 47
Dataframe file 3 contains: 0
Dataframe file 4 contains: 0
Dataframe file 5 contains: 0
Dataframe file 6 contains: 0
Dataframe file 7 contains: 0
Dataframe file 8 contains: 0
Dataframe file 9 contains: 0
Dataframe file 10 contains: 0
Dataframe file 11 contains: 0
Dataframe file 12 contains: 494
Dataframe file 13 contains: 687
Dataframe file 14 contains: 0
Dataframe file 15 contains: 0
Dataframe file 16 contains: 74
Dataframe file 17 contains: 0


Check the number of frames in each video which have `Preycapture` with a value of 2.

A value of zero means that for that particular video/data time there were not frames matching the value of 2. A value of 2 denotes: "Strike left (L)"] <- "2" "Strike forward (F)"] <- "2" "Strike right (R)"]. This information was obtained from the file: Little penguin cam alignment read me.txt

In [57]:
extract.contains_feature(feature_column='Preycapture',feature_code=5)

Dataframe file 1 contains: 0
Dataframe file 2 contains: 0
Dataframe file 3 contains: 0
Dataframe file 4 contains: 0
Dataframe file 5 contains: 0
Dataframe file 6 contains: 0
Dataframe file 7 contains: 0
Dataframe file 8 contains: 0
Dataframe file 9 contains: 0
Dataframe file 10 contains: 0
Dataframe file 11 contains: 0
Dataframe file 12 contains: 0
Dataframe file 13 contains: 0
Dataframe file 14 contains: 0
Dataframe file 15 contains: 0
Dataframe file 16 contains: 0
Dataframe file 17 contains: 0


Get the start time (from the spreadsheet) of each video.

Note that this is not the start time in the movie file.

The start time in the movie file has to be manually obtained.

It seems that in general, the movie number is one less than the dataframe video number. The mapping of videos to movie number is found in the file: LP.movie.and.corresponding.video.order(1).xlsx

In [77]:
extract.get_dataframe_start_time()

Dataframe file 2 movie: 1 starts at : 2016-10-20 06:06:15.96
Dataframe file 3 movie: 2 starts at : 2016-10-20 06:26:16.96
Dataframe file 4 movie: 3 starts at : 2016-10-20 07:06:49.96
Dataframe file 5 movie: 4 starts at : 2016-10-20 07:26:50.96
Dataframe file 6 movie: 5 starts at : 2016-10-20 08:07:26.96
Dataframe file 7 movie: 6 starts at : 2016-10-20 08:27:27.96
Dataframe file 8 movie: 7 starts at : 2016-10-20 09:07:57.96
Dataframe file 9 movie: 8 starts at : 2016-10-20 09:27:58.96
Dataframe file 10 movie: 9 starts at : 2016-10-20 10:08:26.96
Dataframe file 11 movie: 10 starts at : 2016-10-20 10:28:27.96
Dataframe file 12 movie: 11 starts at : 2016-10-20 11:08:49.96
Dataframe file 13 movie: 12 starts at : 2016-10-20 11:28:50.96
Dataframe file 14 movie: 13 starts at : 2016-10-20 12:09:33.96
Dataframe file 15 movie: 14 starts at : 2016-10-20 12:29:34.96
Dataframe file 16 movie: 15 starts at : 2016-10-20 13:10:14.96
Dataframe file 17 movie: 16 starts at : 2016-10-20 13:30:15.96


Set the name of the movie file. Do not add the extension.

In [78]:
extract.set_movie_name('MOVI0011')

Set the movie number.

This is the true movie file number.

In [79]:
extract.set_movie_number(11)

Set the video number.

This is not the true movie number, but the number in the spreadsheet.

In [80]:
extract.set_data_number(12)

Set the name of the category.

In [81]:
extract.set_category_name('GI-LP0242')

Set the true start time from the very first frame in the movie file. Open the movie and pause it immediately, you'll see the timestamp for the very first frame.

The format should be as follows: `dd-mm-yyyy_HH_mm_ss`

In [82]:
extract.set_true_movie_start_time('20-11-2016_10_41_07')

Set the date from the video. This is the date displayed on the video frames

In [83]:
extract.set_video_date('20/11/2016')

This will create a folder called "Images" where the final images will be stored in.

In [84]:
extract.create_directory_for_images()

Extract the frames from the video. This operation makes use of ffmpeg (https://www.ffmpeg.org/)

Here you must specify the frame rate. 15 seems to be a good value. A value of 30 will result in a lot more frames which could be useful for ML; but this will also result in more disk space being used to store all the images.

In [85]:
extract.apply_ffmpeg(30)

At this point you'll have many image files in the Image folder. Open the first one which gives you the first timestamp.

The framerate doesn't seem to be constant throughout the video, and the first timestamp seems to last longer than the others. For this reason, we set the second timestamp to be the first "good" one. To do this, go to the next image until the timestamp changes. Set the value below to the second timestamp. Format: HH:MM:SS

As a result of this, the frames prior to this "second" timestamp will be ignored. You can manually delete those files later on.

In [86]:
extract.set_first_good_frame_time('10:41:08') 

The next function will rename each file with the corresponding timestamp name. Of course given that the frame rate is not consistent it is possible that there is a slight lag and the file name does not match the correct timestamp in the image.

The resulting file names will have this format: `movie_name.avi_image_date time_index.png` for example: MOVI0015.avi_image_2016-10-18 13:01:56_0.png the 'index' is just a counter which starts from 0. This was done because for a given time there may be several images if a high frame rate was used with ffmpeg. 

Here you must specify the frame rate you used above.

You must also also specify the start_index. This value is the index number of the "second" timestamp discussed above. For example, if the video starts at 13:30:01 (note that the very first frame will be called ...13:30:01\_0.png) then after a few frames the timestamp changes to 13:30:02. This timestamp is the "second" timestamp. This file will have an index number, for example ...13:30:02\10.png (11th file, we start from zero). In this case, start_index=10

In [87]:
extract.update_names_in_folder(start_index=14, frame_rate=30)


Specify the feature and the value to use.

Specify the dataframe start time for the particular movie file. This can be obtained using the function `get_dataframe_start_time` which was done earlier in this notebook. This is not the start time of the movie but rather the start time in the spreadsheet. In this example the preycapture feature is used with a value of 3. It appears that preycapture is better than prey_type.

In [92]:
times = extract.get_frames_from_data(feature_column = 'Preytype', 
                                     feature_code = 3, 
                                     df_time= '20-11-2016_11_08_49')

No prey captures found.


Finally, this function extracts the corresponding frames and places them in the Image folder.

In [93]:
extract.move_prey_frames(times)

exists


TypeError: 'NoneType' object is not iterable