In [1]:
import os
import pandas as pd
import pims
import imageio as io
from tqdm import tqdm
from joblib import Parallel, delayed

In [2]:
# Folder holding the raw .avi recordings and one tracking CSV per recording
# (same base name as the video).
datafolder = r"C:\Users\serce\Desktop\string_pullingxDLC_sonia_paxiao"
labeled_data = os.path.join(datafolder, 'labeled-data')
if not os.path.exists(labeled_data):
    os.mkdir(labeled_data)

# List the folder once: nothing in the loop below adds files to `datafolder`
# itself, so re-listing it for every video was redundant work.
datafolder_entries = os.listdir(datafolder)
avis = [os.path.join(datafolder, avi) for avi in datafolder_entries if avi.endswith('.avi')]

# Column layout the tracking CSVs must have, in this exact order ...
expected_columns = ['X_RightPaw', 'Y_RightPaw', 'X_LeftPaw', 'Y_LeftPaw']
# ... and the three-level header they are mapped onto, position by position
# (right paw x/y first, then left paw x/y).
label_columns = pd.MultiIndex.from_product(
    [['sonia_paxiao'], ['paw_f_right', 'paw_f_left'], ['x', 'y']],
    names=['scorer', 'bodyparts', 'coords'])

for avi in avis:
    # Strip the '.avi' suffix to get the name shared by video, CSV and output folder.
    folder_name = os.path.split(avi)[-1][:-4]
    csv_name = folder_name + '.csv'
    if csv_name not in datafolder_entries:
        print(csv_name, 'not in', datafolder)
        continue

    labels = pd.read_csv(os.path.join(datafolder, csv_name))
    # Compare as plain lists: comparing an Index against a list of a different
    # length raises ValueError, which used to abort the whole loop instead of
    # reporting the offending file.
    if list(labels.columns) != expected_columns:
        print('Check', csv_name, 'column names!!!')
        continue

    # Relabel the columns instead of copying them one by one into an empty frame;
    # the row index (0..n-1 from read_csv) is kept as-is.
    labels.columns = label_columns
    # Keep only rows where at least one coordinate is present.
    labels = labels.dropna(how='all')

    videofolder = os.path.join(labeled_data, folder_name)
    if not os.path.exists(videofolder):
        os.mkdir(videofolder)
    labels.to_csv(os.path.join(videofolder, 'CollectedData_sonia_paxiao.csv'))

In [3]:
def generate_video(labeled_data_subfolder, videos_folder, csvname, inputvideotype, outputvideotype='.mp4'):
    """Export the labeled frames of one video and re-index its label CSV.

    For every row of the label CSV, the row's index value is used as a frame
    number in the source video. Each such frame is written as
    ``imgNNNNNN.png`` (numbered consecutively from 0) into
    ``labeled_data_subfolder`` and appended to a video written next to it.
    The CSV is then overwritten in place with its index replaced by the
    relative image paths ``labeled-data/<video name>/imgNNNNNN.png``
    (presumably the DeepLabCut labeled-data layout -- confirm).

    Because the CSV is overwritten, the function expects its index to still
    hold frame numbers; running it a second time on the same folder will fail
    when the path strings are converted with ``int``.

    Parameters
    ----------
    labeled_data_subfolder : str
        Folder containing ``csvname``. Its last path component is used as the
        base name of the source video and of the output video.
    videos_folder : str
        Folder containing the source video.
    csvname : str
        File name of the label CSV inside ``labeled_data_subfolder``; read with
        three header rows and the first column as index.
    inputvideotype : str
        Extension (including the dot) appended to the base name to locate the
        source video.
    outputvideotype : str, optional
        Extension (including the dot) of the video that is written.

    Returns
    -------
    None
    """
    video_name = os.path.split(labeled_data_subfolder)[-1]
    csv_path = os.path.join(labeled_data_subfolder, csvname)
    vidname = os.path.join(labeled_data_subfolder, video_name + outputvideotype)

    df = pd.read_csv(csv_path, index_col=0, header=[0, 1, 2])
    new_index = []

    print('Writing the video and the csv file...')
    print(vidname)
    # Context managers make sure both the reader and the writer are closed,
    # even if a frame cannot be read or written.
    with pims.Video(os.path.join(videos_folder, video_name + inputvideotype)) as video:
        fps = video.frame_rate
        # Stream frames straight into the writer instead of collecting them in
        # a list: memory use stays at one frame per worker.
        # macro_block_size=None is forwarded unchanged from the original call.
        with io.get_writer(vidname, mode='I', fps=fps, macro_block_size=None) as writer:
            for count, i in enumerate(tqdm(df.index)):
                # Decode each frame once and reuse it for both outputs.
                frame = video[int(i)]
                imgname = 'img' + str(count).zfill(6) + '.png'
                io.imwrite(uri=os.path.join(labeled_data_subfolder, imgname), im=frame)
                writer.append_data(frame)
                new_index.append(os.path.join("labeled-data", video_name, imgname))
    print('Video is written')

    # Replace the frame-number index by the image paths collected above; this
    # avoids a temporary column on the MultiIndex-column frame.
    df.index = new_index
    df.to_csv(csv_path)
    print(csv_path)
    print('Csv is written')

In [4]:
def generate_videos(labeled_data_folder, videos_folder, csvname, inputvideotype, outputvideotype='.mp4', n_jobs=16):
    """Run ``generate_video`` for every sub-directory of ``labeled_data_folder``.

    Each directory entry of ``labeled_data_folder`` that is itself a directory
    becomes one ``generate_video`` task; plain files are skipped. The tasks
    are executed through ``joblib.Parallel`` with ``n_jobs`` workers and
    verbose progress reporting. The remaining arguments are forwarded to
    ``generate_video`` unchanged. Nothing is returned.
    """
    candidates = (
        os.path.join(labeled_data_folder, entry)
        for entry in os.listdir(labeled_data_folder)
    )
    # One delayed call per sub-directory; non-directories are filtered out here.
    jobs = (
        delayed(generate_video)(subfolder, videos_folder, csvname, inputvideotype, outputvideotype)
        for subfolder in candidates
        if os.path.isdir(subfolder)
    )
    runner = Parallel(n_jobs=n_jobs, verbose=100)
    runner(jobs)

In [5]:
# Inputs for the batch export. `labeled_data` is the 'labeled-data' folder
# defined earlier in the notebook; the raw videos are read from `videos_folder`.
labeled_data_folder = labeled_data
videos_folder = r"C:\Users\serce\Desktop\string_pullingxDLC_sonia_paxiao"
csvname = 'CollectedData_sonia_paxiao.csv'
inputvideotype = ".avi"

# Process every labeled-data sub-folder in parallel.
generate_videos(
    labeled_data_folder=labeled_data_folder,
    videos_folder=videos_folder,
    csvname=csvname,
    inputvideotype=inputvideotype,
    outputvideotype='.mp4',
    n_jobs=32,
)
print('All done!')

[Parallel(n_jobs=32)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=32)]: Done   1 tasks      | elapsed:  1.0min
[Parallel(n_jobs=32)]: Done   2 out of  43 | elapsed:  2.6min remaining: 52.4min
[Parallel(n_jobs=32)]: Done   3 out of  43 | elapsed:  2.8min remaining: 36.9min
[Parallel(n_jobs=32)]: Done   4 out of  43 | elapsed:  3.3min remaining: 32.3min
[Parallel(n_jobs=32)]: Done   5 out of  43 | elapsed:  4.3min remaining: 32.3min
[Parallel(n_jobs=32)]: Done   6 out of  43 | elapsed:  4.4min remaining: 27.3min
[Parallel(n_jobs=32)]: Done   7 out of  43 | elapsed:  4.8min remaining: 24.5min
[Parallel(n_jobs=32)]: Done   8 out of  43 | elapsed:  4.8min remaining: 20.9min
[Parallel(n_jobs=32)]: Done   9 out of  43 | elapsed:  4.8min remaining: 18.1min
[Parallel(n_jobs=32)]: Done  10 out of  43 | elapsed:  4.9min remaining: 16.0min
[Parallel(n_jobs=32)]: Done  11 out of  43 | elapsed:  5.1min remaining: 14.8min
[Parallel(n_jobs=32)]: Done  12 out of  43 | elapse