# DLC data management

### Add new videos

In [None]:
# Usage template for add_new_videos (imported from deeplabcut.create_project
# further down): replace the placeholders with the project's config file path
# and the list of new video paths.
add_new_videos(r'CONFIG_FILE_PATH', [r'NEW_VIDEO_PATHS'], copy_videos = True)

### Update the project path and video directories

In [None]:
# Usage template: rewrites 'project_path' and the 'video_sets' keys in
# <PROJECT_PATH>/config.yaml so they point at PROJECT_PATH and its 'videos'
# sub-folder. Run after moving or renaming a project folder.
update_paths(r'PROJECT_PATH')

### Merge the source project into the target project

In [None]:
# Usage template: both arguments are config.yaml paths. Labelled data of the
# source project is merged into the target project (target config first).
merge_projects(r'CONFIG_FILE_PATH_OF_THE_TARGET_PROJECT',
               r'CONFIG_FILE_PATH_OF_THE_SOURCE_PROJECT')

In [None]:
# Usage template: variant taking project folder paths instead of config.yaml
# paths; update_paths is run on both projects before merging.
merge_projects2(r'TARGET_PROJECT_PATH',
                r'SOURCE_PROJECT_PATH')

### Copy the project

In [None]:
# Usage template: DESTINATION_PATH is the new project folder itself
# (it is created when it does not exist).
copy_project(r'SOURCE_PROJECT_PATH',
             r'DESTINATION_PATH')

In [None]:
# Usage template: the project is copied to
# DESTINATION_DIRECTORY/<name of the source project folder>.
copy_project2(r'SOURCE_PROJECT_PATH',
              r'DESTINATION_DIRECTORY')

### Create the DLC Data index

In [None]:
# Usage template: writes index.csv into DLC_DATA_FOLDER_PATH with one row per
# video of every project sub-folder. Note that update_paths is run on each
# project, i.e. the projects' config.yaml files are rewritten.
create_dlc_data_ndex(r'DLC_DATA_FOLDER_PATH')

### Copy DLC Data

In [None]:
# Usage template: DESTINATION_PATH is the new DLC data folder itself
# (it is created when it does not exist).
copy_dlc_data(r'SOURCE_DLC_DATA_FOLDER_PATH',
              r'DESTINATION_PATH')

In [None]:
# Usage template: the data folder is copied to
# DESTINATION_DIRECTORY/<name of the source DLC data folder>.
copy_dlc_data2(r'SOURCE_DLC_DATA_FOLDER_PATH',
               r'DESTINATION_DIRECTORY')

## *Run all the cells below before calling the functions above*

### Add new videos

In [None]:
from deeplabcut.create_project import add_new_videos

### Update the project path and video directories

In [None]:
from deeplabcut.utils import auxiliaryfunctions
from pathlib import Path

In [None]:
def update_paths(prj_path):
    """Point a project's config.yaml at the project's current location.

    Sets ``project_path`` to ``prj_path`` and re-keys every ``video_sets``
    entry to ``<prj_path>/videos/<original file name>``, keeping the value
    stored under each key. The config file is rewritten in place.

    Parameters
    ----------
    prj_path : str or Path
        Folder of the project; must contain ``config.yaml``.
    """
    project_root = Path(prj_path)
    config_file = project_root / 'config.yaml'
    videos_folder = project_root / 'videos'

    config = auxiliaryfunctions.read_config(config_file)
    config['project_path'] = str(prj_path)

    # Only the file name of each registered video is kept; its directory is
    # replaced by the project's own 'videos' folder.
    config['video_sets'] = {
        str(videos_folder / Path(video).name): settings
        for video, settings in config['video_sets'].items()
    }

    auxiliaryfunctions.write_config(config_file, config)

### Merge the source project into the target project

In [None]:
from deeplabcut.utils import auxiliaryfunctions
from pathlib import Path
import os
import pandas as pd
import shutil
import numpy as np

In [None]:
def merge_projects(trg_cnf_path, src_cnf_path):
    """Merge the labelled data of the source project into the target project.

    A video is merged when its file stem appears in the ``video_sets`` of both
    configs and the source project holds a ``CollectedData_<scorer>.csv`` for
    it. For each such video:

    * the source label rows are appended to the target CSV (on duplicate frame
      names the row already present in the target is kept),
    * the frames listed in the source CSV are copied into the target
      ``labeled-data/<stem>`` folder,
    * the target ``CollectedData_<scorer>.h5`` is regenerated from the merged
      CSV.

    Parameters
    ----------
    trg_cnf_path : str or Path
        Path to the config.yaml of the target project.
    src_cnf_path : str or Path
        Path to the config.yaml of the source project.

    Notes
    -----
    The ``project_path`` entry of each config is used to locate the
    ``labeled-data`` folders, so both must be up to date.
    Source rows are written under the target header unchanged, i.e. the
    body-part column order of both projects is assumed to match; both lists
    are printed so this can be checked by eye.
    """
    trg_cnf = auxiliaryfunctions.read_config(trg_cnf_path)
    trg_scr = trg_cnf['scorer']
    trg_prj_path = Path(trg_cnf['project_path'])
    trg_vid_stems = {Path(_).stem for _ in trg_cnf['video_sets']}

    src_cnf = auxiliaryfunctions.read_config(src_cnf_path)
    src_scr = src_cnf['scorer']
    src_prj_path = Path(src_cnf['project_path'])
    src_vid_stems = {Path(_).stem for _ in src_cnf['video_sets']}

    trg_csv_name = 'CollectedData_' + trg_scr + '.csv'
    trg_h5_name = 'CollectedData_' + trg_scr + '.h5'
    src_csv_name = 'CollectedData_' + src_scr + '.csv'

    # Intersection of
    # stems of videos in the target project and
    # stems of videos in the source project, excluding those without
    # CollectedData_<scorer>.csv; sorted so the processing order is stable.
    ints = sorted(_ for _ in trg_vid_stems & src_vid_stems
                  if (src_prj_path/'labeled-data'/_/src_csv_name).exists())

    for i in ints:
        trg_dat_dir = trg_prj_path/'labeled-data'/i
        src_dat_dir = src_prj_path/'labeled-data'/i
        trg_csv_path = trg_dat_dir/trg_csv_name

        # The target may list the video without having a labeled-data folder
        # for it yet; without the folder neither the CSV nor the frames could
        # be written.
        trg_dat_dir.mkdir(parents=True, exist_ok=True)

        if not trg_csv_path.exists():
            # Start from a collected-data file that holds only the three
            # header rows (scorer / bodyparts / coords).
            trg_bod_parts = trg_cnf['bodyparts']
            new_coll_dat = [['scorer'] + [trg_scr] * 2 * len(trg_bod_parts),
                            ['bodyparts'] + [_i for _i in trg_bod_parts for _j in range(2)],
                            ['coords'] + ['x', 'y'] * len(trg_bod_parts)]
            pd.DataFrame(new_coll_dat).to_csv(trg_csv_path, header=False, index=False)

        # Target is read with its three header rows (they stay on top of the
        # merged table); the source header rows are skipped.
        trg_coll_dat = pd.read_csv(trg_csv_path, header=None, index_col=0)
        src_coll_dat = pd.read_csv(src_dat_dir/src_csv_name, header=None, index_col=0, skiprows=3)

        mrg = pd.concat([trg_coll_dat, src_coll_dat])
        # Regarding duplicates, keep those from the target collected data
        mrg = mrg[~mrg.index.duplicated()]
        mrg.to_csv(trg_csv_path, header=False)

        # Copy the frames that the source CSV lists. Iterating over the merged
        # index instead would also look up target-only frames in the source
        # project, where they do not exist.
        for j in src_coll_dat.index.unique():
            shutil.copy2(src_prj_path/Path(j), trg_dat_dir)

        # Convert .csv to .h5; adapted from utils/conversioncode.py
        # The scorer row becomes the column header here, so row 0 of `data`
        # is the bodyparts row, row 1 the coords row and rows 2+ the labels.
        data = pd.read_csv(trg_csv_path)

        # Every body part occupies two columns (x, y): take every second name
        orderofbpincsv = list(data.values[0, 1:-1:2])
        imageindex = list(data.values[2:, 0])

        # Printed for a manual comparison of the CSV's body parts against the
        # target config (no automatic check is made).
        print(orderofbpincsv)
        print(trg_cnf['bodyparts'])

        #TODO: test len of images vs. len of imagenames for another sanity check

        index = pd.MultiIndex.from_product([[trg_scr], orderofbpincsv, ['x', 'y']],
                                           names=['scorer', 'bodyparts', 'coords'])
        frame = pd.DataFrame(np.array(data.values[2:, 1:], dtype=float),
                             columns=index, index=imageindex)

        frame.to_hdf(trg_dat_dir/trg_h5_name, key='df_with_missing', mode='w')

In [None]:
def merge_projects2(trg_prj_path, src_prj_path):
    """Merge the source project into the target project, given project folders.

    Convenience wrapper around ``merge_projects``: the paths stored in both
    projects' config.yaml are refreshed with ``update_paths`` first, then the
    two ``config.yaml`` files are handed to ``merge_projects``.

    Parameters
    ----------
    trg_prj_path : str or Path
        Folder of the target project (receives the merged data).
    src_prj_path : str or Path
        Folder of the source project.
    """
    update_paths(trg_prj_path)
    update_paths(src_prj_path)
    # The function defined in this notebook is `merge_projects` (plural)
    merge_projects(Path(trg_prj_path)/'config.yaml',
                   Path(src_prj_path)/'config.yaml')

### Copy the project

In [None]:
import os
from pathlib import Path
from deeplabcut.utils import auxiliaryfunctions
import pandas as pd
import shutil

In [None]:
def copy_project(src, dst):
    """Copy the essential parts of a project from ``src`` to ``dst``.

    Copied are, for every video in the config's ``video_sets``: the labelled
    frames listed in ``CollectedData_<scorer>.csv`` together with that CSV and
    its ``.h5`` (only when the CSV exists), and the video file itself. The
    config is written to ``dst`` with ``iteration`` reset to 0 and its paths
    rewritten by ``update_paths``. Empty ``dlc-models`` and
    ``training-datasets`` folders are created.

    Parameters
    ----------
    src : str or Path
        Folder of the project to copy.
    dst : str or Path
        Folder of the new project; created when missing.
    """
    def make_dir(folder):
        # Create `folder` (with parents) unless the path already exists
        if not os.path.exists(folder):
            os.makedirs(folder)

    make_dir(dst)
    src_root, dst_root = Path(src), Path(dst)

    config = auxiliaryfunctions.read_config(src_root/'config.yaml')
    scorer = config['scorer']
    video_paths = config['video_sets'].keys()
    csv_name = 'CollectedData_' + scorer + '.csv'
    h5_name = 'CollectedData_' + scorer + '.h5'

    # Copy labelled data
    print('    Labelled data')
    make_dir(dst_root/'labeled-data')

    for stem in [Path(video).stem for video in video_paths]:
        src_frames = src_root/'labeled-data'/Path(stem)
        dst_frames = dst_root/'labeled-data'/Path(stem)
        make_dir(dst_frames)

        src_csv = src_frames/csv_name
        if not os.path.exists(src_csv):
            # Unlabelled video: only the empty folder is created
            continue

        # The index holds the frame paths relative to the project folder
        labels = pd.read_csv(src_csv, header=None, index_col=0, skiprows=3)
        for frame in labels.index:
            shutil.copy2(src_root/Path(frame), dst_frames)

        shutil.copy2(src_csv, dst_frames)
        shutil.copy2(src_frames/h5_name, dst_frames)

    # Copy videos
    print('    Videos')
    make_dir(dst_root/'videos')
    for video in video_paths:
        shutil.copy2(video, dst_root/'videos')

    # Write and update config.yaml
    print('    config.yaml')
    config['iteration'] = 0
    auxiliaryfunctions.write_config(dst_root/'config.yaml', config)
    update_paths(dst)

    # Create dlc-models and training-datasets folders
    for folder in ('dlc-models', 'training-datasets'):
        make_dir(dst_root/folder)

In [None]:
def copy_project2(src, dst_dir):
    """Copy project ``src`` into ``dst_dir``, keeping the project folder name.

    Equivalent to ``copy_project(src, dst_dir/<name of src>)``.
    """
    destination = Path(dst_dir) / Path(src).name
    copy_project(src, destination)

### Create the DLC Data index

In [None]:
from pathlib import Path
from deeplabcut.utils import auxiliaryfunctions
import os
from operator import itemgetter
import pandas as pd

In [None]:
def _h5_rank(h5_path):
    """Sort key ranking analysis files by the last '_'-separated stem token.

    Purely numeric tokens are compared as integers, so '100000' outranks
    '50000' (a plain string comparison ranks them the other way round).
    Any other token is compared as a string and ranks ahead of the numeric
    ones, as letter-initial tokens did under string comparison.
    """
    token = h5_path.stem.split('_')[-1]
    if token.isdecimal():
        return (0, int(token), '')
    return (1, 0, token)


def create_dlc_data_ndex(dlc_dat):
    """Write ``index.csv`` summarising every project in a DLC data folder.

    Every sub-folder of ``dlc_dat`` that contains a ``config.yaml`` is treated
    as a project; other sub-folders are ignored. One row is written per entry
    of each project's ``video_sets`` with the columns:

    * ``Video`` - file name of the video
    * ``Project`` - name of the project folder
    * ``Number of Labelled Frames`` - data rows of
      ``labeled-data/<stem>/CollectedData_<scorer>.csv``, or 'Not labelled'
    * ``.h5`` - highest-ranked ``videos/<stem>*.h5`` file, or 'Not analysed'
    * ``Labelled Video`` - ``<.h5 stem>_labeled.mp4`` if present in
      ``videos``, 'Not created' if not, 'Not analysed' without an ``.h5``

    Side effect: ``update_paths`` is run on every project, which rewrites the
    project's config.yaml.

    Parameters
    ----------
    dlc_dat : str or Path
        Folder that holds the project folders; ``index.csv`` is written here.
    """
    columns = ['Video',
               'Project',
               'Number of Labelled Frames',
               '.h5',
               'Labelled Video']
    ind = []

    # Sorted for a reproducible row order; folders without a config.yaml
    # (e.g. notebook checkpoint folders) are not projects.
    prj_paths = sorted(_ for _ in Path(dlc_dat).iterdir()
                       if _.is_dir() and (_/'config.yaml').exists())
    for i in prj_paths:
        update_paths(i)

        cnf = auxiliaryfunctions.read_config(i/'config.yaml')
        scr = cnf['scorer']

        for j in cnf['video_sets']:
            vid_stem = Path(j).stem

            # Number of Labelled Frames
            coll_dat_path = i/'labeled-data'/vid_stem/('CollectedData_'+scr+'.csv')
            if not coll_dat_path.exists():
                labd_frm_nmb = 'Not labelled'
            else:
                try:
                    # The three header rows are not frames
                    labd_frm_nmb = pd.read_csv(coll_dat_path, header=None, skiprows=3).shape[0]
                except pd.errors.EmptyDataError:
                    # File holds the header rows only
                    labd_frm_nmb = 0

            # .h5
            # NOTE(review): the pattern matches any file starting with the
            # stem, so 'vid1' also matches files of 'vid10' - confirm naming.
            h5_paths = sorted((i/'videos').glob(vid_stem+'*.h5'))
            if not h5_paths:
                h5 = 'Not analysed'
                labd_vid = 'Not analysed'
            else:
                # Best trained comes first; on ties the alphabetically first
                # file wins (max returns the first maximal element).
                h5 = max(h5_paths, key=_h5_rank).name

                # Labelled Video
                labd_vid = Path(h5).stem + '_labeled.mp4'
                if not (i/'videos'/labd_vid).exists():
                    labd_vid = 'Not created'

            ind.append({'Video': Path(j).name,
                        'Project': Path(i).name,
                        'Number of Labelled Frames': labd_frm_nmb,
                        '.h5': h5,
                        'Labelled Video': labd_vid})

    # Passing the columns explicitly keeps the header row (and avoids a
    # KeyError) when no project was found.
    pd.DataFrame(ind, columns=columns).to_csv(Path(dlc_dat)/'index.csv', index=False)

### Copy DLC Data

In [None]:
import os
import pandas as pd
from pathlib import Path
import shutil

In [None]:
def copy_dlc_data(src, dst):
    """Copy a DLC data folder (all indexed projects) from ``src`` to ``dst``.

    The index of ``src`` is rebuilt first with ``create_dlc_data_ndex``. Then,
    for every project listed in ``index.csv``:

    * the project is copied to ``dst/<project>`` with ``copy_project2``,
    * the indexed ``.h5`` files and labelled videos are copied into
      ``dst/<project>/videos``,
    * the first file matching ``src/<project>.*`` (the "example video") is
      copied into ``dst``, if there is one.

    Finally ``index.csv`` itself is copied into ``dst``.

    Parameters
    ----------
    src : str or Path
        DLC data folder to copy.
    dst : str or Path
        New DLC data folder; created when missing.
    """
    os.makedirs(dst, exist_ok=True)

    create_dlc_data_ndex(src)

    # Project names are read as text so that they can always be joined to paths
    ind = pd.read_csv(Path(src)/'index.csv', dtype={'Project': str})

    # Grouping yields a DataFrame per project regardless of how many videos
    # it has; sort=False keeps the order of first appearance in the index.
    for i, ind_curr_prj in ind.groupby('Project', sort=False):
        print('Copying Project: ' + i)
        # Copy project
        copy_project2(Path(src)/i, dst)

        # Copy .h5 and labelled videos
        print('    .h5 and labelled videos')
        h5 = [_ for _ in ind_curr_prj['.h5'] if _ != 'Not analysed']
        labd_vid = [_ for _ in ind_curr_prj['Labelled Video']
                    if _ not in ('Not analysed', 'Not created')]

        for j in h5 + labd_vid:
            shutil.copy2(Path(src)/i/'videos'/j, Path(dst)/i/'videos')

        # Copy the example video: a file next to the project folder that
        # carries the project's name plus an extension
        print('    Example video')
        ex_vid = sorted(_ for _ in Path(src).glob(i+'.*') if _.is_file())
        if ex_vid:
            shutil.copy2(ex_vid[0], dst)
        else:
            # A missing example video should not abort the remaining projects
            print('    No example video found for ' + i)

    print('Copying index.csv')
    shutil.copy2(Path(src)/'index.csv', dst)

In [None]:
def copy_dlc_data2(src, dst_dir):
    """Copy DLC data folder ``src`` into ``dst_dir``, keeping its folder name.

    Equivalent to ``copy_dlc_data(src, dst_dir/<name of src>)``.
    """
    destination = Path(dst_dir) / Path(src).name
    copy_dlc_data(src, destination)