# File ordering algorithm

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pyidi
import pandas as pd
import re
import warnings
import json as js

  np.bool8: (False, True),


In [2]:
def generate_figure(file_path, sequential_image_n=0, bins=100, subset_size=10000, save=False):
    """Show one video frame with its extreme pixels marked, next to an intensity histogram.

    Left axes: frame ``sequential_image_n`` of the video in grayscale, with
    blue dots on pixels whose value is >= 2**16 - 6 and red dots on pixels
    whose value is <= 5. Right axes: histogram of ``subset_size`` pixel values
    drawn at random (without replacement) from a cropped region of the frame.

    The figure, both axes and the two marker artists are created on the first
    call and reused on later calls. The original guarded creation with
    ``'fig' not in globals()`` while binding the objects as locals, so a call
    made while a global ``fig`` existed failed with ``UnboundLocalError``.

    Parameters
    ----------
    file_path : str
        Path handed to ``pyidi.pyIDI``; also used as the axes title and, when
        ``save`` is true, as the base of the output file name.
    sequential_image_n : int
        Index of the frame taken from ``video.mraw``.
    bins : int
        Number of histogram bins.
    subset_size : int
        Number of pixels sampled for the histogram.
    save : bool
        When true, write the figure to
        ``"lighting/" + file_path[:-5] + "_saturation.png"``.

    Returns
    -------
    matplotlib.figure.Figure
        The (reused) figure.

    Raises
    ------
    ValueError
        From ``np.random.choice`` when ``subset_size`` exceeds the number of
        pixels in the cropped region (sampling is without replacement).
    """
    cache = getattr(generate_figure, '_figure_cache', None)
    if cache is None:
        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
        wd, = ax1.plot([], [], c='b', marker='.', linestyle='None')
        bd, = ax1.plot([], [], c='r', marker='.', linestyle='None')
        cache = (fig, ax1, ax2, wd, bd)
        generate_figure._figure_cache = cache
    fig, ax1, ax2, wd, bd = cache

    # NOTE(review): the literal 0 is passed here regardless of the
    # `sequential_image_n` argument; left unchanged - confirm it is intended.
    video = pyidi.pyIDI(file_path, sequential_image_n=0)
    mraw = video.mraw
    still_image = mraw[sequential_image_n]

    # Thresholds presumably flag (nearly) saturated and (nearly) black pixels
    # of a 16-bit image - TODO confirm the bit depth.
    white_indices = np.where(still_image >= 2**16-6)
    black_indices = np.where(still_image <= 5)
    # np.where returns (rows, cols); plotting needs x=cols, y=rows.
    wd.set_data(white_indices[1], white_indices[0])
    bd.set_data(black_indices[1], black_indices[0])

    # Drop the frame drawn by a previous call so images do not pile up on the
    # reused axes (the marker lines are kept and merely updated above).
    for old_image in list(ax1.images):
        old_image.remove()
    ax1.imshow(still_image, cmap='gray')
    ax1.set_title(file_path)

    # Hard-coded crop; presumably trims a 10 % border off a 512 x 1024 frame
    # - TODO confirm against the camera resolution.
    cropped = still_image[51:512-51, 102:1024-102]
    p_int_subset = np.random.choice(cropped.flatten(), size=subset_size, replace=False)
    ax2.clear()
    ax2.hist(p_int_subset, bins=bins)
    fig.canvas.draw()
    if save:
        # file_path[:-5] strips a five-character suffix such as ".mraw".
        fig.savefig("lighting/" + file_path[:-5]+"_saturation" + '.png')
    return fig

def find_file_description(file, file_comments, new_design = False, taut = False, mass = True, rev = 0, version = 0, pluck_or_impact = 'impact', web = 0):
    """Extract the measurement properties encoded in a data file name.

    The name (without extension, and without its first 16 characters when it
    starts with a digit) is split on ``_`` and each part is inspected:

    * ``ecc<c>``   -> ``ecc`` is the last character of the part (a string)
    * ``Floc<n>``  -> ``Floc`` is the first run of digits as an ``int``
    * ``v<c>``     -> ``version`` is the last character (a string)
    * ``rev<c>``   -> ``rev`` is the last character (a string)
    * ``new[<c>]`` -> ``new_design`` is True; ``web`` is 0 when the part ends
      in ``w`` or ``0`` and 1 when it ends in ``1`` or ``2``

    Independently of the split, ``'taut'`` anywhere in the name sets ``taut``,
    ``'pluck'`` sets ``pluck_or_impact`` to ``'pluck'``, and otherwise
    ``'mass'`` in the name sets ``mass`` to False.
    NOTE(review): ``'mass'`` in the name turning the flag *off*, and that test
    being skipped for pluck files, is kept exactly as it was - confirm intent.

    Parameters
    ----------
    file : str
        File name such as ``Full_web_ecc1_new2_rev3_Floc4_v0_S01.pkl``.
    file_comments : mapping
        Looked up with ``file`` as key to obtain the comment.
    new_design, taut, mass, rev, version, pluck_or_impact, web
        Values returned when the name does not override them. Note that
        ``rev`` and ``version`` default to the integer 0 but become strings
        when parsed from the name.

    Returns
    -------
    tuple
        ``(ecc, Floc, web, rev, version, new_design, pluck_or_impact, mass,
        taut, file_comment)``. ``ecc`` and ``Floc`` are ``None`` when the name
        does not contain them (previously this raised ``UnboundLocalError``);
        ``file_comment`` is ``None`` when ``file`` is not in ``file_comments``.
    """
    # Strip the extension by its dot rather than a fixed width, so ".mraw"
    # names do not leave a trailing "." on the last field.
    stem = file.rsplit('.', 1)[0]
    # A leading digit marks a 16-character prefix that is not part of the
    # description. file[:1] keeps an empty name from raising IndexError.
    if re.match(r'\d', file[:1]):
        stem = stem[16:]

    ecc = None
    Floc = None
    for prop in stem.split('_'):
        if "ecc" in prop:
            ecc = prop[-1]
        elif "Floc" in prop:
            digits = re.findall(r'\d+', prop)
            if digits:
                Floc = int(digits[0])
        elif "v" in prop and "rev" not in prop:
            version = prop[-1]
        elif "rev" in prop:
            rev = prop[-1]
        elif "new" in prop:
            new_design = True
            if prop[-1] == "w" or prop[-1] == "0":
                web = 0
            elif prop[-1] == "2" or prop[-1] == "1":
                web = 1

    if 'taut' in file:
        taut = True
    if 'pluck' in file:
        pluck_or_impact = 'pluck'
    elif 'mass' in file:
        mass = False

    file_comment = file_comments[file] if file in file_comments else None
    return (ecc, Floc, web, rev, version, new_design, pluck_or_impact, mass, taut, file_comment)
    

In [3]:
# Link every sensor recording (.pkl) to its video (.mraw) on the local disks
# and on Google Drive, and write the resulting table to file_ordering.xlsx.
with open('C:/Users/thijs/Documents/GitHub/LDAQ/examples/file_comments.json', 'r') as f:
    file_comments = js.load(f)

folder_paths_video_local = ['D:/HSC', 'F:/', 'C:/Users/thijs/Documents/HSC/']
folder_path_video_drive = 'H:/My Drive/PHD/HSC'
folder_path_sensors = 'C:/Users/thijs/Documents/GitHub/LDAQ/examples/Data'


def _index_videos(folder, skip_marker=None):
    """Map each .mraw file name under *folder* to the first path os.walk (bottom-up) yields for it.

    Directories whose path contains *skip_marker* are ignored when it is given.
    """
    index = {}
    for root, _dirs, files in os.walk(os.path.normpath(folder), topdown=False):
        if skip_marker is not None and skip_marker in root:
            continue
        for name in files:
            if name.endswith('.mraw'):
                index.setdefault(name, os.path.join(root, name))
    return index


# Walk every video location once instead of once per sensor file. As before,
# a hit in a later local folder replaces a hit in an earlier one, and local
# directories with "$" in their path are skipped.
local_video_index = {}
for folder_path_local in folder_paths_video_local:
    local_video_index.update(_index_videos(folder_path_local, skip_marker="$"))
drive_video_index = _index_videos(folder_path_video_drive)

dict_list = []
unlinked_files = []
for file in os.listdir(folder_path_sensors):
    if not file.endswith('.pkl'):
        continue
    sensor_file_path = os.path.join(os.path.normpath(folder_path_sensors), file)
    time_stamp_sensors = os.path.getmtime(sensor_file_path)

    # Names starting with a digit carry a 16-character prefix that the video
    # file name does not have.
    if bool(re.findall(r'\d+', file[0])):
        file_name_video = file[16:-4] + "_S01.mraw"
    else:
        file_name_video = file[:-4] + "_S01.mraw"

    file_path_local = local_video_index.get(file_name_video)
    file_path_drive = drive_video_index.get(file_name_video)
    time_stamp_video_local = os.path.getmtime(file_path_local) if file_path_local else None
    time_stamp_video_drive = os.path.getmtime(file_path_drive) if file_path_drive else None

    if not file_path_local and not file_path_drive:
        # Recordings whose name contains one of these fragments are dropped
        # silently when no video is found.
        if any(tag in file_name_video for tag in ('load', 'uned', 'ensor', 'brat')):
            continue
        warnings.warn(file_name_video + ': File not found')
        unlinked_files.append(file_name_video)
        file_path_local = 'Video data can not be found'
        file_path_drive = 'Video data can not be found'

    # Parsing the name is the step that can fail, so this is what the warning
    # must guard (the original bare `except:` wrapped only the dict literal
    # below, which cannot raise).
    try:
        (ecc, Floc, web, rev, version, new_design, pluck_or_impact, mass, taut, file_comment) = find_file_description(file, file_comments)
    except (NameError, IndexError, ValueError):
        warnings.warn(file_name_video + ': File was found, but could not be ordered properly')
        continue

    current_dict = {'file name video': file_name_video[:-5],
                    'ecc': ecc,
                    'with mass': mass,
                    'bottom radial taut': taut,
                    'pluck or impact': pluck_or_impact,
                    'Floc': Floc,
                    'web': web,
                    'rev': rev,
                    'version': version,
                    'new design': new_design,
                    'file path video local': file_path_local,
                    'date modified video local': time_stamp_video_local,
                    'file path video google drive': file_path_drive,
                    'date modified video google drive': time_stamp_video_drive,
                    'file path sensors': sensor_file_path,
                    'date modified sensors': time_stamp_sensors,
                    'file comment': file_comment}
    dict_list.append(current_dict)

df = pd.DataFrame(dict_list)
df.to_excel('file_ordering.xlsx')
with open('unlinked_files.txt', 'w') as f:
    f.writelines(f"{item}\n" for item in unlinked_files)



In [4]:
# Add a row for every "Full_web_" video that has no sensor recording in
# dict_list, write the extended table to file_ordering_missing_info.xlsx and
# list those videos in unlinked_video.txt.
unlinked_video_paths = []
unlinked_videos = []
dict_list_copy = dict_list.copy()
# Names already handled. Built from dict_list rather than df so that
# re-running this cell (which overwrites df below) gives the same result.
known_videos = {row['file name video'] for row in dict_list}
for local_path in folder_paths_video_local + [folder_path_video_drive]:
    # Decide drive vs. local from the folder being walked. Comparing `root`
    # with the drive folder was only true for its top directory, so videos in
    # Drive subfolders were recorded as local.
    on_drive = local_path == folder_path_video_drive
    for root, dirs, files in os.walk(local_path):
        for file in files:
            if not file.endswith('.mraw'):
                continue
            video_name = file[:-5]
            if video_name in known_videos:
                continue
            if 'ingle' in file or 'Full_web_' not in file:
                continue
            # `taut` was previously unpacked into a misspelled name (`tau`),
            # so the row below silently used a stale value from an earlier cell.
            (ecc, Floc, web, rev, version, new_design, pluck_or_impact, mass, taut, file_comment) = find_file_description(file, file_comments)
            video_path = os.path.join(root, file)
            time_stamp_video = os.path.getmtime(video_path)
            if on_drive:
                file_path_local, time_stamp_video_local = None, None
                file_path_drive, time_stamp_video_drive = video_path, time_stamp_video
            else:
                file_path_local, time_stamp_video_local = video_path, time_stamp_video
                file_path_drive, time_stamp_video_drive = None, None
            time_stamp_sensors = None
            current_dict = {'file name video': video_name,
                            'ecc': ecc,
                            'with mass': mass,
                            'bottom radial taut': taut,
                            'pluck or impact': pluck_or_impact,
                            'Floc': Floc,
                            'web': web,
                            'rev': rev,
                            'version': version,
                            'new design': new_design,
                            'file path video local': file_path_local,
                            'date modified video local': time_stamp_video_local,
                            'file path video google drive': file_path_drive,
                            'date modified video google drive': time_stamp_video_drive,
                            'file path sensors': 'The sensor data was not saved!',
                            'date modified sensors': time_stamp_sensors,
                            'file comment': file_comment}
            dict_list_copy.append(current_dict)
            # Remember the name so a second copy elsewhere is not added again.
            known_videos.add(video_name)
            normalized_path = os.path.join(os.path.normpath(root), file)
            unlinked_videos.append(video_name)
            unlinked_video_paths.append(normalized_path)
            print(normalized_path)
df = pd.DataFrame(dict_list_copy)
df.to_excel('file_ordering_missing_info.xlsx')

with open('unlinked_video.txt', 'w') as f:
    for file, path in zip(unlinked_videos, unlinked_video_paths):
        f.write(f"{file}, {path}\n")

I:\ecc1\Full_web_ecc1_new2_rev3_Floc4_v0_S01\Full_web_ecc1_new2_rev3_Floc4_v0_S01.mraw
I:\ecc2\Full_web_ecc2_new2_Floc15_taut_impact_v0_S02\Full_web_ecc2_new2_Floc15_taut_impact_v0_S02.mraw
D:\HSC\ecc2\web0\rev1\Full_web_ecc2_new_Floc5_v0_S02\Full_web_ecc2_new_Floc5_v0_S02.mraw
D:\HSC\ecc2\web0\rev1\Full_web_ecc2_new_Floc5_v1_S01\Full_web_ecc2_new_Floc5_v1_S01.mraw
D:\HSC\ecc2\web0\rev1\Full_web_ecc2_new_Floc5_v1_S02\Full_web_ecc2_new_Floc5_v1_S02.mraw
F:\temp\Full_web_ecc1_new_Floc1_v3_S02\Full_web_ecc1_new_Floc1_v3_S02.mraw
F:\temp\Full_web_ecc1_new_Floc8_v1_S01\Full_web_ecc1_new_Floc8_v1_S01.mraw
H:\My Drive\PHD\HSC\ecc1\web0\rev1\Full_web_ecc1_new_Floc7_v1_S01\Full_web_ecc1_new_Floc7_v1_S01.mraw


In [5]:
# Loop over all .mraw files under E:\thijs\ and check whether a file with the
# same name also exists in one of the folders in folder_paths_video_local.
# Output (duplicate_local_files.txt): one "name, other path, path on E" line
# per folder in which a copy was found, then three blank lines, then the files
# found only on E:.

# Index each local folder once (first bottom-up hit per file name, skipping
# directories with "$" in their path) instead of re-walking it for every file.
local_first_hits = []
for folder_path in folder_paths_video_local[:3]:
    first_hit = {}
    for root, dirs, files in os.walk(os.path.normpath(folder_path), topdown=False):
        root = os.path.normpath(root)
        if "$" in root:
            continue
        for name in files:
            if name.endswith('.mraw'):
                first_hit.setdefault(name, os.path.join(root, name))
    local_first_hits.append(first_hit)

duplicate_files = []
duplicate_file_paths = []
duplicate_file_paths_E = []
files_only_E = []
for root_E, dirs_E, files_E in os.walk('E:/thijs/', topdown=False):
    root_E = os.path.normpath(root_E)
    for file in files_E:
        if not file.endswith('.mraw'):
            continue
        path_E = os.path.join(root_E, file)
        found_elsewhere = False
        for first_hit in local_first_hits:
            if file in first_hit:
                duplicate_files.append(file)
                duplicate_file_paths.append(first_hit[file])
                duplicate_file_paths_E.append(path_E)
                found_elsewhere = True
        # Previously every file ended up in this list, duplicates included,
        # because the `break` only left the innermost walk.
        if not found_elsewhere:
            files_only_E.append(path_E)
with open('duplicate_local_files.txt', 'w') as f:
    for file, path, path_E in zip(duplicate_files, duplicate_file_paths, duplicate_file_paths_E):
        f.write(f"{file}, {path}, {path_E}\n")
    f.write("\n\n\n")
    for file in files_only_E:
        f.write(f"{file}\n")

In [6]:
# Loop over duplicate_file_paths_E and delete the folder on E: in which each duplicated file is located.
# import shutil
# for path in duplicate_file_paths_E:
#     shutil.rmtree(os.path.dirname(path), ignore_errors=True)