### Imports

In [1]:
import os
import numpy as np
import pandas as pd
import tqdm
from copy import deepcopy
from leaf_measurements import find_lamina_width_and_leaf_length, dist_two_points
import re
import shutil\

### Functions

In [2]:
def get_bbox(bbox,original=True):
    """Return a box as four corners plus the x/y lists of its closed outline.

    Parameters
    ----------
    bbox : when ``original`` is true, a 5-item sequence whose first item is
        ignored and whose remaining items are read as ``a, b, c, d``;
        otherwise a 4-corner sequence ``[(a, b), _, (c, d), _]``.
    original : selects which of the two input layouts ``bbox`` uses.

    Returns
    -------
    tuple
        ``(corners, [xs, ys])``. ``corners`` is
        ``[(a, b), (c, b), (c, d), (a, d)]`` for the original layout, or the
        input object itself for the corner layout. ``xs``/``ys`` hold five
        values each, the last repeating the first so the outline is closed.
    """
    if not original:
        # Corner layout: only the first and third corners are read.
        (a,b),_,(c,d),_ = bbox
    else:
        # Flat layout: skip the leading item, then expand into four corners.
        a,b,c,d = bbox[1:]
        bbox = [(a,b),(c,b),(c,d),(a,d)]

    outline_x = [a,a,c,c,a]
    outline_y = [b,d,d,b,b]
    return bbox,[outline_x,outline_y]

In [3]:
def get_results(filename,data,whole_or_partial="whole",check_zeros=False):
    """Collect per-leaf traits for one image from a segmentation output.

    Parameters
    ----------
    filename : key of the image inside ``data``.
    data : mapping of image name to a dict holding the segmentation,
        bounding-box and cropped-image lists for that image.
    whole_or_partial : ``"whole"`` reads the ``*Whole_Leaf*`` keys; any other
        value reads the ``*Partial_Leaf*`` keys.
    check_zeros : when true, only count leaves with a non-empty segmentation.

    Returns
    -------
    list or dict
        With ``check_zeros`` a list holding one ``1`` per leaf whose
        segmentation entry is non-empty. Otherwise a dict keyed by
        ``"<filename>__<leaf index>"`` whose values hold the shifted contour,
        bounding box, centroid, measurement lines and the traits copied from
        the segmentation record. Empty when ``data`` is empty or the image
        has no segmentation key.
    """
    results = [] if check_zeros else {}

    if not len(data) > 0:
        return results

    if whole_or_partial == "whole":
        segmentation, bbox_key, cropped = (
            'Segmentation_Whole_Leaf', 'Whole_Leaf_BBoxes', 'Whole_Leaf_Cropped')
    else:
        segmentation, bbox_key, cropped = (
            'Segmentation_Partial_Leaf', 'Partial_Leaf_BBoxes', 'Partial_Leaf_Cropped')

    sheet = data[filename]
    if segmentation not in sheet.keys():
        return results

    # Each segmentation entry is a one-key dict; that key also indexes the
    # matching cropped-image entry.
    keynames = [list(entry.keys())[0] for entry in sheet[segmentation]]
    for i,keyname in enumerate(keynames):
        # Original bounding box (keep an untouched copy for the output):
        raw_bbox = sheet[bbox_key][i]
        bbox_snapshot = deepcopy(raw_bbox)
        box_original,_ = get_bbox(raw_bbox)
        # Cropped image:
        cropped_image = sheet[cropped][i][keyname]
        # Segmentation record(s) for this leaf:
        res = sheet[segmentation][i][keyname]

        if not len(res) > 0:
            continue
        if check_zeros:
            results.append(1)
            continue

        name = list(res[0].keys())[0]
        leaf = res[0][name]

        # Translated bounding box:
        box_translated = leaf['bbox']
        _,bbox_coords_translated = get_bbox(box_translated,original=False)

        # Translated contour:
        polygon = leaf['polygon_closed']
        y = polygon[:,1]
        x = polygon[:,0]

        # Mean corner-to-corner offset between the original and translated
        # boxes (stored under "scale" in the output).
        dxs,dys = zip(*[np.array(box_original[corner])-np.array(box_translated[corner])
                        for corner in range(4)])
        kx = np.average(list(dxs))
        ky = np.average(list(dys))

        # Move contour and box outline back by that offset:
        polygon_moved = [x+kx,y+ky]
        bbox_coords_translated[0] = bbox_coords_translated[0]+kx
        bbox_coords_translated[1] = bbox_coords_translated[1]+ky

        # Other traits:
        long = leaf['long']
        short = leaf['short']
        convexity = leaf['convexity']
        concavity = leaf['concavity']
        aspect_ratio = leaf['aspect_ratio']
        cx,cy = leaf['centroid']
        centroid = [cx+kx,cy+ky]

        # Measurements, on the untouched and on the moved contour:
        old_measurements = find_lamina_width_and_leaf_length(x, y, [cx,cy])
        measurements = find_lamina_width_and_leaf_length(polygon_moved[0], polygon_moved[1], centroid)
        ## Width
        (wx1,wx2),(wy1,wy2) = measurements["width"]
        d_width = dist_two_points(wx1, wy1, wx2, wy2)
        ## Length
        (lx1,lx2),(ly1,ly2) = measurements["length"]
        d_length = dist_two_points(lx1, ly1, lx2, ly2)

        results[filename+'__'+str(i)] = {
            "contour": polygon_moved,
            "boundingBox": bbox_coords_translated,
            "long": long,
            "short": short,
            "convexity": convexity,
            "concavity": concavity,
            "aspect_ratio": aspect_ratio,
            "centroid": centroid,
            "width_line": measurements["width"],
            "length_line": measurements["length"],
            "length_ASJ": d_length,
            "width_ASJ": d_width,
            "perimeter": leaf['perimeter'],
            "area": leaf['area'],
            "bbox_recentred": bbox_snapshot,
            "bbox_moved": box_translated,
            "contour_original_output": polygon,
            "scale": [kx,ky],
            "cropped_image": cropped_image,
            "centroid_old": [cx,cy],
            "width_line_old": old_measurements["width"],
            "length_line_old": old_measurements["length"],
        }

    return results

In [4]:
def shorten_results(results,filename,main_keys):
    """Return a copy of ``results`` that keeps only the ``main_keys`` traits.

    Parameters
    ----------
    results : dict mapping a leaf name to its dict of traits.
    filename : unused; kept so existing callers keep working.
    main_keys : container of trait names to keep.

    Returns
    -------
    dict
        New dict with the same leaf names. Each entry keeps its original key
        order but only the keys found in ``main_keys``. Kept values are
        deep-copied, so the result shares nothing with ``results``. An empty
        ``results`` gives an empty dict.
    """
    # Filter before copying: the discarded traits (contours, cropped images)
    # are the large ones, so they are never duplicated. Filtering per entry
    # also means entries need not share an identical key set.
    return {
        leaf_name: {
            key: deepcopy(value)
            for key, value in traits.items()
            if key in main_keys
        }
        for leaf_name, traits in results.items()
    }

In [9]:
def rename_file(filename):
    """Build a lower-case ``<taxon>_<gbif_id>`` name for an image.

    Looks up ``filename + '.jpg'`` in the ``file_name`` column of the
    module-level ``df`` table and uses the first matching row.

    Parameters
    ----------
    filename : image name without the ``.jpg`` extension.

    Returns
    -------
    str
        ``<label>_<gbif_id>`` lower-cased, where ``<label>`` is the first
        usable value among the ``species``, ``genus`` and ``scientificName``
        columns with spaces replaced by underscores. If none of them is
        usable, just the GBIF id.

    Raises
    ------
    LookupError
        If no row of ``df`` matches the file name (``KeyError``, a subclass,
        if the ``gbif_id`` column is missing).
    """
    filename = filename+'.jpg'
    names = df[df['file_name']==filename]
    if names.empty:
        raise LookupError('no taxon-mapping row for {!r}'.format(filename))

    gbif_id = str(names['gbif_id'].iloc[0])

    # Fall back to the bare id when no taxon column yields a string.
    name = gbif_id
    for column in ('species', 'genus', 'scientificName'):
        try:
            label = names[column].iloc[0].replace(' ','_')
        except (KeyError, AttributeError):
            # Column absent, or the cell is missing (NaN/None has no .replace).
            continue
        name = label+'_'+gbif_id
        break
    return name.lower()

In [31]:
def shorten_table_and_get_jsons(results):
    """Summarise one sheet's leaf results as a median row and a per-leaf table.

    Parameters
    ----------
    results : non-empty dict keyed by ``"<filename>__<leaf index>"``, as
        built by ``get_results``.

    Returns
    -------
    tuple
        ``(new_name, json_table, final_table)``: the sheet name from
        ``rename_file`` (or the raw file name if that lookup fails), a Series
        with the per-column median plus ``total_leaves_or_leaflets``, and a
        per-leaf DataFrame with ``herbarium_sheet`` and ``leaf_index``
        columns in front.
    """
    # Strip the "__<index>" suffix by splitting on the last separator; a
    # fixed-width slice breaks as soon as the first index has two digits.
    filename = list(results.keys())[0].rsplit('__', 1)[0]
    res = pd.DataFrame(shorten_results(results,filename,main_keys)).T

    res = res.rename(columns={'long':'length_LM','short':'width_LM','length_ASJ':'longest_length','width_ASJ':'perpendicular_width'})
    json_table = res.median()
    json_table['total_leaves_or_leaflets'] = int(len(res))

    # Resolve the sheet name before it is used as a column value below.
    try:
        new_name = rename_file(filename)
    except Exception:
        # Best effort: keep the raw file name when the lookup fails.
        new_name = filename

    final_table = res.reset_index().rename(columns={'index':'leaf_index'})
    final_table.insert(0, "herbarium_sheet", new_name)

    return new_name,json_table,final_table

### Loop

In [14]:
# Folder whose entries are each loaded with np.load by the processing loop.
numpy_folder_path = '/Volumes/ARCHIVE 5/numpy_files'

In [15]:
# Every directory entry is listed, unfiltered and in os.listdir order.
files = os.listdir(numpy_folder_path)

In [13]:
# Output folder for the per-sheet JSON files written by the processing loop.
path_to_jsons = '/Users/arias1/Documents/Github/LeafMachine2_usual/json_results'

In [23]:
# Trait keys kept when shorten_table_and_get_jsons trims each leaf's results.
main_keys = ['long','short','perimeter','area','length_ASJ','width_ASJ']

In [27]:
# Lookup table read by rename_file, which uses its file_name, species, genus,
# scientificName and gbif_id columns.
df = pd.read_csv('/Users/arias1/Downloads/multimedia-taxon-mapping 1.csv')

In [None]:
# Process every saved project file: extract whole-leaf traits per image,
# write one JSON summary per herbarium sheet, and collect the per-leaf tables.
all_tables = []   # per-sheet leaf tables, concatenated into `metadata` below
herb_names = {}   # sheet name -> number of times it has been seen so far
errors = []       # [file, index, filename] for every image that failed
for file in tqdm.tqdm(files):
    pth = os.path.join(numpy_folder_path, file)
    # NOTE(security): allow_pickle=True unpickles arbitrary objects and can
    # execute code; only load files from a trusted source.
    project = np.load(pth,allow_pickle=True)
    for index in tqdm.tqdm(range(len(project))):
        filenames = list(project[index].keys())
        for filename in filenames:
            try:
                results = get_results(filename,project[index],whole_or_partial="whole")
                if len(results) > 0:
                    new_name,json_table,final_table = shorten_table_and_get_jsons(results)
                    # The first sheet with a given name keeps it; repeats get
                    # a "_<count>" suffix so their JSON files do not collide.
                    k = herb_names.get(new_name, 0) + 1
                    herb_names[new_name] = k
                    if k > 1:
                        new_name = new_name+'_'+str(k)
                    json_table.to_json(os.path.join(path_to_jsons, new_name+'.json'), orient = 'split', compression = 'infer', index = True)
                    all_tables.append(final_table)
            except Exception:
                # Record the failure and keep going. Catching Exception rather
                # than everything lets Ctrl-C still stop this long-running loop.
                errors.append([file,index,filename])

# pd.concat raises on an empty list, so fall back to an empty table.
metadata = pd.concat(all_tables) if all_tables else pd.DataFrame()

  0%|                                                     | 0/288 [00:00<?, ?it/s]
  0%|                                                     | 0/288 [00:00<?, ?it/s][A
100%|███████████████████████████████████████████| 288/288 [00:48<00:00,  5.92it/s][A
  0%|▏                                          | 1/288 [00:49<3:57:40, 49.69s/it]
  0%|                                                     | 0/288 [00:00<?, ?it/s][A
100%|███████████████████████████████████████████| 288/288 [00:37<00:00,  7.70it/s][A
  1%|▎                                          | 2/288 [01:28<3:25:07, 43.03s/it]
  0%|                                                     | 0/288 [00:00<?, ?it/s][A
100%|███████████████████████████████████████████| 288/288 [00:23<00:00, 12.33it/s][A
  1%|▍                                          | 3/288 [01:52<2:43:24, 34.40s/it]
  0%|                                                     | 0/288 [00:00<?, ?it/s][A
100%|███████████████████████████████████████████| 288/288 [01:32<0