### Creating Jsons

Reformatting the label JSON files into training / testing data, for use in training a [Mask RCNN](https://github.com/matterport/Mask_RCNN) model on ALICE data.

#### Imports

In [9]:
import os
import json
import skimage.io as io
import re
import pandas as pd

#### Loading Json Data

In [2]:
pth = "Data//Original_Data//Labels//all_labels.json"
# Use a context manager so the file handle is closed as soon as parsing is done,
# and read as UTF-8 explicitly instead of relying on the platform's locale encoding.
with open(pth, encoding="utf-8") as f:
    data = json.load(f)

In [3]:
# Keys of the image-metadata mapping, in insertion order.
file_refs = [ref for ref in data['_via_img_metadata']]

In [4]:
# 'size' field of every metadata entry, in the same order as the keys.
sizes = list(map(lambda meta: meta['size'], data['_via_img_metadata'].values()))

In [7]:
# Recover the plain file name from each metadata key by cutting the key at the
# size value. The right-most occurrence is used: searching from the left cuts
# too early whenever the file name itself contains the same digits as the size
# (e.g. "scan_512.png512" with size 512 would become "scan_").
all_names = []

for ref, size in zip(file_refs, sizes):
    size_text = str(size)
    cut = ref.rfind(size_text)
    if cut == -1:
        # Fail with the offending key instead of an AttributeError on None.
        raise ValueError(
            "Size " + size_text + " does not occur in metadata key " + repr(ref)
        )
    all_names.append(ref[:cut])

In [10]:
# Lookup table with one row per image: file name, size and metadata key.
# Built as three rows and transposed, then the positional columns are named.
index = (
    pd.DataFrame([all_names, sizes, file_refs])
    .T
    .rename(columns={0: "Name", 1: "Size", 2: "Ref"})
)

#### Format Dictionaries

In [21]:
def get_new_dict(path, data, index, image_test=True):
    """Collect the metadata entries for the image files found in ``path``.

    Parameters
    ----------
    path : str
        Directory to scan. Every entry whose name does not end in ".json"
        is treated as an image file.
    data : dict
        Loaded label data; entries are read from ``data['_via_img_metadata']``.
    index : pandas.DataFrame
        Lookup table with a ``Name`` column (file name) and a ``Ref`` column
        (the corresponding key in ``data['_via_img_metadata']``).
    image_test : bool, default True
        If true, each file is opened with ``io.imread`` first; files that fail
        to load are left out and reported on stdout.

    Returns
    -------
    dict
        Mapping of metadata key -> metadata entry for every retained file.

    Raises
    ------
    IndexError
        If a retained file name has no row in ``index``.
    KeyError
        If a ``Ref`` from ``index`` is not present in ``data['_via_img_metadata']``.
    """
    names = []
    errors = []
    # Retrieve names of files in path:
    for file in os.listdir(path):
        # Skip label files by extension; a substring test would also drop
        # images that merely have "json" somewhere in their name.
        if file.lower().endswith(".json"):
            continue
        if image_test:
            # Test whether the image loads correctly. This step is needed in
            # case some images have not been uploaded correctly.
            try:
                io.imread(os.path.join(path, file))
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts.
                errors.append(file)
            else:
                names.append(file)
        else:
            names.append(file)
    if image_test:
        print("Total errors: "+str(len(errors)))
        if errors:
            # Name the unreadable files so they can be fixed or re-uploaded.
            print("Failed to load: "+", ".join(errors))
    if not names:
        return {}
    # Get alternative names from index. Build the lookup once instead of
    # filtering the whole frame per file; the first row wins for duplicates.
    ref_by_name = {}
    for name, ref in zip(index['Name'], index['Ref']):
        ref_by_name.setdefault(name, ref)
    missing = [nm for nm in names if nm not in ref_by_name]
    if missing:
        raise IndexError("No index entry for file(s): "+", ".join(missing))
    # Create new dictionary:
    metadata = data['_via_img_metadata']
    data_new = {}
    for nm in names:
        ref = ref_by_name[nm]
        data_new[ref] = metadata[ref]

    return data_new

In [22]:
# Metadata subset for the training images; each image is load-tested first.
train_dict = get_new_dict(
    path="Data//Original_Data//All_Images//train",
    data=data,
    index=index,
    image_test=True,
)

Total errors: 0


In [26]:
# Metadata subset for the validation images; each image is load-tested first.
val_dict = get_new_dict(
    path="Data//Original_Data//All_Images//val",
    data=data,
    index=index,
    image_test=True,
)

Total errors: 1


In [27]:
# Write each split to its own JSON file in the working directory (train first, then val).
for split_dict, out_name in ((train_dict, 'train_dict.json'), (val_dict, 'val_dict.json')):
    pd.DataFrame(split_dict).to_json(out_name)