In [12]:
import json
import os
import glob

import pandas as pd
import numpy as np
from pathlib import Path

## Prepare directories

In [13]:
# Directory holding the JSON annotation files before they are sorted by frame number.
source_unsorted_dir = Path('../data/unsorted_jsons_nnunet/')

In [14]:
# Directory that receives the frame-sorted JSON files and from which embeddings are read.
source_dir = Path('../data/jsons_nnunet/')

In [15]:
# Directory where the extracted embedding arrays (.npy files) are saved.
dest_dir = Path('../data/embeddings_nnunet')

In [16]:
# Create the output directory (including missing parents) and report it,
# but only when it is not already there.
if not dest_dir.exists():
    dest_dir.mkdir(parents=True)
    print(f'Created dir: {dest_dir} ')

### Sort json annotations by frame number

In [17]:
def sort_json_files(source_folder, dest_folder):
    """Write a frame-sorted copy of every ``*.json`` file in ``source_folder``.

    Each input file is expected to contain a JSON array of objects that all
    carry a ``'frame'`` key. The records are ordered by that key (ascending;
    the sort is stable) and written under the same file name in
    ``dest_folder`` with a 4-space indent.

    Args:
        source_folder: Directory searched (non-recursively) for ``*.json`` files.
        dest_folder: Directory receiving the sorted copies; created if missing.

    Raises:
        KeyError: If a record has no ``'frame'`` key.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    os.makedirs(dest_folder, exist_ok=True)

    # glob order depends on the filesystem; sort it so runs are reproducible.
    json_files = sorted(glob.glob(os.path.join(source_folder, '*.json')))

    for json_file in json_files:
        # JSON is UTF-8 by specification; do not rely on the platform's
        # locale encoding, which differs between machines.
        with open(json_file, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # Sort by frame number
        sorted_data = sorted(data, key=lambda x: x['frame'])

        # Keep the original file name so downstream lookups by name still work.
        new_file_path = os.path.join(dest_folder, os.path.basename(json_file))

        with open(new_file_path, 'w', encoding='utf-8') as new_file:
            json.dump(sorted_data, new_file, indent=4)

In [18]:
# Write frame-sorted copies of every JSON file in source_unsorted_dir into source_dir.
sort_json_files(source_unsorted_dir, source_dir)

## Get mapping: video_id -> video directory

In [19]:
# CSV file that maps each video id to its directory name.
mapping_path = Path('../data/maps/mapping.csv')

In [20]:
# Load the mapping table; its 'id' and 'directory' columns are read by the
# conversion loop further down.
df = pd.read_csv(mapping_path)
df

Unnamed: 0,id,directory
0,id_101,Lewkowska 02062022 - Compression extrinseque
1,id_102,20230525-090005917_MIN.PROBE18-020-080004 - 20...
2,id_103,20230525-090005917_MIN.PROBE18-020-080004 - 20...
3,id_104,20230525-090005917_MIN.PROBE18-020-080004 - 20...
4,id_105,20230525-090005917_MIN.PROBE18-020-080004 - 20...
...,...,...
142,id_241,video_20210623-183451728 - 20210623-183451728
143,id_242,video_20210623-173742338 - 20210623-173742338
144,id_243,Patient 480815MP02 - 20210708 - 20210623-17420...
145,id_244,Patient 480815MP02 - 20210708 - 20210623-17422...


## Convert JSON embeddings to NumPy

In [21]:
def save_embeddings_to_npy(json_file, npy_file):
    """Collect the ``'embedding'`` field of every record in a JSON file into one ``.npy`` array.

    Args:
        json_file: Path to a JSON file containing a list of objects, each
            with an ``'embedding'`` key.
        npy_file: Destination passed to ``np.save``. NumPy appends ``.npy``
            when the name does not already end with it, so ``'id_101'``
            produces ``id_101.npy``.

    Raises:
        ValueError: If the resulting array is empty, i.e. no embedding values
            could be extracted from ``json_file``. Nothing is written then.
        KeyError: If a record has no ``'embedding'`` key.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's
    # locale encoding, which differs between machines.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Extracting "embedding" data from json
    embeddings = [item['embedding'] for item in data]

    # Convert to NumPy
    embeddings_array = np.array(embeddings)
    # Fail loudly before saving rather than leaving an empty array on disk.
    if embeddings_array.size == 0:
        raise ValueError(f'embeddings_array is empty. No embeddings could be extracted for file: {json_file}')

    # Save as '.npy' file
    np.save(npy_file, embeddings_array)

In [22]:
# For every row of the mapping, read '<directory>.json' from source_dir and
# save its embeddings under dest_dir using the row's id as the file name.
for index, row in df.iterrows():
    video_id = row['id']
    json_name = row['directory'] + '.json'
    json_path = source_dir / json_name
    npy_path = dest_dir / video_id

    print(f'index: {index}')
    save_embeddings_to_npy(json_path, npy_path)

index: 0
index: 1
index: 2
index: 3
index: 4
index: 5
index: 6
index: 7
index: 8
index: 9
index: 10
index: 11
index: 12
index: 13
index: 14
index: 15
index: 16
index: 17
index: 18
index: 19
index: 20
index: 21
index: 22
index: 23
index: 24
index: 25
index: 26
index: 27
index: 28
index: 29
index: 30
index: 31
index: 32
index: 33
index: 34
index: 35
index: 36
index: 37
index: 38
index: 39
index: 40
index: 41
index: 42
index: 43
index: 44
index: 45
index: 46
index: 47
index: 48
index: 49
index: 50
index: 51
index: 52
index: 53
index: 54
index: 55
index: 56
index: 57
index: 58
index: 59
index: 60
index: 61
index: 62
index: 63
index: 64
index: 65
index: 66
index: 67
index: 68
index: 69
index: 70
index: 71
index: 72
index: 73
index: 74
index: 75
index: 76
index: 77
index: 78
index: 79
index: 80
index: 81
index: 82
index: 83
index: 84
index: 85
index: 86
index: 87
index: 88
index: 89
index: 90
index: 91
index: 92
index: 93
index: 94
index: 95
index: 96
index: 97
index: 98
index: 99
index: 100

# Test data

In [23]:
# NOTE(review): `aa` is not defined anywhere in the visible notebook, so this
# cell raises NameError. It presumably serves as a manual stop so that
# "Run All" does not continue into the test-data cells below — confirm, and
# consider replacing it with an explicit guard.
aa

NameError: name 'aa' is not defined

In [None]:
# Test data: directory holding the JSON annotation files before sorting.
source_unsorted_dir = Path('../data/unsorted_jsons_nnunet_test/') 

In [None]:
# Test data: rebind the input (sorted JSON) and output (.npy) directories.
# These replace the values assigned earlier in the notebook.
source_dir = Path('../data/jsons_nnunet_test/') 
dest_dir = Path('../data/embeddings_nnunet_test')

In [None]:
# NOTE(review): the sorting step is disabled for the test data; presumably the
# JSON files in source_dir were already sorted in an earlier run — confirm.
#sort_json_files(source_unsorted_dir, source_dir)

In [None]:
# Test data: CSV file that maps each video id to its directory name.
mapping_path = Path('../data/maps/mapping_test.csv')

In [None]:
# Load the mapping from the current `mapping_path` before iterating. The `df`
# created earlier in the notebook was read before `mapping_path` was
# reassigned, so looping over it would still use the earlier mapping.
# A separate name keeps that earlier frame intact.
df_test = pd.read_csv(mapping_path)

# np.save does not create missing parent directories, and `dest_dir` was
# rebound after the earlier directory-creation cell ran.
os.makedirs(dest_dir, exist_ok=True)

# For every row of the mapping, read '<directory>.json' from source_dir and
# save its embeddings under dest_dir using the row's id as the file name.
for index, row in df_test.iterrows():
    npy_file_name = row['id']
    json_file_name = row['directory'] + '.json'
    source_json_file = Path(source_dir / json_file_name)
    dest_file = Path(dest_dir / npy_file_name)
    
    print(f'index: {index}')
    save_embeddings_to_npy(source_json_file, dest_file)

In [None]:
# Print the path of every JSON file currently present in source_dir.
json_pattern = os.path.join(source_dir, '*.json')
for json_path in glob.glob(json_pattern):
    print(json_path)