In [1]:
import glob
import json
import timeit
import os
import numpy as np


In [2]:
# Folder holding the per-variable vec2raster outputs (relative to this notebook).
data_folder = "../../data/"
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the listing is reproducible between runs and machines.
files_to_load = sorted(glob.glob(data_folder + "*_vec2raster_gadm.json"))
files_to_load


['../../data/precip_vec2raster_gadm.json',
 '../../data/tavg_vec2raster_gadm.json']

In [3]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return the resulting object.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.
    """
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)


# Load both per-variable collections that get merged further down.
tavg_json = _load_json('../../data/tavg_vec2raster_gadm.json')
precip_json = _load_json('../../data/precip_vec2raster_gadm.json')


In [4]:
# Report how many features each input collection holds (tavg first, then precip).
for collection in (tavg_json, precip_json):
    print(len(collection["features"]))


47857
47857


In [5]:
# List the top-level keys of the loaded tavg JSON object.
tavg_json.keys()


dict_keys(['type', 'features'])

In [6]:
# Show the value stored under the collection's top-level 'type' key.
tavg_json['type']

'FeatureCollection'

In [7]:
# Peek at the first tavg feature to see the per-feature layout
# (id, type, properties, geometry).
# Note: the name `tavg` is rebound on every iteration of the merge loop below.
tavg = tavg_json["features"][0]
tavg


{'id': '0',
 'type': 'Feature',
 'properties': {'tavg_value': -8.784,
  'GID_2': 'AFG.1.1_1',
  'tavg_color': '#e8f0f4'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[71.41149139400005, 36.55717468300003],
    [71.4095382700001, 36.552366257000074],
    [71.39558410700005, 36.55453491200001],
    [71.37394714300018, 36.55474090600018],
    [71.36436462400019, 36.55226135200019],
    [71.33878326500002, 36.54087448100012],
    [71.3184280390002, 36.5344619760001],
    [71.30189514200015, 36.5235519420001],
    [71.28774261600012, 36.52112579300018],
    [71.28182983400012, 36.51720810000012],
    [71.27996063100005, 36.50761795000017],
    [71.27594757100019, 36.499774933000026],
    [71.26815795800007, 36.49425125200014],
    [71.25976562500017, 36.4832534790001],
    [71.24686431800018, 36.477092743000185],
    [71.24159240800009, 36.47653961200018],
    [71.23185730000017, 36.47864914000007],
    [71.22167968800005, 36.48429870500007],
    [71.20222473100017, 36.4800262450000

In [8]:
start = timeit.default_timer()
#---------------------------------------------------------------------------

# Merge the tavg and precip collections feature-by-feature into one list.
# Features are paired purely by position, so both inputs must have the same
# number of features in the same order; this is verified below.
tavg_features = tavg_json["features"]
precip_features = precip_json["features"]

# Without this check a shorter precip input would raise a bare IndexError and
# a longer one would be silently truncated.
assert len(tavg_features) == len(precip_features), (
    f"Feature count mismatch: tavg has {len(tavg_features)}, "
    f"precip has {len(precip_features)}"
)

# Progress is reported every `progress_every` features (never at index 0).
progress_every = 100000

combined_list = []

for index, (tavg, precip) in enumerate(zip(tavg_features, precip_features)):
    if index % progress_every == 0 and index != 0:
        print(f"Done merging {index} features.")

    # Positional pairing is only valid if both features describe the same id.
    assert tavg['id'] == precip['id'], (
        f"Feature id mismatch at position {index}: "
        f"tavg id {tavg['id']!r} != precip id {precip['id']!r}"
    )

    combined = {
        # Top-level id is the integer position; the source id is kept
        # unchanged under properties['id'].
        'id': index,
        'type': tavg['type'],
        'properties': {
            'id': tavg['id'],
            'tavg_value': tavg['properties']['tavg_value'],
            'tavg_color': tavg['properties']['tavg_color'],
            'precip_value': precip['properties']['precip_value'],
            'precip_color': precip['properties']['precip_color'],
        },
        # Geometry is taken from the tavg feature only; the precip geometry
        # is not compared against it.
        'geometry': tavg['geometry']
    }
    combined_list.append(combined)

#---------------------------------------------------------------------------
end = timeit.default_timer()
print()
print(f"Time to complete: {(end - start):.5f}s")



Time to complete: 0.05769s


In [9]:
# Sanity check: the merge must yield exactly one output per tavg feature.
merged_count = len(combined_list)
print(merged_count)
assert merged_count == len(tavg_json["features"])


47857


In [10]:
# Confirm the first merged feature has a "geometry" entry.
"geometry" in combined_list[0]


True

In [11]:
# Inspect the merged properties of the first combined feature.
combined_list[0]["properties"]


{'id': '0',
 'tavg_value': -8.784,
 'tavg_color': '#e8f0f4',
 'precip_value': 40.0,
 'precip_color': '#eef5fc'}

In [12]:
# Create a dict that we can save to a JSON file and that still keeps the
# header information ("type") of the input file.
combined_dict = {
    "type": tavg_json["type"],
    # Names of the variables carried by each merged feature. The merge writes
    # tavg_* and precip_* properties, so the second entry is "precip"
    # (it previously read "vpd", which is not part of this merge).
    "variables": ["tavg", "precip"],
    "features": combined_list,
}


In [13]:
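# NOTE: both exports below are currently disabled (commented out), so running
# this cell writes nothing. The file-size check in the next cell reads
# ../../data/raster_grid_gadm.js, so either that file must already exist on
# disk or the .js export below must be re-enabled before running it.
#
# Technique for writing a JS variable declaration followed by raw JSON:
# https://stackoverflow.com/questions/49865442/write-into-a-js-file-without-passing-in-string-quotes-in-python
# varname = "const raster_grid = "
# with open('../../data/raster_grid_gadm.js', 'w') as outfile:
#     outfile.write(varname)
#     # write the JSON value after the variable declaration
#     outfile.write(json.dumps(combined_dict))

# Plain-JSON variant of the same export:
# with open('../../data/raster_grid_gadm.json', 'w') as outfile:
#     outfile.write(json.dumps(combined_dict))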


In [14]:
# Report the on-disk size of the exported grid file in MB (1 MB = 1e6 bytes).
# The individual input files are around ~200-300 MB each.
# NOTE(review): the merged file for 2 variables was originally noted as
# ~342 MB, but the last recorded run of this cell printed 391.028 MB -
# confirm which figure is current.
# Per the original note, the file size will always be limited by the
# geometry field.
output_path = "../../data/raster_grid_gadm.js"
try:
    size_bytes = os.path.getsize(output_path)
except FileNotFoundError:
    # The export cell may be disabled, in which case the file may not exist;
    # report that instead of aborting the notebook with a traceback.
    filesize = None
    print(f"{output_path} not found - enable and run the export cell first.")
else:
    filesize = np.round(size_bytes / 1e6, 3)
    print(f"The file is {filesize} MB")


The file is 391.028 MB
