-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge.py
104 lines (90 loc) · 3.71 KB
/
merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import click
import glob
import json
import os
import sys
@click.command()
@click.option('--folder', '-f', required=False, default='./',
              help='Input folder path (absolute/relative)', type=str)
@click.option('--inputbasename', '-i', required=True,
              help='Input base file name', type=str)
@click.option('--outputbasename', '-o', required=True,
              help='Output file base name', type=str)
# The default of 300 means this option never has to be passed explicitly,
# even though it is flagged as required.
@click.option('--maxfilesize', '-m', required=True, default=300,
              help='Maximum file size of the output file (in bytes)',
              type=int)
def merge(folder: str, inputbasename: str, outputbasename: str,
          maxfilesize: int) -> None:
    """
    Utility to merge multiple JSON files into a single JSON file.
    """
    # NOTE: click renders the docstring above as the command's --help text,
    # so implementation notes are kept in comments instead.
    #
    # Every file matching `<inputbasename>*.json` inside `folder` is read.
    # The first non-empty file becomes the merge target; the entries stored
    # under the first key of each later file are appended to the list stored
    # under the first key of the target. The merged data is then handed to
    # prepareOutput together with `outputbasename` and `maxfilesize`.
    result = {}
    # Change working directory to the given folder. Relative input and
    # output paths are resolved against it from here on.
    os.chdir(folder)
    # glob returns matches in arbitrary order; sort them so the order of the
    # merged entries is reproducible between runs.
    filesList = sorted(glob.glob(f'{inputbasename}*.json'))
    # Raise an exception, if no files with the given input base name are found
    if not filesList:
        # Hide the traceback so that only the message below is printed.
        sys.tracebacklimit = 0
        raise Exception(
            f'No matching files found for base name: {inputbasename}!')

    for fileName in filesList:
        # Support non-english characters by using UTF-8 encoding
        with open(fileName, 'r', encoding='utf-8') as infile:
            data = dict(json.load(infile))
        if not result:
            # Nothing merged so far: this file becomes the merge target.
            result = data
            continue
        if not data:
            # An empty JSON object has no entries to contribute.
            continue
        merged = result[next(iter(result))]
        incoming = data[next(iter(data))]
        if not incoming:
            # An empty entry list has nothing to append (and no first entry
            # whose keys could be compared).
            continue
        # Entries are appended only when the first entry of the file has the
        # same keys, in the same order, as the first merged entry. If nothing
        # has been merged yet there is nothing to compare against.
        if not merged or list(incoming[0].keys()) == list(merged[0].keys()):
            merged.extend(incoming)
        else:
            # Report the skipped file instead of dropping its data silently.
            sys.stderr.write(
                f'Skipping {fileName}: entry keys do not match the files '
                f'merged so far.\n')

    prepareOutput(outputbasename, maxfilesize, result)
    sys.stdout.write('Merge complete!')
def prepareOutput(opfileprefix: str, maxFileSize: int, jsonData: dict) -> None:
    """
    Prepares the output to be written to files.

    The entries stored under the first key of ``jsonData`` are split into
    consecutive chunks, and every chunk is passed to ``writeToFile`` with a
    running file number that starts at 1.

    Args:
        opfileprefix: Prefix of the output file names, forwarded unchanged.
        maxFileSize: Upper bound, in bytes, for each serialized output file.
        jsonData: Merged data; only its first key and that key's list of
            entries are used.

    Raises:
        Exception: If ``maxFileSize`` is not greater than 200.
    """
    if maxFileSize <= 200:
        raise Exception(
            'Invalid max file size. '
            'Please select a value above 200 bytes.')
    # Nothing was merged, so there is nothing to write.
    if not jsonData:
        return
    # Fetches the keyname from the input files
    keyName = next(iter(jsonData))
    # Contains all the entries of the merged JSON
    allEntries = jsonData[keyName]
    chunks = _splitEntries(keyName, allEntries, maxFileSize)
    # fileCount keeps count of the current output file
    for fileCount, chunk in enumerate(chunks, start=1):
        writeToFile(opfileprefix, fileCount, keyName, chunk)


def _splitEntries(keyName: str, entries: list, maxFileSize: int) -> list:
    """
    Groups entries into chunks whose serialized size fits maxFileSize.

    Sizes are measured on the text ``json.dumps`` produces with its default
    settings for ``{keyName: chunk}``. That text is pure ASCII, so its
    length equals its size in bytes.

    Args:
        keyName: Key the chunk is stored under in each output document.
        entries: JSON-serializable entries, kept in their original order.
        maxFileSize: Upper bound, in bytes, for one serialized document.

    Returns:
        A list of chunks (lists of entries). An entry that is larger than
        the limit on its own cannot be split and gets a chunk to itself.
    """
    # Size of the document around the entries, e.g. {"key": []}
    wrapperSize = len(json.dumps({keyName: []}))
    # json.dumps separates list items with ", " by default
    separatorSize = len(', ')
    chunks = []
    currArray = []
    currSize = wrapperSize
    for val in entries:
        valSize = len(json.dumps(val))
        added = valSize + (separatorSize if currArray else 0)
        # Close the current chunk *before* adding an entry that would push
        # the serialized document over the limit.
        if currArray and currSize + added > maxFileSize:
            chunks.append(currArray)
            currArray = []
            currSize = wrapperSize
            added = valSize
        currArray.append(val)
        currSize += added
    # Keep any leftovers as the final chunk
    if currArray:
        chunks.append(currArray)
    return chunks
def writeToFile(opfileprefix: str, fileCount: int, keyName: str,
                data: list) -> None:
    """
    Writes the output to file as generated by the prepareOutput method.

    The document ``{keyName: data}`` is serialized as JSON and written to
    ``<opfileprefix><fileCount>.json``, replacing any existing file of that
    name.

    Args:
        opfileprefix: Prefix of the output file name (may include a path).
        fileCount: Number appended to the prefix to form the file name.
        keyName: Key under which ``data`` is stored in the document.
        data: JSON-serializable entries to store under ``keyName``.
    """
    # Serialize before opening the file so that a serialization error does
    # not leave a truncated or empty output file behind.
    payload = json.dumps({keyName: data})
    opFileName = f'{opfileprefix}{fileCount}.json'
    # json.dumps escapes non-ASCII characters by default, so the payload is
    # plain ASCII; the explicit UTF-8 encoding keeps the write independent
    # of the platform's default encoding.
    with open(opFileName, 'w', encoding='utf-8') as f:
        f.write(payload)
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    merge()