In [1]:
import json
import re
import sys

In [2]:
def arrayPositions(data):
    """Return the positions of the '[' and ']' characters that start a new run.

    Every bracket in ``data`` is visited in order. A bracket is kept only when
    it differs from the bracket seen immediately before it (other characters
    in between are ignored), so a repeated '[' or a repeated ']' is dropped.

    Returns:
        A tuple ``(openPositions, closePositions)`` of two lists of string
        indices, each in ascending order.
    """
    opens = []
    closes = []
    lastSeen = ''
    for found in re.finditer('\\[|\\]', data):
        pos = found.start()
        char = data[pos]
        # A bracket of the same kind as the previous one is a duplicate: skip it.
        if char != lastSeen:
            target = opens if char == '[' else closes
            target.append(pos)
        lastSeen = char
    return opens, closes

In [3]:
# Name of the raw JSON file whose text is loaded into `data`.
fName = 'He_SAS_191213_0935.json'
with open(fName, 'r', encoding="utf-8") as JSONfile:
    data = JSONfile.read()

# Preview the first 2000 characters of the file contents.
preview = data[:2000]
print(preview)

{
    "FileVersion":0.00000000000000,
    "Summary":{
        "Apparatus":0,
        "ExperimentType":"Symmetric Angular Scan",
        "Species":"He",
        "Description":"Angular sweep to check health of analysers' spot welds.",
        "CountType":"Coincidence Counts",
        "StartDate":"191213_0935",
        "EstimatedExperimentDuration":"70:00",
        "ActualExperimentDuration":"",
        "StepTime":3000,
        "NumberOfSteps":84,
        "TAC":{
            "FullScale (ns)":1000.00000000000,
            "NumberOfBins":1000,
            "WindowStart (ns)":410,
            "WindowStop (ns)":610
        },
        "FCup":{
            "Current (µA)":0.646306674862373,
            "Voltage (V)":300.000000000000
        },
        "Pressure(Torr)":5.76300081042067E-6,
        "Gun":{
            "Energies":[
                45.2400000000000
            ],
            "Angles":[
                0.00000000000000
            ]
        },
        "A1":{
            "RSE":[
      

In [89]:
def getArrayPositions(data):
    """Given a JSON string, returns a list of the indices of the start [ and end ] list brackets.

    Brackets that appear inside JSON string literals (for example
    ``"see [1]"``) are text, not array delimiters, and are ignored.

    Args:
        data: JSON document as a string.

    Returns:
        A list of ``[openIndex, closeIndex]`` pairs, one per array, appended
        in the order in which the arrays are closed (inner arrays come before
        the arrays that contain them).

    Raises:
        IndexError: if a ']' is found with no preceding unmatched '['.
    """
    # The string-literal alternative comes first so that any bracket inside a
    # quoted string is consumed as part of that string and never seen alone.
    tokenPattern = re.compile(r'"(?:[^"\\]|\\.)*"|[\[\]]')
    openIndices = []
    arrayIndices = []
    for match in tokenPattern.finditer(data):
        token = match.group()
        if token == '[':
            openIndices.append(match.start())
        elif token == ']':
            # Pair this closer with the most recent still-open bracket.
            latestOpenBracketIndex = openIndices.pop()
            arrayIndices.append([latestOpenBracketIndex, match.start()])
        # Anything else is a string literal: nothing to record.
    return arrayIndices

def getNumericArrayPositions(data):
    """Given a JSON string, returns a list of the start and end indices of numeric type lists.

    An array is reported when it is non-empty and its first element is not
    itself a list; no further check of the element type is made.

    Args:
        data: JSON document as a string.

    Returns:
        A list of ``[openIndex, closeIndex]`` pairs, in the order produced by
        ``getArrayPositions``.

    Raises:
        json.JSONDecodeError: if a bracketed span is not a valid JSON array.
    """
    numericArrayIndices = []
    for array in getArrayPositions(data):
        start, end = array
        # Parse with json.loads rather than eval: the text is JSON, so the
        # literals true/false/null must be accepted (eval raises NameError on
        # them), and file contents must never be executed as Python code.
        parsedList = json.loads(data[start:end + 1])
        if parsedList and not isinstance(parsedList[0], list):
            numericArrayIndices.append(array)
    return numericArrayIndices

# Locate every non-empty array in the raw text whose first element is not a list.
numericListIndices = getNumericArrayPositions(data)
# Display the first ten [start, end] index pairs as the cell output.
numericListIndices[:10]

[[828, 875],
 [899, 946],
 [992, 1039],
 [1098, 3967],
 [4013, 4060],
 [4119, 6988],
 [7036, 9905],
 [13903, 34927],
 [38906, 59922],
 [63900, 84905]]

In [83]:
def roundDecimalPlaces(data, maxDP=-1):
    """Given an input JSON string, rounds floating point numbers to a maximum number of decimal places, maxDP.

    Only digit runs of the form ``<digits>.<digits>`` that lie outside JSON
    string literals are rewritten; a sign or exponent around the match is left
    exactly as it was in the input.

    Args:
        data: JSON document as a string.
        maxDP: maximum number of decimal places to keep. The default of -1
            does no rounding and only re-renders each float through
            ``str(float(...))``, which drops redundant trailing zeros.

    Returns:
        The input text with each matched float replaced. Text that contains
        no floats is returned unchanged.
    """
    # String literals are matched first so that digits inside quoted text
    # (descriptions, dates, ...) are consumed there and never rewritten.
    tokenPattern = re.compile(r'"(?:[^"\\]|\\.)*"|\d+\.\d+')
    # Loop-invariant scale factor, hoisted out of the loop.
    scale = None if maxDP == -1 else 10**maxDP
    dataList = []
    prevEndIndex = 0
    for match in tokenPattern.finditer(data):
        token = match.group()
        if token.startswith('"'):
            # String literal: leave it to be copied verbatim with the next slice.
            continue
        value = float(token)
        if scale is None:
            floatStr = str(value)
        else:
            floatStr = str(round(value * scale) / scale)
        dataList.append(data[prevEndIndex:match.start()])
        dataList.append(floatStr)
        prevEndIndex = match.end()
    # Add remaining data after the last float (the whole text if none matched).
    dataList.append(data[prevEndIndex:])
    return ''.join(dataList)

# Round the floats in the raw JSON text to at most 3 decimal places.
roundedData = roundDecimalPlaces(data, 3)

In [86]:
def reformatArrays(data, everyN = 20):
    """Put several array values on each line instead of one value per line.

    Inside every array reported by ``getNumericArrayPositions``, each
    "comma, newline, indentation" separator is replaced by ``', '``, except
    every ``everyN``-th separator of that array, which is kept so that long
    arrays still wrap.

    Args:
        data: JSON document as a string.
        everyN: keep one line break after every ``everyN`` separators.
            Must be non-zero (it is used as a slice step); positive values
            are the intended use.

    Returns:
        The reformatted text. Text with no separators to collapse is
        returned unchanged.
    """
    newlinesToRemove = set()
    for start, end in getNumericArrayPositions(data):
        # Find the commas followed by a newline and indentation.
        arrayNewlines = list(re.finditer(',\\n +', data[start: end]))
        del arrayNewlines[everyN-1::everyN] # keep these ones!
        # Translate slice-relative match positions into positions in `data`.
        for newline in arrayNewlines:
            newlinesToRemove.add((start + newline.start(), start + newline.end()))

    # Arrays are reported in closing order, so a span from an enclosing array
    # can come after (or duplicate) one from an array nested inside it.
    # Sorting the de-duplicated spans keeps the stitching below in text order.
    dataList = []
    cursor = 0
    for commaStart, commaEnd in sorted(newlinesToRemove):
        # Copy the untouched text up to the separator, then the replacement.
        dataList.append(data[cursor:commaStart])
        dataList.append(', ')
        cursor = commaEnd
    # Add remaining data after the last separator (all of it if none found).
    dataList.append(data[cursor:])
    return ''.join(dataList)

# Collapse array line breaks, keeping one after every 10th separator in each array.
formattedData = reformatArrays(roundedData, 10)
# Preview the first 16000 characters of the reformatted text.
print(formattedData[:16000])

{
    "FileVersion":0.0,
    "Summary":{
        "Apparatus":0,
        "ExperimentType":"Symmetric Angular Scan",
        "Species":"He",
        "Description":"Angular sweep to check health of analysers' spot welds.",
        "CountType":"Coincidence Counts",
        "StartDate":"191213_0935",
        "EstimatedExperimentDuration":"70:00",
        "ActualExperimentDuration":"",
        "StepTime":3000,
        "NumberOfSteps":84,
        "TAC":{
            "FullScale (ns)":1000.0,
            "NumberOfBins":1000,
            "WindowStart (ns)":410,
            "WindowStop (ns)":610
        },
        "FCup":{
            "Current (µA)":0.646,
            "Voltage (V)":300.0
        },
        "Pressure(Torr)":5.763E-6,
        "Gun":{
            "Energies":[
                45.24
            ],
            "Angles":[
                0.0
            ]
        },
        "A1":{
            "RSE":[
                10.28
            ],
            "AM":15.86,
            "Angles":[
   