## Commented JSON

#### Goal

This Notebook demonstrates a few simple hacks to deal with comments in JSON strings or files. Note that the code is far from "production ready" and is only meant as a proof-of-concept.

In [1]:
import re
import json


class cJSONStr(str):
    """A ``str`` holding JSON text with ``#`` and ``//`` comments removed.

    The constructor strips the comments from its argument, so the resulting
    value can be passed straight to ``json.loads``. Comment markers that
    appear inside quoted strings are left untouched. Multi-line comments
    (``/* ... */``) are not supported.
    """

    # Group 1 matches a complete single- or double-quoted string on one line
    # (backslash escapes honoured). The last alternative matches a ``#`` or
    # ``//`` comment running to the end of the line. Strings come first so
    # that comment markers inside them are consumed as part of the string.
    _TOKEN_RE = re.compile(
        r'("(?:\\.|[^"\\\r\n])*"'
        r"|'(?:\\.|[^'\\\r\n])*')"
        r"|(?:#|//)[^\r\n]*"
    )

    def __new__(cls, cjson_str):
        """Creates the string from ``cjson_str`` with its comments removed.

        Args:
            cjson_str: JSON text that may contain ``#`` or ``//`` comments.

        Returns:
            A ``cJSONStr`` instance containing the comment-free text.
        """
        # Pass ``cls`` (not ``str``) so the result really is a cJSONStr.
        return super().__new__(cls, cls._clean(cjson_str))

    @staticmethod
    def _clean(cjson_str):
        """Cleans comments from a JSON string.

        Args:
            cjson_str: JSON text that may contain ``#`` or ``//`` comments.

        Returns:
            The text with every comment removed, together with the spaces
            immediately preceding it. Line breaks and all other characters
            are kept as they are.
        """
        kept = []
        cursor = 0
        for token in cJSONStr._TOKEN_RE.finditer(cjson_str):
            if token.group(1) is not None:
                # Quoted string: stays in place as part of the next slice.
                continue
            # Comment: keep the text before it, minus the spaces that
            # separated the comment from the JSON content.
            kept.append(cjson_str[cursor:token.start()].rstrip(" "))
            cursor = token.end()
        kept.append(cjson_str[cursor:])
        return "".join(kept)

    
class cJSONFile:
    """Text-mode file wrapper that hides ``#`` and ``//`` comments.

    Lines consisting solely of a comment are skipped and trailing comments
    are cut off, so ``read()`` (and therefore ``json.load``) sees plain JSON.
    Comment markers inside quoted strings are preserved. Multi-line comments
    (``/* ... */``) are not supported. Attributes that are not defined here
    (``name``, ``mode``, ...) are forwarded to the underlying file object
    while it is open.
    """

    # Group 1 matches a complete single- or double-quoted string on one line
    # (backslash escapes honoured). The last alternative matches a ``#`` or
    # ``//`` comment running to the end of the line. Strings come first so
    # that comment markers inside them are consumed as part of the string.
    _TOKEN_RE = re.compile(
        r'("(?:\\.|[^"\\\r\n])*"'
        r"|'(?:\\.|[^'\\\r\n])*')"
        r"|(?:#|//)[^\r\n]*"
    )

    def __init__(self, file_path, mode, encoding="utf-8"):
        """Opens ``file_path`` immediately in the given text mode.

        Args:
            file_path: Path of the file to open.
            mode: Text mode string accepted by ``open`` (e.g. ``"r"``).
            encoding: Text encoding used to open the file.

        Raises:
            ValueError: If ``mode`` requests binary I/O.
        """
        # Every binary mode contains "b" ("rb", "r+b", "ab+", "br", ...).
        if "b" in mode:
            raise ValueError(f"Binary mode {mode!r} is not supported, use text mode.")

        self._file_path = file_path
        self._mode = mode
        self._encoding = encoding
        self._file = self._open()

    def __getattr__(self, attribute):
        """Aliases properties of the underlying file object.

        Raises:
            AttributeError: If no file is open or it lacks ``attribute``.
        """
        # Read __dict__ directly: going through ``self._file`` would re-enter
        # __getattr__ (and recurse forever) when ``_file`` is not set yet.
        file = self.__dict__.get("_file")
        if file is not None and hasattr(file, attribute):
            return getattr(file, attribute)
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {attribute!r}"
        )

    def __enter__(self):
        """Called on entering context; reopens the file if it was closed."""
        self._ensure_open()
        return self

    def __exit__(self, type, value, traceback):
        """Called on exiting context; closes the file."""
        self._close()

    def __iter__(self):
        """Called when starting iteration; reopens the file if it was closed."""
        self._ensure_open()
        return self

    def __next__(self):
        """Returns next line, skipping comments.

        The file is closed automatically once the last line has been read.

        Raises:
            StopIteration: When there are no more lines or no file is open.
        """
        if self.closed:
            raise StopIteration
        for line in self._file:
            if not self._is_comment(line):
                return self._clean(line)
        self._close()
        raise StopIteration

    @property
    def closed(self):
        """Whether there is currently no open underlying file."""
        file = self.__dict__.get("_file")
        return file is None or file.closed

    def close(self):
        """Closes the file; calling it on a closed file is a no-op."""
        self._close()

    def _open(self):
        """Opens the file."""
        return open(self._file_path, self._mode, encoding=self._encoding)

    def _ensure_open(self):
        """Opens the file again if it is currently closed."""
        if self.closed:
            self._file = self._open()

    def _require_open(self):
        """Returns the open file object.

        Raises:
            ValueError: If the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._file

    def _close(self):
        """Closes the file; safe to call more than once."""
        # Detach first so a second call (e.g. __exit__ after the iterator
        # already closed the file on exhaustion) finds nothing left to close.
        file = self.__dict__.get("_file")
        self._file = None
        if file is not None:
            file.close()

    @staticmethod
    def _is_comment(line):
        """Checks whether a line consists solely of a comment."""
        return line.strip().startswith(("//", "#"))

    def _clean(self, line):
        """Strips trailing comments from a line.

        Args:
            line: A single line of text, with or without its line ending.

        Returns:
            The line without its comment and without the spaces that
            immediately preceded the comment; the line ending is kept.
        """
        if "#" not in line and "//" not in line:
            return line
        kept = []
        cursor = 0
        for token in self._TOKEN_RE.finditer(line):
            if token.group(1) is not None:
                # Quoted string: stays in place as part of the next slice.
                continue
            kept.append(line[cursor:token.start()].rstrip(" "))
            cursor = token.end()
        kept.append(line[cursor:])
        return "".join(kept)

    def read(self):
        """Returns file contents excluding comments.

        Raises:
            ValueError: If the file is closed.
        """
        return "".join(self.readlines())

    def readline(self):
        """Returns next line, skipping comments, or ``''`` at end of file.

        Raises:
            ValueError: If the file is closed.
        """
        for line in self._require_open():
            if not self._is_comment(line):
                return self._clean(line)
        return ''

    def readlines(self):
        """Reads all remaining lines that are not comments.

        Raises:
            ValueError: If the file is closed.
        """
        return [
            self._clean(line) for line in self._require_open()
            if not self._is_comment(line)
        ]
        

## Raw file contents

Note: comments may start with either `//` or `#` and run to the end of the line. Multi-line comments (`/* ... */`) are not supported!

In [2]:
# Read the file verbatim. The encoding is pinned to UTF-8 so the result does
# not depend on the platform default and matches cJSONFile's default encoding.
with open("commented.json", "r", encoding="utf-8") as jc:
    raw_cjson = jc.read()

print(raw_cjson)

{
	// Integer
	"integer": 1,
	"float": 1.0, # Float
	"text": "// text //",  // Trailing comment
	"list": [1, 2, 3]
}


In [3]:
# Note that the raw file contents are not valid JSON.
# The expected error is caught and printed so that "Restart & Run All" can
# continue past this cell.
try:
    json.loads(raw_cjson)
except json.JSONDecodeError as err:
    print(f"{type(err).__name__}: {err}")

JSONDecodeError: Expecting property name enclosed in double quotes: line 2 column 2 (char 3)

## Commented JSON strings

In [4]:
# Convert to cJSONStr
cjson_str = cJSONStr(raw_cjson)

In [5]:
# Note that comments were removed
cjson_str

'{\n\t\n\t"integer": 1,\n\t"float": 1.0,\n\t"text": "// text //",\n\t"list": [1, 2, 3]\n}'

In [6]:
# And now it parses as valid JSON
json.loads(cjson_str)

{'integer': 1, 'float': 1.0, 'text': '// text //', 'list': [1, 2, 3]}

In [7]:
# cJSONStr retains normal string properties
len(cjson_str)

77

In [8]:
# And supports slicing etc.
cjson_str[6:13]

'integer'

## Commented JSON files

In [9]:
# Read a file as commented JSON
with cJSONFile("commented.json", "r") as jf:
    cjson = jf.read()

print(cjson)

{
	"integer": 1,
	"float": 1.0,
	"text": "// text //",
	"list": [1, 2, 3]
}


In [10]:
# Parses with standard JSON parser
json.loads(cjson)

{'integer': 1, 'float': 1.0, 'text': '// text //', 'list': [1, 2, 3]}

In [11]:
# Or pass it directly to json.load
with cJSONFile("commented.json", "r") as jf:
    print(json.load(jf))

{'integer': 1, 'float': 1.0, 'text': '// text //', 'list': [1, 2, 3]}


In [12]:
# Supports typical file functionality
# (the file is opened here without a `with` block, so this handle stays open
# for the following cells)
jf = cJSONFile("commented.json", "r")
jf.name

'commented.json'

In [13]:
jf.mode

'r'

In [14]:
jf.closed

False

In [15]:
jf.readline()

'{\n'

In [16]:
next(jf)

'\t"integer": 1,\n'

In [17]:
# Line iteration
# (rebinding `jf` does not close the handle opened in the earlier cell; the
# new file is closed automatically once iteration reaches the end)
jf = cJSONFile("commented.json", "r")
[line for line in jf]

['{\n',
 '\t"integer": 1,\n',
 '\t"float": 1.0,\n',
 '\t"text": "// text //",\n',
 '\t"list": [1, 2, 3]\n',
 '}']