In [1383]:
from collections import Counter

import sexpdata

In [1384]:
# Parse the schematic inside a context manager so the file handle is closed
# as soon as parsing finishes rather than whenever it happens to be collected.
# KiCad writes its s-expression files as UTF-8, so request that explicitly
# instead of relying on the platform's default encoding.
with open("example.sch", encoding="utf-8") as schematic_file:
    s_data = sexpdata.load(schematic_file)

Let's explore the data types involved for a bit.

In [1385]:
# Only the final expression of a cell is echoed, so the type(...) result on
# the next line is computed but never displayed; just the length shows up.
type(s_data)
len(s_data)

219

So the top level of the parsed s-expression is a list with 219 elements.  What types are those?

In [1386]:
# Collapse the top-level elements down to the distinct types they have.
print(set(map(type, s_data)))

{<class 'list'>, <class 'sexpdata.Symbol'>}


So, there are only sexpdata.Symbols and lists.  Let's look at the symbols.

In [1387]:
# Keep only the bare symbols that sit directly in the top-level list.
symbol_list = list(
    filter(lambda candidate: isinstance(candidate, sexpdata.Symbol), s_data)
)
print(f"There are {len(symbol_list)} symbols")
print(symbol_list)

There are 1 symbols
[Symbol('kicad_sch')]


Of the 219 items in the top-level list, only one is a symbol and all the rest are lists.  Let's see how long those lists are.

In [1388]:
# Summarise every top-level element: a sublist is recorded by its length,
# a symbol by its text; any other kind of element is unexpected.
res_list = []
for elem in s_data:
    if isinstance(elem, list):
        res_list.append(len(elem))
        continue
    if not isinstance(elem, sexpdata.Symbol):
        raise TypeError(f"I wasn't expecting {elem} of type {type(elem)}")
    res_list.append(str(elem))

# Only the integer entries are sublist lengths; tally how often each occurs.
freq_table = Counter(filter(lambda res: isinstance(res, int), res_list))
print(freq_table.most_common())

[(4, 118), (5, 34), (6, 27), (17, 22), (2, 6), (18, 6), (10, 1), (32, 1), (64, 1), (19, 1), (55, 1)]


So, most but not all of the sublists at the top level of the data structure are quite small.  Let's get a feel for how many of each type there are in the whole structure.

Let's be lazy at first and assume that the only iterable we're going to want to descend into is a list, since that's the only one at the top level.

In [1389]:
def flatten_nested_lists(starting_elem):
    """Walk nested lists depth-first, yielding the type of every item seen.

    Each list encountered yields ``list`` and is then descended into
    immediately, so its contents follow it in the output. Anything that is
    not a list yields its own type and is not descended into. The outermost
    container itself is not reported.
    """
    for item in starting_elem:
        if isinstance(item, list):
            yield list
            yield from flatten_nested_lists(item)
        else:
            yield type(item)

In [1390]:
flattened_type_counts = Counter(flatten_nested_lists(s_data))
print(flattened_type_counts)

Counter({<class 'sexpdata.Symbol'>: 6734, <class 'list'>: 5468, <class 'float'>: 2618, <class 'str'>: 1513, <class 'int'>: 844})


Let's take a look at some of the early sublists

In [1391]:
s_data[1]

[Symbol('version'), 20231120]

In [1392]:
s_data[2]

[Symbol('generator'), 'eeschema']

In [1393]:
s_data[3]

[Symbol('generator_version'), '8.0']

Let's take a look at the Symbol class that this library uses.

In [1394]:
sample_symbol = s_data[3][0]
help(type(sample_symbol))

Help on class Symbol in module sexpdata:

class Symbol(String)
 |  Method resolution order:
 |      Symbol
 |      String
 |      builtins.str
 |      builtins.object
 |
 |  Methods inherited from String:
 |
 |  __eq__(self, other)
 |      >>> from itertools import permutations
 |      >>> S = 'a', String('a'), Symbol('a')
 |      >>> all(x == x for x in S)
 |      True
 |      >>> any(x != x for x in S)
 |      False
 |      >>> any(x == y for x, y in permutations(S, 2))
 |      False
 |      >>> all(x != y for x, y in permutations(S, 2))
 |      True
 |
 |  __hash__(self)
 |      >>> D = {'a': 1, String('a'): 2, Symbol('a'): 3}
 |      >>> len(D)
 |      3
 |
 |  __ne__(self, other)
 |      Return self!=value.
 |
 |  __repr__(self)
 |      Return repr(self).
 |
 |  value(self)
 |
 |  ----------------------------------------------------------------------
 |  Class methods inherited from String:
 |
 |  quote(string)
 |
 |  unquote(string)
 |
 |  ----------------------------------------

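Symbol is a subclass of str (by way of sexpdata's String class).  Apart from the quote and unquote class methods, the only regular method it offers beyond a plain str seems to be value, which it inherits from String.  Let's see if that has any help.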

In [1395]:
help(type(sample_symbol).value)

Help on function value in module sexpdata:

value(self)



So, no then.  Let's see what happens when we call it. 

In [1396]:
sample_symbol.value()

'generator_version'

It looks like unquoted strings in the original data file get converted into the "Symbol" class to differentiate them from quoted strings.

I have a suspicion that the first value in each list is going to turn out to be a symbol that acts as the name for the rest of the list.  Let's check.

In [1397]:
def get_first_element_types(s_expression):
    """Yield the type of the leading item of every non-empty nested list.

    Items of ``s_expression`` that are not lists, or that are empty lists,
    are skipped. For each non-empty list the type of its first item is
    yielded, then the remaining items are searched recursively the same way.
    """
    for node in s_expression:
        if not isinstance(node, list) or not node:
            continue
        head, *tail = node
        yield type(head)
        yield from get_first_element_types(tail)

In [1398]:
first_element_type_freqs = Counter(get_first_element_types(s_data))
first_element_type_freqs

Counter({sexpdata.Symbol: 5468})

The number of Symbols that are the first elements of their list corresponds to the number of lists we found earlier.  So it looks like the answer is yes.  Each list starts with a symbol.

In [1399]:
def get_list_names(s_expression):
    """Yield the leading item of every non-empty nested list.

    Items of ``s_expression`` that are not lists, or that are empty lists,
    are skipped. For each non-empty list its first item is yielded, then the
    remaining items are searched recursively the same way.
    """
    for node in s_expression:
        if not isinstance(node, list) or not node:
            continue
        head, *tail = node
        yield head
        yield from get_list_names(tail)

In [1400]:
list_names = Counter(get_list_names(s_data))
list_names

Counter({Symbol('size'): 501,
         Symbol('effects'): 481,
         Symbol('font'): 481,
         Symbol('at'): 430,
         Symbol('uuid'): 360,
         Symbol('xy'): 319,
         Symbol('pin'): 265,
         Symbol('type'): 252,
         Symbol('property'): 220,
         Symbol('stroke'): 195,
         Symbol('width'): 195,
         Symbol('hide'): 168,
         Symbol('alternate'): 149,
         Symbol('pts'): 147,
         Symbol('length'): 117,
         Symbol('name'): 117,
         Symbol('number'): 117,
         Symbol('wire'): 92,
         Symbol('unit'): 64,
         Symbol('justify'): 62,
         Symbol('fill'): 57,
         Symbol('symbol'): 54,
         Symbol('fields_autoplaced'): 46,
         Symbol('exclude_from_sim'): 42,
         Symbol('in_bom'): 41,
         Symbol('on_board'): 41,
         Symbol('path'): 33,
         Symbol('lib_id'): 32,
         Symbol('dnp'): 32,
         Symbol('instances'): 32,
         Symbol('project'): 32,
         Symbol('reference

Since each list starts with a symbol that is its name, we could potentially convert this to a nested dictionary.  But this is tricky.  Each level is a list, but the key names for that level are the first elements of each list it contains.

First, let's check if there are any lists that contain a mixture of lists and other values.  Those would throw a wrench into the conversion plan.

In [1401]:
def get_list_types(s_expression):
    """Yield ``(name, member_types)`` for every symbol-headed nested list.

    A list qualifies when it is non-empty and its first item is a
    ``sexpdata.Symbol``. ``name`` is that symbol's ``value()`` and
    ``member_types`` is a frozenset of the types of the remaining items.
    The remaining items are then searched recursively. Lists that do not
    start with a symbol are skipped and not descended into.
    """
    for node in s_expression:
        if not (isinstance(node, list) and node):
            continue
        head, *tail = node
        if not isinstance(head, sexpdata.Symbol):
            continue
        yield (head.value(), frozenset(map(type, tail)))
        yield from get_list_types(tail)

In [1402]:
# Wrap s_data in an outer list so the generator treats the top-level
# kicad_sch list as an element and reports it along with everything nested
# inside it.
types_of_list = list(get_list_types([s_data]))
types_of_list

[('kicad_sch', frozenset({list})),
 ('version', frozenset({int})),
 ('generator', frozenset({str})),
 ('generator_version', frozenset({str})),
 ('uuid', frozenset({str})),
 ('paper', frozenset({str})),
 ('lib_symbols', frozenset({list})),
 ('symbol', frozenset({list, str})),
 ('pin_names', frozenset({list, sexpdata.Symbol})),
 ('offset', frozenset({float})),
 ('exclude_from_sim', frozenset({sexpdata.Symbol})),
 ('in_bom', frozenset({sexpdata.Symbol})),
 ('on_board', frozenset({sexpdata.Symbol})),
 ('property', frozenset({list, str})),
 ('at', frozenset({float, int})),
 ('effects', frozenset({list})),
 ('font', frozenset({list})),
 ('size', frozenset({float})),
 ('property', frozenset({list, str})),
 ('at', frozenset({float, int})),
 ('effects', frozenset({list})),
 ('font', frozenset({list})),
 ('size', frozenset({float})),
 ('property', frozenset({list, str})),
 ('at', frozenset({int})),
 ('effects', frozenset({list})),
 ('font', frozenset({list})),
 ('size', frozenset({float})),
 ('h

In [1403]:
# Keep the (name, types) pairs whose members mix nested lists with at least
# one other type.
# NOTE(review): "challening" is a misspelling of "challenging"; the name is
# left unchanged in case other cells refer to it.
challening_lists = {
    (name, member_types)
    for name, member_types in types_of_list
    if len(member_types) > 1 and list in member_types
}
challening_lists

{('label', frozenset({list, str})),
 ('name', frozenset({list, str})),
 ('number', frozenset({list, str})),
 ('path', frozenset({list, str})),
 ('pin', frozenset({list, sexpdata.Symbol})),
 ('pin', frozenset({list, str})),
 ('pin_names', frozenset({list, sexpdata.Symbol})),
 ('project', frozenset({list, str})),
 ('property', frozenset({list, str})),
 ('symbol', frozenset({list, str})),
 ('text', frozenset({list, str}))}

Above is the set of list names I found in the data that don't follow the pattern that would allow them to be converted to dicts in a straightforward way: each of them mixes nested lists with plain values.