# Частичный разбор объявлений о продаже квартир


Пример объявления:

```
text1. ann1. Арбатская м. 1-комн. кв-ру, или м.Кропоткинская 7 мин/п, Староконюшенный пер., д.30. 35/21 кв.м, кухня 10, 10/12 эт, блоч. дома, балкон, тел., хор. сост., с/у совм., окна в тихий сквер, освобождение сразу, продаю, 45 тыс.$. Торг Т 557-00-73 
```

Результат разбора я представляю в виде отдельного файла в формате json, где каждое объявление -- это отдельный объект:

```
{
    "flat_sale_ads": [
        {
            "address": {
                "house": "30",
                "street": "Староконюшенный пер."
            },
            "floor": {
                "floor_num": "10",
                "floor_ttl": "12"
            },
            "floor_area": {
                "area_1": "35",
                "area_2": "21",
                "kitchen": "10"
            },
            "metro": [
                "Арбатская",
                "Кропоткинская"
            ],
            "price": {
                "amount": 45000,
                "currency": "USD"
            },
            "rooms": "1",
            "telephone": [
                "557-00-73"
            ]
        },
        ...
    ]
}
```

In [1]:
# Check that the notebook is running inside the intended virtual environment
import sys
sys.executable # notebook counterpart of `!which python` on a Mac
# /Users/alexskrn/opt/anaconda3/envs/yargy/bin/python

'/Users/alexskrn/opt/anaconda3/envs/yargy/bin/python3'

In [2]:
import os
import re
import json

from yargy import Parser
from yargy import rule
from yargy import predicates
from yargy import or_, and_, not_
from yargy import pipelines
from yargy.interpretation import fact, attribute
from ipymarkup import show_markup

In [3]:
# Вспомогательные функции и класс

def show_matches(rule, *lines):
    """Display each of ``lines`` with every match of ``rule`` highlighted.

    One parser is built from ``rule``; for every line the spans of all
    matches it finds are collected and passed to ``show_markup``.
    """
    matcher = Parser(rule)
    for text in lines:
        found_spans = [found.span for found in matcher.findall(text)]
        show_markup(text, found_spans)

def load_lines(path):
    """Yield the lines of the text file at ``path`` without the trailing newline.

    The file is decoded as UTF-8 explicitly, like the other text file this
    notebook reads, so the result does not depend on the platform's default
    encoding.

    Args:
        path: location of the text file to read.

    Yields:
        Each line with trailing '\\n' characters removed; blank lines are
        yielded as empty strings.
    """
    with open(path, encoding='utf8') as file:
        for line in file:
            yield line.rstrip('\n')


def show_json(data):
    """Print ``data`` as JSON indented by two spaces, keeping non-ASCII text as is."""
    rendered = json.dumps(data, ensure_ascii=False, indent=2)
    print(rendered)


def join_spans(text, spans):
    """Return the slices of ``text`` selected by ``spans``, separated by spaces.

    Each span unpacks into a (start, stop) pair. The spans are put into
    ascending order before slicing, so the pieces come out in sorted span
    order regardless of the order in which they were passed in.
    """
    pieces = []
    for start, stop in sorted(spans):
        pieces.append(text[start:stop])
    return ' '.join(pieces)


class Match:
    """Plain holder pairing an extracted fact with the spans it came from."""

    def __init__(self, fact, spans):
        # Both values are stored exactly as given; nothing is copied or checked
        self.spans = spans
        self.fact = fact
        

class Extractor:
    """Wrap an 'or_'-based parser so that a text yields one single fact.

    Two parsers are held: one built from the union rule, which locates the
    individual pieces in the raw text, and one built from the wrapper rule,
    which is run over those pieces joined into one string.
    """

    def __init__(self, union_rule_obj, wrapper_obj):
        self.union_rule_obj_parser = Parser(union_rule_obj)
        self.wrapper_parser = Parser(wrapper_obj)

    def __call__(self, text):
        """Return a Match with the wrapper's first fact and the union spans.

        The fact is None when the wrapper parser finds nothing in the
        condensed text.
        """
        spans = [found.span for found in self.union_rule_obj_parser.findall(text)]

        # Keep only the matched fragments, then parse that condensed string
        condensed = join_spans(text, spans)
        wrapped = list(self.wrapper_parser.findall(condensed))
        fact = wrapped[0].fact if wrapped else None

        return Match(fact, spans)

In [4]:
# Metro station names from the dictionary file, plus an upper-cased copy of each
METRO_STATIONS = set(load_lines(os.path.join('dicts', 'metro.txt')))
METRO_STATIONS_UPPER = set(map(str.upper, METRO_STATIONS))

## Данные

In [5]:
# Ad texts: one ad per non-blank line of the data file, surrounding whitespace removed
with open(os.path.join('data', 'flats.txt'), 'r', encoding='utf8') as ads_file:
    TEXTS = [ad for ad in map(str.strip, ads_file) if ad]
print(f'Количество объявлений: {len(TEXTS)}\n')
print(TEXTS[0])

Количество объявлений: 20

text1. ann1. Арбатская м. 1-комн. кв-ру, или м.Кропоткинская 7 мин/п, Староконюшенный пер., д.30. 35/21 кв.м, кухня 10, 10/12 эт, блоч. дома, балкон, тел., хор. сост., с/у совм., окна в тихий сквер, освобождение сразу, продаю, 45 тыс.$. Торг Т 557-00-73


### ОБЩИЕ ПРАВИЛА

In [6]:
# Building blocks shared by the attribute grammars below.

# A single integer token
INT = rule(predicates.type('INT'))

# Integer, a ',' or '.' separator, then the fractional part
FLOAT = rule(
    INT,
    predicates.in_({',', '.'}),
    # Intended: no more than one digit after the decimal separator.
    # NOTE(review): the argument is a str, not a set of digits; if in_ tests
    # membership with Python's `in`, this is a substring test, so a token such
    # as '23' would pass and '0' would not — confirm against yargy's in_.
    predicates.in_('123456789')
)

INT_OR_FLOAT = rule(or_(INT, FLOAT))

# Single-character punctuation predicates
SLASH = predicates.eq('/')
DASH = predicates.eq('-')
DOT = predicates.eq('.')
COMMA = predicates.eq(',')

## Определение всех атрибутов объекта разбора -- объявления о продаже

In [7]:
# Fact describing one flat-sale ad. 'metro' and 'telephone' are declared
# repeatable; the remaining attributes are declared as plain attributes.
flat_fact = fact('flat',
                 [attribute('metro').repeatable(),
                  'rooms',
                  'address',
                  'floor_area',
                  'floor',
                  'price',
                  attribute('telephone').repeatable()
                 ]
           )

## Определения каждого из атрибутов объекта разбора

### Атрибут METRO

In [8]:
# Earlier, simpler variant kept for reference:
# METRO = pipelines.pipeline(METRO_STATIONS).repeatable().interpretation(flat.metro)

# A station name is looked up in both the original and the upper-cased dictionary
station = or_(pipelines.pipeline(METRO_STATIONS),
              pipelines.pipeline(METRO_STATIONS_UPPER)
             )

# Two layouts are accepted: marker first ("м.<station>") or station first
# ("<station> м."). The station part is what gets stored in flat_fact.metro.
# NOTE(review): the marker-first form only accepts a lowercase 'м', while the
# station-first form accepts 'м' or 'М' — confirm the asymmetry is intended.
METRO = rule(or_(rule('м',
                      '.',
                      station.repeatable().interpretation(flat_fact.metro)
                     ),
                 rule(station.repeatable().interpretation(flat_fact.metro),
                      predicates.in_('мМ'),
                      '.'
                      )
                )
).repeatable()


# Visual check on sample fragments, including an all-caps station name
show_matches(
    METRO,
    'text1. ann1. Арбатская м. 1-комн. кв-ру, или м.Кропоткинская 7 мин/п, Староконюшенный пер.',
    'text2. ann2. Арбатская м. 1-комн. кв ру, 5 м/пеш., ул.Арбат дом 15/43;',
    'Бауманская м.',
    'БАУМАНСКАЯ М.'
)

### Атрибут ROOMS (количество комнат)

In [9]:
# Marker that follows the number of rooms, as in "1-комн."
rooms_pipe = pipelines.morph_pipeline([
    '-комн.'
])

# An integer followed by the rooms marker; the integer is stored in flat_fact.rooms
ROOMS = rule(INT.interpretation(flat_fact.rooms),
             rooms_pipe
            )


# Visual check on sample fragments
show_matches(
    ROOMS,
    'text1. ann1. Арбатская м. 1-комн. кв-ру, или м.Кропоткинская 7 мин/п, Староконюшенный пер.',
    'text2. ann2. Арбатская м. 1-комн. кв ру, 5 м/пеш., ул.Арбат дом 15/43;',
)

### Атрибут ADDRESS

In [10]:
# Address fact: the street (with its type marker) and the house number
address_fact = fact('address', ['street', 'house'])
# Street-type markers ('ул' = street, 'пер' = lane)
street_type_pipe = pipelines.morph_pipeline([
    'ул',
    'пер',
])
# House markers ('д' / 'дом')
house_pipe = pipelines.morph_pipeline([
    'д',
    'дом'
])

# Building suffix: optional comma, 'корп', optional dot, building number
korpus_rule = rule(COMMA.optional(),
                   predicates.eq('корп'),
                   DOT.optional(),
                   INT)

# House number: optional comma, house marker, optional dot, then the part
# stored in address_fact.house — a number, optionally '/number', optionally
# followed by the building suffix
house_num_rule = rule(COMMA.optional(),
                      house_pipe,
                      DOT.optional(),
                      rule(INT,
                           SLASH.optional(),
                           INT.optional(),
                           korpus_rule.optional()    # optional building ('корп') part
                          ).interpretation(address_fact.house)
                     )

# Tokens accepted as a street name: adjective, noun, or a token tagged 'Geox'
street_name_union = or_(predicates.gram('ADJF'),
                        predicates.gram('NOUN'),
                        predicates.gram('Geox')
                       )

# Two street layouts are accepted: type marker first ("ул.Арбат") or name
# first ("Староконюшенный пер."). The street part goes to address_fact.street;
# the house number is optional. The resulting address fact is stored in
# flat_fact.address.
ADDRESS = rule(
               or_(rule(
                        street_type_pipe,                  # 'ул'
                        DOT.optional(),                    # '.'
                        predicates.in_('БМ').optional(),   # 'Б'/'М'
                        DOT.optional(),                    # '.'
                        street_name_union,                 # 'Грузинская'/'Арбат'/'Бутырский'
                        predicates.gram('NOUN').optional() # 'вал'
                       ),
                   rule(
                       predicates.in_('БМ').optional(),    # 'Б'/'М'
                       DOT.optional(),                     # '.'
                       street_name_union,                  # 'Грузинская'
                       street_type_pipe,                   # 'ул'/'пер'
                       DOT.optional(),                     # '.'
                       )
                  ) .interpretation(address_fact.street),
               house_num_rule.optional()                   # house number, with optional building
              ).interpretation(address_fact).interpretation(flat_fact.address)


# Visual check on sample fragments covering both layouts
show_matches(
    ADDRESS,
    'Староконюшенный пер., д.30. 35/21 кв.м, кухня 10',
    'Арбатская м. 1-комн. кв ру, 5 м/пеш., ул.Арбат дом 15/43; 53/20/13 кв.м,',
    '1-комн. кв-ру, ул.Б.Грузинская, д.14, 36/20.2 кв.м',
    'Аптекарский пер., д.3/22, 30/16.2 кв.м',
    '5 мин/п., Токмаков пер., 28/16 кв.м',
    'ул. Бутырский вал, д. 34,',
    'Б.Тишинский пер., 22/15 кв.м',
    'Волочаевская ул. дом 2 корп 1,'
)

### Атрибут FLOOR AREA

In [11]:
# Floor-area fact: up to three slash-separated areas, plus the kitchen and
# the recessed balcony ('лоджия') areas when they are given by name
floor_area_fact = fact('floor_area',
                       ['area_1',
                        'area_2',
                        'area_3',
                        'kitchen',
                        'recessed_balcony'
                       ])

# Square-metre unit, written with or without a space
sq_m_pipe = pipelines.morph_pipeline([
      'кв.м',
      'кв. м',
      ]
)

# ", лоджия <number>" — the number is stored as recessed_balcony
lodzhiya_rule = rule(COMMA,
                     predicates.eq('лоджия'),
                     INT_OR_FLOAT.interpretation(floor_area_fact.recessed_balcony))

# Optional ',' or '.', then 'кухня' / 'кух', optional dot, and the kitchen area
kitchen_rule = rule(or_(COMMA, DOT).optional(),
                    predicates.in_(['кухня', 'кух']),
                    DOT.optional(),
                    INT_OR_FLOAT.interpretation(floor_area_fact.kitchen)
                   )

# "<area_1>/<area_2>", optionally "/<area_3>", which must then be followed by
# one of: the unit, a kitchen clause, or a bare third area. A further kitchen
# clause and a recessed-balcony clause may follow. The resulting fact is
# stored in flat_fact.floor_area.
FLOOR_AREA = rule(
    INT_OR_FLOAT.interpretation(floor_area_fact.area_1),
    SLASH,
    INT_OR_FLOAT.interpretation(floor_area_fact.area_2),
    SLASH.optional(),
    INT_OR_FLOAT.interpretation(floor_area_fact.area_3).optional(),
    or_(sq_m_pipe, kitchen_rule, INT_OR_FLOAT.interpretation(floor_area_fact.area_3)),
    kitchen_rule.optional(),
    lodzhiya_rule.optional()
).interpretation(floor_area_fact).interpretation(flat_fact.floor_area)


# Visual check on sample fragments, including comma decimals and a bare third area
show_matches(
    FLOOR_AREA,
    'Староконюшенный пер., д.30. 35/21 кв.м, кухня 10, 10/12 эт,',
    'ул.Арбат дом 15/43; 53/20/13 кв.м, еврорем., 4/6-эт. кирп.',
    'ул.М.Почтовая, 10; 44/21 кв.м, кухня 13.5, 2/8-эт. ',
    '34/15 кв.м. кухня 7,',
    ', 34/14,4/кух. 8,5,',
    ', 34.4/19.3/10,'
)

### Атрибут FLOOR NUMBER

In [12]:
# Floor of the flat and the total number of floors in the building
floor_fact = fact('floor', ['floor_num', 'floor_ttl'])

# Markers that must follow the "N/M" pair for it to count as a floor spec.
# Besides the floor abbreviations the list holds 'пан' / 'кирп' / '-пан'
# (presumably building-material abbreviations that can directly follow the pair).
floor_num_pipe = pipelines.morph_pipeline([
    'эт',
    '-эт',
    'эт.',
    'этаж',
    'пан',
    'кирп',
    '-пан'
      ]
)

# "<floor_num>/<floor_ttl>" followed by one of the markers above; the
# resulting fact is stored in flat_fact.floor
FLOOR = rule(rule(
             INT.interpretation(floor_fact.floor_num),
             SLASH,
             INT.interpretation(floor_fact.floor_ttl),
             floor_num_pipe,
            ).interpretation(floor_fact).interpretation(flat_fact.floor))


# Visual check on sample fragments
show_matches(
    FLOOR,
    'Староконюшенный пер., д.30. 35/21 кв.м, кухня 10, 10/12 эт, блоч. дома,',
    'ул.Арбат дом 15/43; 53/20/13 кв.м, еврорем., 4/6-эт. кирп. дома,',
)

### Атрибут PRICE

In [13]:
def normalize_amount(val):
    """Convert the textual amount of a price into an integer.

    Whitespace is dropped first. If the text carries the 'тыс' (thousands)
    marker, the marker and an optional trailing dot are removed, a comma is
    read as the decimal separator and the number is multiplied by 1000.
    Otherwise every dot is treated as a thousands separator and removed.

    Args:
        val: amount text, e.g. '45 тыс.', '23,5 тыс', '24.000' or '27 500'.

    Returns:
        The amount as an int.

    Raises:
        ValueError: if what remains after the clean-up is not a number.
    """
    val = ''.join(val.split())
    if 'тыс' in val:
        # The dot is escaped so that only a literal '.' after the marker is
        # removed (an unescaped '.' would swallow any following character)
        val = re.sub(r'тыс\.?', '', val)
        val = val.replace(',', '.')
        # round() rather than int(): the float product can land just below
        # the exact value, and truncating it would lose a whole unit
        return round(float(val) * 1000)
    return int(val.replace('.', ''))

# Sanity checks for normalize_amount on the amount formats seen in the ads
assert normalize_amount('45 тыс.') == 45000
assert normalize_amount('24.000') == 24000
assert normalize_amount('27 500') == 27500
assert normalize_amount('23.5 тыс') == 23500


# Price fact: integer amount and a currency code
price_fact = fact('price', 
                  ['amount',
                   'currency'
                  ]
                 )

# Currency spellings recognised in the ads, mapped to the stored code
currencies_dict = {
    'дол.': 'USD',
    '$': 'USD',
    'у.е.': 'у.е.'
}

# Matches any of the spellings above; the matched text is converted to its
# code through currencies_dict.get before being stored in price_fact.currency
currency_pipe = pipelines.pipeline(currencies_dict).interpretation(
    price_fact.currency.custom(currencies_dict.get))

# Amount: a number, then optionally '.', a further integer, 'тыс' and '.'
# (covers '24.000', '27 500' and '45 тыс.')
amount_rule = rule(INT_OR_FLOAT,
                   DOT.optional(),
                   INT.optional(),
                   predicates.eq('тыс').optional(),
                   DOT.optional()
                  )

# Amount (normalised to an int by normalize_amount) followed by a currency;
# the resulting fact is stored in flat_fact.price
PRICE = rule(
             amount_rule.interpretation(price_fact.amount.custom(normalize_amount)),
             # Not needed here: currency_pipe already carries its interpretation
#              currency_pipe.interpretation(price_fact.currency)
    currency_pipe
            ).interpretation(price_fact).interpretation(flat_fact.price)


# Visual check on sample fragments
show_matches(
    PRICE,
    'продаю, 45 тыс.$. Торг Т 557-00-73',
    'хозяин, 130 тыс.$. Торг. Т.762-55-67, С',
    '24.000$',
    '27 500 дол.'
)

### Атрибут TELEPHONE

In [14]:
# Marker that introduces a telephone number ('Т')
tele_pipe = pipelines.morph_pipeline([
    'Т',
])
# Marker, optional dot, then one or more "INT-INT-INT" numbers; each number is
# stored in flat_fact.telephone. The whole construct may repeat.
TELE = rule(tele_pipe,
            DOT.optional(),
            rule(INT,
            DASH,
            INT,
            DASH,
            INT).repeatable().interpretation(flat_fact.telephone)
           ).repeatable()


# Visual check on sample fragments, including an ad with two numbers
show_matches(
    TELE,
    'продаю, 45 тыс.$. Торг Т 557-00-73',
    'хозяин, 130 тыс.$. Торг. Т.762-55-67, С',
    '27 тыс.$. Т.737-76-67, раб., Т.265-23-31, дом.'
)

### Объект AD -- одно объявление о продаже (объединение всех атрибутов); объект WRAPPER -- обертка поверх объединения атрибутов

In [15]:
# Union of all attribute rules: matches any single attribute wherever it
# occurs in the ad text
AD = or_(METRO.repeatable(),
         ROOMS,
         ADDRESS,
         FLOOR_AREA,
         FLOOR,
         PRICE,
         TELE
        ).interpretation(flat_fact)

# Sequence of the attributes in a fixed order: at least one METRO first, the
# rest optional. Extractor runs a parser built from this rule over the
# fragments found by the AD parser, joined into one string.
WRAPPER = rule(
    METRO.repeatable(),
    ROOMS.optional(),
    METRO.optional().repeatable(),
    ADDRESS.optional(),
    FLOOR_AREA.optional(),
    FLOOR.optional(),
    PRICE.optional(),
    TELE.optional().repeatable()
).interpretation(flat_fact)

## Тесты

In [16]:
# tests
def _assert_fields(expected, actual, where):
    """Assert that every value listed in ``expected`` is present in ``actual``.

    Nested dicts are compared key by key, so only the listed fields are
    checked; fields of ``actual`` that ``expected`` does not mention are
    ignored. ``where`` labels the failure message.
    """
    for key, want in expected.items():
        got = actual[key]
        label = f'{where}[{key!r}]'
        if isinstance(want, dict):
            _assert_fields(want, got, label)
        else:
            assert got == want, f'{label}: expected {want!r}, got {got!r}'


# (index into TEXTS, fields that the parsed ad must contain)
EXPECTED_ADS = [
    (0, {
        'metro': ['Арбатская', 'Кропоткинская'],
        'rooms': '1',
        'address': {'street': 'Староконюшенный пер.', 'house': '30'},
        'floor_area': {'area_1': '35', 'area_2': '21'},
        'floor': {'floor_num': '10', 'floor_ttl': '12'},
        'price': {'amount': 45000, 'currency': 'USD'},
        'telephone': ['557-00-73'],
    }),
    (1, {
        'metro': ['Арбатская'],
        'rooms': '1',
        'address': {'street': 'ул.Арбат', 'house': '15/43'},
        'floor_area': {'area_1': '53', 'area_2': '20', 'area_3': '13'},
        'floor': {'floor_num': '4', 'floor_ttl': '6'},
        'price': {'amount': 130000, 'currency': 'USD'},
        'telephone': ['762-55-67'],
    }),
    (2, {
        'metro': ['Баррикадная'],
        'rooms': '1',
        'address': {'street': 'ул.Б.Грузинская', 'house': '14'},
        'floor_area': {'area_1': '36', 'area_2': '20.2', 'kitchen': '8.3'},
        'floor': {'floor_num': '2', 'floor_ttl': '14'},
        'telephone': ['962-30-63'],
    }),
    (9, {
        'metro': ['Бауманская'],
        'rooms': '1',
        'address': {'street': 'Токмаков пер.'},
        'floor_area': {'area_1': '28', 'area_2': '16', 'kitchen': '5.5'},
        'floor': {'floor_num': '6', 'floor_ttl': '7'},
        'price': {'amount': 17000, 'currency': 'USD'},
        'telephone': ['978-92-94'],
    }),
    (10, {
        'metro': ['Бауманская'],
        'rooms': '1',
        'address': {'street': 'ул. Б. Почтовая', 'house': '18/20, корп. 16'},
        'floor_area': {'area_1': '34', 'area_2': '15'},
        'floor': {'floor_num': '5', 'floor_ttl': '5'},
        'price': {'amount': 23500, 'currency': 'USD'},
        'telephone': ['218-59-90'],
    }),
    (11, {
        'metro': ['Бауманская'],
        'rooms': '1',
        'address': {'street': 'Плетешковский пер.'},
        'floor_area': {'area_1': '35', 'area_2': '20', 'kitchen': '10'},
        'floor': {'floor_num': '3', 'floor_ttl': '12'},
        'price': {'amount': 28000, 'currency': 'у.е.'},
        'telephone': ['322-70-36'],
    }),
    (15, {
        'metro': ['БАУМАНСКАЯ'],
        'rooms': '1',
        'address': {'street': 'Волочаевская ул.', 'house': '2 корп 1'},
        'floor_area': {'area_1': '34', 'area_2': '14,4', 'kitchen': '8,5'},
        'floor': {'floor_num': '12', 'floor_ttl': '12'},
        'price': {'amount': 27500, 'currency': 'USD'},
        'telephone': ['924-59-92', '925-10-83'],
    }),
    (16, {
        'metro': ['Белорусская'],
        'rooms': '1',
        'address': {'street': 'ул.Нижняя', 'house': '5'},
        'floor_area': {'area_1': '31,3', 'area_2': '13,4', 'kitchen': '9,2'},
        'floor': {'floor_num': '3', 'floor_ttl': '5'},
        'price': {'amount': 25000, 'currency': 'USD'},
        'telephone': ['318-94-49'],
    }),
    (17, {
        'metro': ['Белорусская'],
        'rooms': '1',
        'address': {'street': 'ул.Верхняя', 'house': '6'},
        'floor_area': {'area_1': '43', 'area_2': '20', 'kitchen': '13'},
        'floor': {'floor_num': '2', 'floor_ttl': '14'},
        'price': {'amount': 41500, 'currency': 'USD'},
        'telephone': ['405-85-64'],
    }),
    (18, {
        'metro': ['Белорусская'],
        'rooms': '1',
        'address': {'street': 'ул. Бутырский вал', 'house': '34'},
        'floor_area': {'area_1': '36', 'area_2': '20'},
        'floor': {'floor_num': '12', 'floor_ttl': '12'},
        'price': {'amount': 28000, 'currency': 'USD'},
        'telephone': ['299-96-51'],
    }),
    (19, {
        'metro': ['Белорусская'],
        'rooms': '1',
        'address': {'street': 'Б.Тишинский пер.'},
        'floor_area': {'area_1': '22', 'area_2': '15'},
        'floor': {'floor_num': '2', 'floor_ttl': '6'},
        'price': {'amount': 18000, 'currency': 'USD'},
        'telephone': ['253-30-92'],
    }),
]

# One extractor serves every ad: its two parsers are built only once
extractor = Extractor(AD, WRAPPER)
for index, expected in EXPECTED_ADS:
    text = TEXTS[index]
    match = extractor(text)
    _assert_fields(expected, match.fact.as_json, f'TEXTS[{index}]')

# Show the fact parsed from the last checked ad
print(match.fact)

flat(metro=['Белорусская'], rooms='1', address=address(street='Б.Тишинский пер.', house=None), floor_area=floor_area(area_1='22', area_2='15', area_3=None, kitchen=None, recessed_balcony=None), floor=floor(floor_num='2', floor_ttl='6'), price=price(amount=18000, currency='USD'), telephone=['253-30-92'])


In [17]:
# Parse one ad, highlight the matched fragments and print the parsed result
text = TEXTS[14]
extractor = Extractor(AD, WRAPPER)
match = extractor(text)
show_markup(text, match.spans)
if match.fact:
    ad_json = match.fact.as_json
    # First as a dict rebuilt through a JSON round trip, then pretty-printed
    print(json.loads(json.dumps(ad_json, ensure_ascii=False)))
    show_json(ad_json)

{'metro': ['БАУМАНСКАЯ'], 'rooms': '1', 'address': {'street': 'Волховский пер.', 'house': '2'}, 'floor_area': {'area_1': '34.4', 'area_2': '19.3', 'area_3': '10'}, 'price': {'amount': 24000, 'currency': 'USD'}, 'telephone': ['203-12-31', '737-00-93']}
{
  "metro": [
    "БАУМАНСКАЯ"
  ],
  "rooms": "1",
  "address": {
    "street": "Волховский пер.",
    "house": "2"
  },
  "floor_area": {
    "area_1": "34.4",
    "area_2": "19.3",
    "area_3": "10"
  },
  "price": {
    "amount": 24000,
    "currency": "USD"
  },
  "telephone": [
    "203-12-31",
    "737-00-93"
  ]
}


## Сохранить результаты разбора в json-файл

In [18]:
# Put all parsed ads into a dict with a single key at the top
json_data = {'flat_sale_ads': []}
extractor = Extractor(AD, WRAPPER)
for text in TEXTS:
    match = extractor(text)
    if match.fact:
        # Round trip through JSON text (presumably to end up with plain dicts and lists)
        json_data['flat_sale_ads'].append(json.loads(json.dumps(match.fact.as_json, ensure_ascii=False)))

# Check that all ads are included into the dict: compare against the number
# of loaded ads rather than a fixed count, so the check follows the data file
parsed_count = len(json_data['flat_sale_ads'])
assert parsed_count == len(TEXTS), f'parsed {parsed_count} of {len(TEXTS)} ads'

In [19]:
# Save the parsed ads as indented JSON with sorted keys and readable Cyrillic
output_path = os.path.join('data', 'flats_data.json')
with open(output_path, 'w', encoding='utf-8') as out_file:
    json.dump(json_data, out_file, ensure_ascii=False, indent=4, sort_keys=True)