In [9]:
import dataflows as DF
import glob
import requests
import json

In [10]:
# Short codes for the district names found in the 'geo' column. Lookups go
# through GEOS.get(), so a district name missing from here becomes None.
GEOS = dict([
    ('ירושלים ויו"ש', 'yv-sh'),
    ('דרום', 'drvm'),
    ('מרכז', 'mrkz'),
    ('צפון', 'tspvn'),
])

In [11]:
# Shared accumulators filled by the cells below: ERRORS gathers problem
# messages, items gathers the per-catalog-number rows each stage returns.
ERRORS, items = [], []

In [12]:
# Catalog number -> service name, for every fetched service record that has a
# catalog number.
social_services = {
    service['catalog_number']: service['name']
    for service in (
        record['value']
        for record in requests.get(
            'https://data-input.obudget.org/api/datarecords/social_service'
        ).json()['result']
    )
    if 'catalog_number' in service
}
social_services

{'870': 'תכנית משכילה - ליווי הורה עצמאי להשכלה גבוהה',
 '203': 'שירותי סמך מקצועי',
 '6': 'סל תקשורת לחירשים וכבדי שמיעה',
 '241': 'אומנה',
 '126': 'אומנת חירום',
 '243': 'פנימיות',
 'אין': 'משפחות מלוות, ילדי נע"מ ומרכזיות חינוכיות בפנימיות ',
 '245': 'מקלטים לנשים נפגעות אלימות וילדיהן',
 '150': 'דירות מעבר לנשים נפגעות אלימות וילדיהן',
 '179': 'מעטפת למרכזי עוצמה - סיוע ליחידים ומשפחות החיים בעוני ',
 '880': 'מרכזי יום לבני 21+ עם מוגבלות',
 '713': 'מערך דיור או דירות לוויין',
 '82': 'דירות בקהילות ייחודיות לאנשים עם מוגבלויות',
 '712': 'מעונות פנימייה לאנשים עם מוגבלות',
 '143': 'הוסטל לאנשים עם מוגבלויות',
 '786': 'מערך דיור לאנשים עם מוגבלויות',
 '714': 'מכינות הכנה לחיים עצמאיים ותוכנית מעבר עם התנדבות לצבא לצעירים עם מוגבלות',
 '667': 'שילוב פעוטות עם מוגבלות במעונות יום רגילים',
 '239': 'פנימיות יום - מסגרת שהייה בקהילה',
 '336': 'מעברים - תוכנית לפיתוח תעסוקתי-כלכלי-קהילתי במגזר הכפרי',
 '342': 'מרכזי חוסן קהילתי',
 '343': 'צח"י - צוותי חוסן וחירום יישוביים',
 'לא רלוונטי': 

In [17]:
# Collect every Excel workbook in the working directory...
filenames = glob.glob('*.xlsx')
# ...then replace that list with a single hard-coded workbook. Remove the next
# line to process everything the glob found.
filenames = ['מינהל מוגבלויות סופי.xlsx'] # Override
filenames

['מינהל מוגבלויות סופי.xlsx']

In [18]:
# Target field name -> list of source column headers, handed to
# DF.concatenate when the workbooks are merged.
FIELDS = {
    'catalog_number': ["מס' קטלוגי", "מספר קטלוגי"],
    'year': ['שנה'],
    'code': ['תקנה תקציבית'],
    'beneficiary_num': ['כמות מושמים'],
    'entity_id': ['ח.פ מסגרת'],
    'amount': ['סך תשלומים'],
    'tender': ['מכרז'],
    'geo': ['מחוז המסגרת'],
}

In [20]:
def loader():
    """Build the base flow shared by every stage of the notebook.

    Loads each path in `filenames`, concatenates the loaded resources using
    the FIELDS column mapping, and renames resource -1 to 'activities'.
    """
    sources = [DF.load(path) for path in filenames]
    merge = DF.concatenate(FIELDS)
    rename = DF.update_resource(-1, name='activities')
    return DF.Flow(*sources, merge, rename)


# Run the base flow once on its own; the cell displays what process() returns.
loader().process()

(<datapackage.package.Package at 0x132a90450>, {})

In [21]:
def manualBudget():
    """Sum the payments per catalog number and year.

    Returns the rows of the first result resource: one row per catalog
    number, carrying a 'manualBudget' list of {year, approved, executed}
    entries ordered from the latest year to the earliest. Both 'approved' and
    'executed' hold the summed amount; year totals equal to 0 are left out.
    """
    def has_amount(row):
        return row['amount'] != 0

    def as_budget_entry(row):
        return {'year': row['year'], 'approved': row['amount'], 'executed': row['amount']}

    steps = [
        loader(),
        DF.select_fields(['catalog_number', 'year', 'amount']),
        # First roll-up: one row per (catalog_number, year) with the summed amount.
        DF.join_with_self(
            'activities',
            ['catalog_number', 'year'],
            {'catalog_number': None, 'year': None, 'amount': {'aggregate': 'sum'}},
        ),
        DF.filter_rows(has_amount),
        DF.add_field('manualBudget', 'object', as_budget_entry),
        # Second roll-up: one row per catalog_number with all its yearly entries.
        DF.join_with_self(
            'activities',
            ['catalog_number'],
            {'catalog_number': None, 'manualBudget': {'aggregate': 'array'}},
        ),
        DF.select_fields(['catalog_number', 'manualBudget']),
        DF.set_type('manualBudget', transform=lambda v: sorted(v, key=lambda i: i['year'], reverse=True)),
        DF.printer(tablefmt='html'),
    ]
    return DF.Flow(*steps).results()[0][0]


items.extend(manualBudget())

#,catalog_number (integer),manualBudget (array)
1,14,"[{'approved': 7913073.16, 'executed': 7913073.16, 'year': 2020}, {'approved': 8982450.52, 'executed' ..."
2,142,"[{'approved': 211489092.88000003, 'executed': 211489092.88000003, 'year': 2020}, {'approved': 208092 ..."
3,143,"[{'approved': 354153830.7099999, 'executed': 354153830.7099999, 'year': 2020}, {'approved': 32994249 ..."
4,145,"[{'approved': 20554148.87, 'executed': 20554148.87, 'year': 2020}, {'approved': 30711850.190000005, ..."
5,147,"[{'approved': 824783.7, 'executed': 824783.7, 'year': 2020}, {'approved': 1607757.05, 'executed': 16 ..."
6,648,"[{'approved': 14181149.750000002, 'executed': 14181149.750000002, 'year': 2020}, {'approved': 157525 ..."
7,667,"[{'approved': 577728.39, 'executed': 577728.39, 'year': 2020}, {'approved': 672644.51, 'executed': 6 ..."
8,712,"[{'approved': 1561394568.3699996, 'executed': 1561394568.3699996, 'year': 2020}, {'approved': 145572 ..."
9,713,"[{'approved': 35530534.88999999, 'executed': 35530534.88999999, 'year': 2020}, {'approved': 32095481 ..."
10,714,"[{'approved': 74329981.05, 'executed': 74329981.05, 'year': 2020}, {'approved': 62762107.54000001, ' ..."


In [22]:
def beneficiaries():
    """Sum the beneficiary counts per catalog number and year.

    Returns the rows of the first result resource: one row per catalog
    number, carrying a 'beneficiaries' list of {year, num_beneficiaries}
    entries ordered from the latest year to the earliest. Year totals equal
    to 0 are left out.
    """
    def has_beneficiaries(row):
        return row['beneficiary_num'] != 0

    def as_beneficiary_entry(row):
        return {'year': row['year'], 'num_beneficiaries': row['beneficiary_num']}

    steps = [
        loader(),
        DF.select_fields(['catalog_number', 'year', 'beneficiary_num']),
        # First roll-up: one row per (catalog_number, year) with the summed count.
        DF.join_with_self(
            'activities',
            ['catalog_number', 'year'],
            {'catalog_number': None, 'year': None, 'beneficiary_num': {'aggregate': 'sum'}},
        ),
        DF.filter_rows(has_beneficiaries),
        DF.add_field('beneficiaries', 'object', as_beneficiary_entry),
        # Second roll-up: one row per catalog_number with all its yearly entries.
        DF.join_with_self(
            'activities',
            ['catalog_number'],
            {'catalog_number': None, 'beneficiaries': {'aggregate': 'array'}},
        ),
        DF.select_fields(['catalog_number', 'beneficiaries']),
        DF.set_type('beneficiaries', transform=lambda v: sorted(v, key=lambda i: i['year'], reverse=True)),
        DF.printer(tablefmt='html'),
    ]
    return DF.Flow(*steps).results()[0][0]


items.extend(beneficiaries())

#,catalog_number (integer),beneficiaries (array)
1,14,"[{'num_beneficiaries': 2479, 'year': 2020}, {'num_beneficiaries': 2690, 'year': 2019}, {'num_benefic ..."
2,142,"[{'num_beneficiaries': 4142, 'year': 2020}, {'num_beneficiaries': 6887, 'year': 2019}, {'num_benefic ..."
3,143,"[{'num_beneficiaries': 2488, 'year': 2020}, {'num_beneficiaries': 4842, 'year': 2019}, {'num_benefic ..."
4,145,"[{'num_beneficiaries': 5033, 'year': 2020}, {'num_beneficiaries': 10880, 'year': 2019}, {'num_benefi ..."
5,147,"[{'num_beneficiaries': 985, 'year': 2020}, {'num_beneficiaries': 2038, 'year': 2019}, {'num_benefici ..."
6,648,"[{'num_beneficiaries': 1162, 'year': 2020}, {'num_beneficiaries': 1739, 'year': 2019}, {'num_benefic ..."
7,667,"[{'num_beneficiaries': 115, 'year': 2020}, {'num_beneficiaries': 219, 'year': 2019}, {'num_beneficia ..."
8,712,"[{'num_beneficiaries': 8491, 'year': 2020}, {'num_beneficiaries': 16707, 'year': 2019}, {'num_benefi ..."
9,713,"[{'num_beneficiaries': 238, 'year': 2020}, {'num_beneficiaries': 407, 'year': 2019}, {'num_beneficia ..."
10,714,"[{'num_beneficiaries': 641, 'year': 2020}, {'num_beneficiaries': 1075, 'year': 2019}, {'num_benefici ..."


In [23]:
# Reuse supplier lookups saved by an earlier run. When the cache file is
# missing or cannot be read/parsed, report why and start with an empty cache.
try:
    # The with-block closes the file handle as soon as the JSON is parsed.
    with open('suppliers.cache.json') as cache_file:
        supplier_cache = json.load(cache_file)
except (OSError, ValueError) as e:  # missing/unreadable file, or invalid JSON
    print(e)
    supplier_cache = dict()

    
def check_entity_id():
    """Return a row processor that keeps only rows with a 9-character entity id.

    Surrounding whitespace is ignored when measuring the id, and the trimmed
    value is written back to the row so that later steps work with exactly
    the id that was validated. Rows with a missing or wrongly sized id are
    dropped and a message is appended to ERRORS.
    """
    def func(rows):
        for row in rows:
            eid = row['entity_id']
            trimmed = eid.strip() if eid else eid
            if trimmed and len(trimmed) == 9:
                # Pass on the trimmed id, not the padded original.
                row['entity_id'] = trimmed
                yield row
            else:
                ERRORS.append(f'מספר ח.פ. לא תקין {eid}')
    return func


def fetch_supplier_data():
    """Return a flow that adds supplier details to each row.

    Adds the string fields 'entity_kind', 'entity_kind_he' and 'entity_name'
    (filled from the supplier record) plus 'active' and 'related', both set
    to 'yes'. Records are looked up by row['entity_id'] and memoised in
    `supplier_cache`; a failed lookup is cached as None, so it is not retried
    while that cache entry exists. Rows whose supplier cannot be found are
    dropped and a message is appended to ERRORS.
    """
    def lookup(key):
        """Fetch the record for entity id `key`; return None when unavailable."""
        resp = requests.get('https://next.obudget.org/search/entities', params=dict(q=key))
        if resp.status_code == 200:
            resp = resp.json()['search_results']
            # Only trust the top hit when its id is the one we asked for.
            # A mismatch is treated as "not found" instead of aborting the
            # whole flow (and losing the lookups made so far).
            if len(resp) > 0 and resp[0]['source']['id'] == key:
                kind = resp[0]['source']['kind']
                resp = requests.get(f'https://next.obudget.org/get/org/{kind}/{key}')
                if resp.status_code == 200:
                    return resp.json()['value']
        # Show the last response (or search result list) to help diagnose the miss.
        print(resp, key)
        return None

    def func(rows):
        for row in rows:
            key = row['entity_id']
            if key not in supplier_cache:
                supplier_cache[key] = lookup(key)
                print('GOT', len(supplier_cache))
            rec = supplier_cache[key]
            if rec:
                row['entity_kind'] = rec['kind']
                row['entity_kind_he'] = rec['kind_he']
                row['entity_name'] = rec['name']
                yield row
            else:
                ERRORS.append(f'לא נמצא מפעיל עם חפ {key}')

    return DF.Flow(
        DF.add_field('entity_kind', 'string'),
        DF.add_field('entity_kind_he', 'string'),
        DF.add_field('entity_name', 'string'),
        DF.add_field('active', 'string', 'yes'),
        DF.add_field('related', 'string', 'yes'),
        func
    )


def suppliers():
    """Collect the suppliers active under each catalog number.

    Returns the rows of the first result resource: one row per catalog
    number, carrying a 'suppliers' list. Each supplier entry holds the entity
    id, name and kind, the constant 'active'/'related' flags, and the list of
    GEOS codes of the districts it appeared in. Rows with an invalid entity
    id or an unknown supplier are dropped by the validation and lookup steps.
    """
    supplier_keys = [
        'entity_id', 'entity_name', 'entity_kind', 'entity_kind_he', 'active', 'related', 'geo'
    ]

    def as_supplier(row):
        return {k: row[k] for k in supplier_keys}

    steps = [
        loader(),
        DF.select_fields(['catalog_number', 'entity_id', 'geo']),
        DF.set_type('geo', transform=lambda v: GEOS.get(v)),
        # One row per (catalog_number, entity_id) with the set of district codes.
        DF.join_with_self(
            'activities',
            ['catalog_number', 'entity_id'],
            {'catalog_number': None, 'entity_id': None, 'geo': {'aggregate': 'set'}},
        ),
        DF.set_type('geo', type='array', transform=list),
        DF.set_type('entity_id', type='string', transform=str),
        check_entity_id(),
        fetch_supplier_data(),
        DF.add_field('suppliers', 'object', as_supplier),
        # One row per catalog_number with all its supplier entries.
        DF.join_with_self(
            'activities',
            ['catalog_number'],
            {'catalog_number': None, 'suppliers': {'aggregate': 'array'}},
        ),
        DF.select_fields(['catalog_number', 'suppliers']),
        DF.printer(tablefmt='html'),
    ]
    return DF.Flow(*steps).results()[0][0]
   
# Persist the supplier cache even when the flow fails part-way, so lookups
# that already completed are not repeated on the next run.
try:
    items.extend(suppliers())
finally:
    # The with-block guarantees the file is flushed and closed.
    with open('suppliers.cache.json', 'w') as cache_file:
        json.dump(supplier_cache, cache_file)
items

GOT 424
GOT 425
GOT 426
GOT 427
GOT 428
GOT 429
GOT 430
GOT 431
GOT 432
GOT 433
GOT 434
GOT 435
GOT 436
GOT 437
GOT 438
GOT 439
GOT 440
GOT 441
GOT 442
GOT 443
GOT 444
GOT 445
GOT 446
GOT 447
GOT 448
GOT 449
GOT 450
GOT 451
GOT 452
GOT 453
GOT 454
GOT 455
[] 557855210
GOT 456
GOT 457
GOT 458
GOT 459
GOT 460
GOT 461
GOT 462
GOT 463
GOT 464
GOT 465
GOT 466
GOT 467
GOT 468
GOT 469
GOT 470
GOT 471
GOT 472
GOT 473
GOT 474
GOT 475
GOT 476
GOT 477
GOT 478
[] 580005039
GOT 479
GOT 480
GOT 481
GOT 482
GOT 483
GOT 484
GOT 485
GOT 486
GOT 487
GOT 488
GOT 489
GOT 490
[] 580060852
GOT 491
GOT 492
GOT 493
GOT 494
GOT 495
GOT 496
GOT 497


#,catalog_number (integer),suppliers (array)
1,14,"[{'active': 'yes', 'entity_id': '512676206', 'entity_kind': 'company', 'entity_kind_he': 'חברה פרטית ..."
2,142,"[{'active': 'yes', 'entity_id': '500701164', 'entity_kind': 'university', 'entity_kind_he': 'אוניברס ..."
3,143,"[{'active': 'yes', 'entity_id': '510913205', 'entity_kind': 'company', 'entity_kind_he': 'חברה פרטית ..."
4,145,"[{'active': 'yes', 'entity_id': '500301783', 'entity_kind': 'religion_service', 'entity_kind_he': 'ש ..."
5,147,"[{'active': 'yes', 'entity_id': '510525348', 'entity_kind': 'association', 'entity_kind_he': 'חל""צ', ..."
6,648,"[{'active': 'yes', 'entity_id': '510355720', 'entity_kind': 'company', 'entity_kind_he': 'חברה פרטית ..."
7,667,"[{'active': 'yes', 'entity_id': '580019602', 'entity_kind': 'association', 'entity_kind_he': 'עמותה' ..."
8,712,"[{'active': 'yes', 'entity_id': '500301783', 'entity_kind': 'religion_service', 'entity_kind_he': 'ש ..."
9,713,"[{'active': 'yes', 'entity_id': '512641507', 'entity_kind': 'company', 'entity_kind_he': 'חברה פרטית ..."
10,714,"[{'active': 'yes', 'entity_id': '511369720', 'entity_kind': 'company', 'entity_kind_he': 'חברה פרטית ..."


[{'catalog_number': 14,
  'manualBudget': [{'approved': 7913073.16,
    'executed': 7913073.16,
    'year': 2020},
   {'approved': 8982450.52, 'executed': 8982450.52, 'year': 2019},
   {'approved': 9845896.7, 'executed': 9845896.7, 'year': 2018},
   {'approved': 9382294.86, 'executed': 9382294.86, 'year': 2017}]},
 {'catalog_number': 142,
  'manualBudget': [{'approved': 211489092.88000003,
    'executed': 211489092.88000003,
    'year': 2020},
   {'approved': 208092510.53000003,
    'executed': 208092510.53000003,
    'year': 2019},
   {'approved': 175814697.69000003,
    'executed': 175814697.69000003,
    'year': 2018},
   {'approved': 145600218.48999998,
    'executed': 145600218.48999998,
    'year': 2017}]},
 {'catalog_number': 143,
  'manualBudget': [{'approved': 354153830.7099999,
    'executed': 354153830.7099999,
    'year': 2020},
   {'approved': 329942492.09999996,
    'executed': 329942492.09999996,
    'year': 2019},
   {'approved': 324132686.6399998,
    'executed': 32413

In [24]:
# Reuse budget item lookups saved by an earlier run. When the cache file is
# missing or cannot be read/parsed, report why and start with an empty cache.
try:
    # The with-block closes the file handle as soon as the JSON is parsed.
    with open('budget_items.cache.json') as cache_file:
        budget_cache = json.load(cache_file)
except (OSError, ValueError) as e:  # missing/unreadable file, or invalid JSON
    print(e)
    budget_cache = dict()

def fix_budget_code(b, row):
    """Turn a raw budget item value into a '0023'-prefixed budget code.

    The value is converted to a string, its last character is dropped, the
    remainder is left-padded with zeros to six characters and '0023' is put
    in front. `row` is accepted but not used.

    Raises AssertionError when more than six characters remain after the
    last one is dropped.
    """
    trimmed = str(b)[:-1]
    assert len(trimmed) <= 6
    return '0023' + trimmed.rjust(6, '0')

def fetch_budget_data():
    """Return a flow that adds each budget item's title to its row.

    Adds a string field 'title'. Budget items are fetched by row['code'] and
    row['year'] and memoised in `budget_cache` under '<code>/<year>'; a
    non-200 response is cached as None. Rows without a usable record are
    dropped and a message is appended to ERRORS.
    """
    def func(rows):
        for row in rows:
            year, code = row['year'], row['code']
            key = f'{code}/{year}'
            if key not in budget_cache:
                resp = requests.get(f'https://next.obudget.org/get/budget/{code}/{year}')
                fetched = resp.status_code == 200
                budget_cache[key] = resp.json()['value'] if fetched else None
                if not fetched:
                    print(resp, key)
                print('GOT', len(budget_cache))
            rec = budget_cache[key]
            if not rec:
                # The message shows the code without its first two characters.
                short_code = code[2:]
                ERRORS.append(f'לא נמצאה תקנה {short_code} בשנה {year}')
                continue
            row['title'] = rec['title']
            yield row

    return DF.Flow(DF.add_field('title', 'string'), func)

def budget_codes():
    """Collect the budget items used under each catalog number.

    Returns the rows of the first result resource: one row per catalog
    number, carrying a 'budgetItems' list of {code, title, year} entries
    ordered from the latest year to the earliest. Codes are normalised by
    fix_budget_code; items whose budget record cannot be fetched are dropped
    by the lookup step.
    """
    fields = ['catalog_number', 'year', 'code']

    def as_budget_item(row):
        return {k: row[k] for k in ['code', 'title', 'year']}

    steps = [
        loader(),
        DF.select_fields(fields),
        # De-duplicate: one row per distinct (catalog_number, year, code).
        DF.join_with_self('activities', fields, dict.fromkeys(fields)),
        DF.set_type('code', type='string', transform=fix_budget_code),
        fetch_budget_data(),
        DF.add_field('budgetItems', 'object', as_budget_item),
        # One row per catalog_number with all its budget item entries.
        DF.join_with_self(
            'activities',
            ['catalog_number'],
            {'catalog_number': None, 'budgetItems': {'aggregate': 'array'}},
        ),
        DF.select_fields(['catalog_number', 'budgetItems']),
        DF.set_type('budgetItems', transform=lambda v: sorted(v, key=lambda i: i['year'], reverse=True)),
        DF.printer(tablefmt='html'),
    ]
    return DF.Flow(*steps).results()[0][0]
        
# Persist the budget cache even when the flow fails part-way, so lookups that
# already completed are not repeated on the next run.
try:
    items.extend(budget_codes())
finally:
    # The with-block guarantees the file is flushed and closed.
    with open('budget_items.cache.json', 'w') as cache_file:
        json.dump(budget_cache, cache_file)

<Response [404]> 0023072202/2017
GOT 213
GOT 214
<Response [404]> 0023072206/2018
GOT 215
GOT 216
<Response [404]> 0023072206/2019
GOT 217
GOT 218
GOT 219
GOT 220
GOT 221
GOT 222
GOT 223
<Response [404]> 0023072271/2018
GOT 224
GOT 225
<Response [404]> 0023072271/2019
GOT 226
GOT 227
GOT 228
<Response [404]> 0023072204/2017
GOT 229
GOT 230
GOT 231
<Response [404]> 0023072204/2018
GOT 232
GOT 233
GOT 234
GOT 235
GOT 236
<Response [404]> 0023072203/2018
GOT 237
GOT 238
GOT 239
<Response [404]> 0023072203/2019
GOT 240
GOT 241
GOT 242
GOT 243


#,catalog_number (integer),budgetItems (array)
1,14,"[{'code': '0023072271', 'title': 'אבזור וסיוע לעיוורים', 'year': 2020}, {'code': '0023062266', 'titl ..."
2,142,"[{'code': '0023072202', 'title': 'מעונות יום שיקומיים', 'year': 2020}, {'code': '0023103841', 'title ..."
3,143,"[{'code': '0023072101', 'title': 'מסגרות דיור לנכים', 'year': 2020}, {'code': '0023072102', 'title': ..."
4,145,"[{'code': '0023072206', 'title': 'נופשונים מוגבלויות', 'year': 2020}, {'code': '0023072206', 'title' ..."
5,147,"[{'code': '0023012021', 'title': 'התמודדות עם מצבי חירום', 'year': 2020}, {'code': '0023072201', 'ti ..."
6,648,"[{'code': '0023072204', 'title': 'מעונות יום ומפעלי', 'year': 2020}, {'code': '0023072221', 'title': ..."
7,667,"[{'code': '0023072201', 'title': 'שירותי קהילה לילדים', 'year': 2020}, {'code': '0023072203', 'title ..."
8,712,"[{'code': '0023103841', 'title': 'פנימיות ילד ונער', 'year': 2020}, {'code': '0023116560', 'title': ..."
9,713,"[{'code': '0023103841', 'title': 'פנימיות ילד ונער', 'year': 2020}, {'code': '0023072101', 'title': ..."
10,714,"[{'code': '0023072101', 'title': 'מסגרות דיור לנכים', 'year': 2020}, {'code': '0023072101', 'title': ..."


In [25]:
# Reuse tender lookups saved by an earlier run. When the cache file is
# missing or cannot be read/parsed, report why and start with an empty cache.
try:
    # The with-block closes the file handle as soon as the JSON is parsed.
    with open('tenders.cache.json') as cache_file:
        tender_cache = json.load(cache_file)
except (OSError, ValueError) as e:  # missing/unreadable file, or invalid JSON
    print(e)
    tender_cache = dict()

# Tender record fields copied onto each row ('tender_key' is derived from the
# record before copying).
FF = ['tender_id', 'tender_key', 'tender_type', 'tender_type_he',
      'publication_id', 'decision', 'description', 'page_url',
      'publisher', 'regulation']
    
def fetch_tender_data():
    """Return a flow that adds tender details to each row.

    Adds one string field per name in FF plus 'related' set to 'yes'. For
    each row the tender id in row['tender'] is tried in several spellings;
    the first spelling with a record wins. Lookups are memoised in
    `tender_cache`, with None stored for spellings that gave no record. Rows
    whose tender cannot be found are dropped and a message is appended to
    ERRORS.
    """
    def candidate_keys(tender):
        """List the spellings to try: as given, then 'N-YY', 'N/20YY', 'N-20YY'."""
        keys = [tender]
        parts = tender.split('/')
        # Ids without a '/' have no number/year pair to respell; try them as-is
        # instead of failing on the missing second part.
        if len(parts) >= 2:
            number, year = parts[0], parts[1]
            keys.extend([f'{number}-{year}', f'{number}/20{year}', f'{number}-20{year}'])
        return keys

    def fetch(key):
        """Return the full record of the tender whose id equals `key`, or None."""
        params = dict(
            size=10, offset=0, q=key, from_date='1900-01-01', to_date='2100-01-01',
            filter='[{"publisher":"הרווחה"}]'
        )
        resp = requests.get('https://next.obudget.org/search/tenders', params=params)
        if resp.status_code != 200:
            return None
        for result in resp.json()['search_results']:
            source = result['source']
            if source['tender_id'] != key:
                continue
            print('FOUND', key)
            pid = source['publication_id']
            tt = source['tender_type']
            detail = requests.get(f'https://next.obudget.org/get/tenders/{tt}/{pid}/{key}')
            if detail.status_code == 200:
                return detail.json()['value']
            # Otherwise keep scanning: another hit may carry the same id.
        return None

    def func(rows):
        for row in rows:
            keys = candidate_keys(row['tender'])
            found_key = None
            for key in keys:
                if key not in tender_cache:
                    tender_cache[key] = fetch(key)
                if tender_cache[key] is not None:
                    found_key = key
                    break
            print('GOT', len(tender_cache), found_key, keys)
            if found_key is None:
                # Fall back to the catalog number when the service is not
                # known, so reporting one problem cannot raise another.
                catalog_number = str(row['catalog_number'])
                service = social_services.get(catalog_number, catalog_number)
                ERRORS.append('לא נמצא מכרז עם מזהה {} בשירות {}'.format(row['tender'], service))
                continue
            rec = tender_cache[found_key]
            rec['tender_key'] = '{publication_id}:{tender_type}:{tender_id}'.format(**rec)
            for f in FF:
                row[f] = rec[f]
            yield row

    return DF.Flow(
        *[
            DF.add_field(f, 'string') for f in FF
        ],
        DF.add_field('related', 'string', 'yes'),
        func
    )
    
def tenders():
    """Collect the tenders referenced under each catalog number.

    Returns the rows of the first result resource: one row per catalog
    number, carrying a 'tenders' list whose entries hold the FF fields plus
    'related'. Rows without a tender id are skipped, and tenders that cannot
    be found are dropped by the lookup step.
    """
    fields = ['catalog_number', 'tender']

    def has_tender(row):
        return row['tender'] is not None

    def as_tender(row):
        return {k: row[k] for k in FF + ['related']}

    steps = [
        loader(),
        DF.select_fields(fields),
        # De-duplicate: one row per distinct (catalog_number, tender).
        DF.join_with_self('activities', fields, dict.fromkeys(fields)),
        DF.filter_rows(has_tender),
        DF.set_type('tender', type='string'),
        fetch_tender_data(),
        DF.add_field('tenders', 'object', as_tender),
        # One row per catalog_number with all its tender entries.
        DF.join_with_self(
            'activities',
            ['catalog_number'],
            {'catalog_number': None, 'tenders': {'aggregate': 'array'}},
        ),
        DF.select_fields(['catalog_number', 'tenders']),
        DF.printer(tablefmt='html'),
    ]
    return DF.Flow(*steps).results()[0][0]
        
# Persist the tender cache even when the flow fails part-way, so lookups that
# already completed are not repeated on the next run.
try:
    items.extend(tenders())
finally:
    # The with-block guarantees the file is flushed and closed.
    with open('tenders.cache.json', 'w') as cache_file:
        json.dump(tender_cache, cache_file)

GOT 320 None ['112/15', '112-15', '112/2015', '112-2015']
GOT 320 None ['113/15', '113-15', '113/2015', '113-2015']
GOT 320 None ['114/15', '114-15', '114/2015', '114-2015']
FOUND 136-2015
GOT 324 136-2015 ['136/15', '136-15', '136/2015', '136-2015']
GOT 328 None ['139/18', '139-18', '139/2018', '139-2018']
GOT 332 None ['160/16', '160-16', '160/2016', '160-2016']
GOT 336 None ['169/14', '169-14', '169/2014', '169-2014']
GOT 340 None ['170/14', '170-14', '170/2014', '170-2014']
GOT 344 None ['170/15', '170-15', '170/2015', '170-2015']
GOT 348 None ['171/14', '171-14', '171/2014', '171-2014']
GOT 352 None ['172/14', '172-14', '172/2014', '172-2014']
GOT 356 None ['184/15', '184-15', '184/2015', '184-2015']
GOT 360 None ['237/17', '237-17', '237/2017', '237-2017']
GOT 364 None ['263/16', '263-16', '263/2016', '263-2016']
GOT 368 None ['264/16', '264-16', '264/2016', '264-2016']
GOT 368 108-2015 ['108/15', '108-15', '108/2015', '108-2015']
GOT 368 143/2012 ['143/12', '143-12', '143/2012',

FOUND 163-2015
GOT 542 163-2015 ['163/15', '163-15', '163/2015', '163-2015']


#,catalog_number (integer),tenders (array)
1,14,"[{'decision': 'סגור', 'description': 'שירותי הדרכה שיקומית לאדם העיוור וללקויי הראייה מלידה עד גיל 3 ..."
2,142,"[{'decision': 'סגור', 'description': 'הצטרפות למאגר ספקים ארצי להפעלת מעונות יום שיקומיים (מכרז משלי ..."
3,143,"[{'decision': 'סגור', 'description': 'הפעלת מסגרות לדיור חוץ ביתי עבור אוכלוסיית אגף השיקום, בפריסה ..."
4,145,"[{'decision': 'סגור', 'description': 'מכרז לבניית רשימת ספקים להפעלה של נופשונים לילדים, בני נוער ו ..."
5,147,"[{'decision': 'סגור', 'description': 'הפעלת קייטנות לאוכלוסייה על רצף האוטיזם הפעלת קייטנות לאוכלו ..."
6,648,"[{'decision': 'סגור', 'description': 'הפעלת מרכזי תעסוקה לאנשים עם עיוורון או לקות ראיה, מכל המגזרים ..."
7,667,"[{'decision': 'סגור', 'description': 'הפעלת בתי תלמיד לעיוורים וללקויי ראייה', 'page_url': 'https:// ..."
8,712,"[{'decision': 'סגור', 'description': 'הפעלת מסגרות לדיור חוץ ביתי עבור אוכלוסיית אגף השיקום, בפריסה ..."
9,713,"[{'decision': 'סגור', 'description': 'הפעלת מסגרות לדיור חוץ ביתי עבור אוכלוסיית אגף השיקום, בפריסה ..."
10,714,"[{'decision': 'סגור', 'description': 'הפעלת מסגרות לדיור חוץ ביתי עבור אוכלוסיית אגף השיקום, בפריסה ..."


In [26]:
# Fetch the registered services under their own name. `social_services` is
# the catalog-number -> name dict that the tender lookup indexes by string;
# rebinding it to a list here would break re-running that cell.
service_records = [
    record['value']
    for record in requests.get(
        'https://data-input.obudget.org/api/datarecords/social_service'
    ).json()['result']
]
all_cat_numbers = [
    str(rec.get('catalog_number')) for rec in service_records if 'catalog_number' in rec
]
cat_numbers = set(all_cat_numbers)
# Catalog numbers are expected to be unique across the registered services.
assert len(all_cat_numbers) == len(cat_numbers), 'duplicate catalog numbers in registered services'

# Merge everything collected in `items` into one dict per catalog number.
used = set()
updates = dict()
for item in items:
    # Read the catalog number without popping it: `items` stays intact, so
    # this cell can be re-run without a KeyError.
    cn = str(item['catalog_number'])
    if cn not in used:
        if cn not in cat_numbers:
            ERRORS.append(f'שירות מספר {cn} לא נמצא במערכת')
        used.add(cn)
    updates.setdefault(cn, dict()).update(
        (k, v) for k, v in item.items() if k != 'catalog_number'
    )
print(len(updates))
# The with-block guarantees the file is flushed and closed.
with open('updates.json', 'w') as updates_file:
    json.dump(updates, updates_file)

18


In [27]:
# Write the de-duplicated, sorted error messages, one per line. The encoding
# is set to UTF-8 explicitly because the messages are Hebrew and the platform
# default encoding may not be able to represent them.
with open('errors.txt', 'w', encoding='utf-8') as error_log:
    error_log.write('\n'.join(sorted(set(ERRORS))))