```
python3 -m pip install dataflows==0.0.58
```

In [15]:
import getpass, os

CKAN_API_KEY = os.environ.get('CKAN_API_KEY') or getpass.getpass("CKAN_API_KEY")

CKAN_API_KEY ····································


In [2]:
CKAN_URL='https://odata.org.il'

## get all group names

In [3]:
!{'rm -rf .checkpoints/group_names'}

In [9]:
import requests
from dataflows import Flow, printer, checkpoint

group_names_checkpoint = checkpoint('group_names')

def get_group_names():
    for group_name in requests.get(CKAN_URL+'/api/3/action/group_list', headers={'Authorization': CKAN_API_KEY}).json()['result']:
        yield {'name': group_name}
        
Flow(
    get_group_names(),
    group_names_checkpoint,
    printer(tablefmt='html', num_rows=1)
).process()

using checkpoint data from .checkpoints/group_names


#,name (string)
1,foi-201908-138
2,foi-201908-517
...,
3146,foi-201908-756


(<datapackage.package.Package at 0x7fe120895d68>, {})

## Get group details

In [5]:
!{'rm -rf .checkpoints/all_group_details_5'}

In [10]:
import requests, logging
from dataflows import Flow, printer, checkpoint

group_details_checkpoint = checkpoint('all_group_details_5')

def get_group_details():
    for group in Flow(group_names_checkpoint).results()[0][0]:
        try:
            group = requests.get(CKAN_URL+'/api/3/action/group_show?id={}&include_extras=true'.format(group['name']), headers={'Authorization': CKAN_API_KEY}).json()['result']
            group = {k: group[k] for k in group if k in [
                'display_name', 'package_count', 'name', 'state', 'extras', 'title', 'id', 'approval_status'
            ]}
            print('display_name={}  name={}  id={}'.format(group['display_name'], group['name'], group['id']))
            yield group
        except Exception as e:
            print('name={}'.format(group['name']))
            logging.exception(e)

Flow(
    get_group_details(),
    group_details_checkpoint,
    printer(tablefmt='html', num_rows=1)
).process()

using checkpoint data from .checkpoints/all_group_details_5


#,display_name (string),package_count (integer),name (string),state (string),extras (array),title (string),id (string),approval_status (string)
1,כפר הנוער מאיר שפיה,0.0,foi-201908-138,active,"[{'group_id': '4979dd14-ab38-44b7-980b-394442814482', 'id': '639ad150-ce04-42d7-8da2-f6c455cb1ce7', ...",כפר הנוער מאיר שפיה,4979dd14-ab38-44b7-980b-394442814482,approved
2,פרזות חברה ממשלתית עירונית לשיכון ירושלים בעמ,0.0,foi-201908-517,active,"[{'group_id': 'f52905cc-f97f-40d6-8cb6-c8e95f61f53b', 'id': 'bad0ff9c-08e9-4374-858c-6985fb46863a', ...",פרזות חברה ממשלתית עירונית לשיכון ירושלים בעמ,f52905cc-f97f-40d6-8cb6-c8e95f61f53b,approved
...,,,,,,,,
3146,תשתית - חברה לשירותי תחזוקה באשדוד בעמ,0.0,foi-201908-756,active,"[{'group_id': 'a9bc889a-2822-452b-9f61-0f9a064caf4b', 'id': '24fedcca-cdfa-41f0-b9a9-1603be1c7091', ...",תשתית - חברה לשירותי תחזוקה באשדוד בעמ,a9bc889a-2822-452b-9f61-0f9a064caf4b,approved


(<datapackage.package.Package at 0x7fe111b4c4a8>, {})

## Export

In [None]:
!{'rm -rf data/all_groups'}

In [11]:
from dataflows import Flow, checkpoint, printer, update_resource, dump_to_path

all_extra_keys = set()

for group in Flow(group_details_checkpoint).results()[0][0]:
    for extra in group['extras']:
        all_extra_keys.add(extra['key'])

all_capitalized_keys = set()
        
for key in all_extra_keys:
    if key in ['group_id', 'merged_with_json', 'match_type', 'entity_id', 'munged_title', 'language', '__type']: continue
    has_capitalized_key = False
    for tmpkey in all_extra_keys:
        if key == tmpkey: continue
        if tmpkey.lower() == tmpkey: continue
        if tmpkey.lower() == key.lower():
            has_capitalized_key = True
    if not has_capitalized_key:
        all_capitalized_keys.add(key)

def get_group_extras():
    for group in Flow(group_details_checkpoint).results()[0][0]:
        group_extras = {extra['key'].lower(): extra['value'] for extra in group['extras']}
        for ckey in all_capitalized_keys:
            group[ckey] = group_extras.get(ckey.lower(), '')
        del group['extras']
        yield group

Flow(
    get_group_extras(),
    update_resource('res_1', name='all_groups', path='all_groups.csv'),
    dump_to_path('data/all_groups'),
    printer(tablefmt='html', num_rows=1),
).process()

using checkpoint data from .checkpoints/all_group_details_5
using checkpoint data from .checkpoints/all_group_details_5


#,approval_status (string),display_name (string),id (string),name (string),package_count (integer),state (string),title (string),budgetkey_entity_government_company_info (string),budgetkey_entity_last_status_date (string),budgetkey_entity_goal (string),budgetkey_entity_municipality (string),budgetkey_entity_founders (string),budgetkey_entity_activity_region_list (string),Published (string),budgetkey_entity___staleness (string),budgetkey_entity_primary_type_id (string),budgetkey_entity_is_mafera (string),officenamecode (string),budgetkey_entity_facebook (string),budgetkey_entity_postal_code (string),GovXContentSection (string),budgetkey_entity_address_street (string),import_id_tags (string),payments (string),budgetkey_entity_activity_region (string),MMDTypes (string),budgetkey_entity_secondary_type (string),budgetkey_entity_id (string),OfficeIcon (string),budgetkey_entity_logo_url (string),form (string),budgetkey_entity_online_data_update_year (string),budgetkey_entity_country (string),MMDSubjects (string),GovXDescription (string),budgetkey_entity_top_salaries (string),override_foi_link (string),mmdOfficesTypes_tid (string),budgetkey_entity_status_active (string),budgetkey_entity_num_of_employees (string),PaymentCheck (string),PaymentTreasury (string),email (string),budgetkey_entity_city (string),budgetkey_entity_guidestar_title (string),budgetkey_entity_primary_field_of_activity (string),budgetkey_entity_address_zip_code (string),override_email (string),budgetkey_entity___hash (string),budgetkey_entity___last_updated_at (string),PaymentPhone (string),budgetkey_entity_secondary_type_id (string),entity_main_type (string),budgetkey_entity_address_city (string),budgetkey_entity_is_government (string),budgetkey_entity_pob (string),budgetkey_entity_yearly_turnover (string),PaymentBankTransfer (string),budgetkey_entity_website (string),budgetkey_entity_activity_region_national (string),budgetkey_entity_org_status (string),budgetkey_entity_email (string),PaymentPostalBank (string),budgetkey_entity_year_established (string),PaymentCash (string),budgetkey_entity___last_modified_at (string),budgetkey_entity_title (string),budgetkey_entity_phone (string),budgetkey_entity_kind (string),budgetkey_entity_has_article_46 (string),budgetkey_entity_cooperative_registration_date (string),ManagerName (string),budgetkey_entity_municipality_id (string),FormLetter (string),websiteURL (string),budgetkey_entity_description (string),forWizard (string),budgetkey_entity_objective (string),FormOnlineURL (string),budgetkey_entity_safety_violations (string),budgetkey_entity_status (string),budgetkey_entity_address_house_num (string),budgetkey_entity_primary_type (string),OfficeTypeCode (string),budgetkey_entity_registration_date (string),budgetkey_entity_type (string),budgetkey_entity_address (string),entity_description (string),budgetkey_entity___next_update_days (string),ReceptionPhoneNumbers (string),budgetkey_entity_legal_status_id (string),FormEmail (string),Fax (string),FormFax (string),budgetkey_entity_field_of_activity_display (string),budgetkey_entity_kind_he (string),budgetkey_entity_last_report_year (string),budgetkey_entity___is_new (string),budgetkey_entity_inspector (string),nid (string),budgetkey_entity_proper_management (string),budgetkey_entity_num_of_volunteers (string),budgetkey_entity_located_at (string),budgetkey_entity_is_new_org (string),budgetkey_entity_activity_region_districts (string),budgetkey_entity_field_of_activity (string),PaymentOnlineURL (string),budgetkey_entity___is_stale (string),budgetkey_entity_org_kind (string),budgetkey_entity_limit (string),ReceptionAddressNotes (string),budgetkey_entity___created_at (string),budgetkey_entity_fields_of_activity (string),budgetkey_entity_legal_status (string),budgetkey_entity_address_lines (string),entity_secondary_type (string),budgetkey_entity_activity_region_all_country (string)
1,approved,כפר הנוער מאיר שפיה,4979dd14-ab38-44b7-980b-394442814482,foi-201908-138,0.0,active,כפר הנוער מאיר שפיה,,,,,,,Yes,,,,6005.0,,,כפר הנוער מאיר שפיה הינו מוסד חינוכי ייחודי המתמחה במדעי החיים והחקלאות. בכפר בית ספר מקיף שש שנת ...,,,,,,,,,,,,,,,,,47.0,,,,,pazit@shfeyah.org.il,,,,,,,,,,,,,,,בנק המזרחי סניף 420 חדרה חשבון 572360,,,,,בנק המזרחי סניף 420 חדרה חשבון 572360,,,,,,,,,פזית כהן,,כפר הנוער מאיר שפיה ד.נ. חוף הכרמל מיקוד 3080600,ww.shfeyah.org.il,,No,,~,,,,,6.0,,,,,,04-6390750,,pazit@shfeyah.org.il,04-6397760,04-6397760,,,,,,,,,,,,,,,,,כפר הנוער מאיר שפיה ד.נ. חוף הכרמל מיקוד 3080600,,,,,,
2,approved,פרזות חברה ממשלתית עירונית לשיכון ירושלים בעמ,f52905cc-f97f-40d6-8cb6-c8e95f61f53b,foi-201908-517,0.0,active,פרזות חברה ממשלתית עירונית לשיכון ירושלים בעמ,,,,,,,Yes,,,,6006.0,,,"פרזות חברה ממשלתית עירונית לשיכון ירושלים בע""מ היא זרוע לביצוע מדיניות משרד הבינוי והשיכון, המטפל ...",,,,,,,,,,,,,,,,,47.0,,,,,limorc@prazot.co.il,,,,,,,,,,,,,,,,,,,,,,,,,,,,,לימור כהן,,,http://www.prazot.co.il,,No,,,,,,,6.0,,,,,,02-5016204,,limorc@prazot.co.il,,,,,,,,,,,,,,,,,,,"רח' ירמיהו 11, ירושלים 9446136",,,,,,
...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3146,approved,תשתית - חברה לשירותי תחזוקה באשדוד בעמ,a9bc889a-2822-452b-9f61-0f9a064caf4b,foi-201908-756,0.0,active,תשתית - חברה לשירותי תחזוקה באשדוד בעמ,,,,,,,Yes,,,,10012.0,,,חברות עירוניות הן תאגידים עירוניים המוקמים על ידי רשויות מקומיות ומנוהלים על ידי אנשי מ ...,,,,,,,,,,,,,,,,,325.0,,,,,davidd@ashdod.muni.il,,,,,,,,,,,,,,,,,,,,,,,,,,,,,דוד דבש,,"רח' הגדוד העברי 10, ת.ד. 28, אשדוד 7710001",,,No,,~,,,,,10.0,,,,,,08-8545498/90,,davidd@ashdod.muni.il,08-8545026,08-8545026,,,,,,,,,,,,,,,,,"רח' הגדוד העברי 10, ת.ד. 28, אשדוד 7710001",,,,,,


(<datapackage.package.Package at 0x7fe11174e0f0>,
 {'count_of_rows': 3146,
  'bytes': 2773876,
  'hash': '2b28c3e96278a3fcac26d66e790941c7',
  'dataset_name': None})

## Save to CKAN resource

In [29]:
res = requests.get('https://www.odata.org.il/api/3/action/resource_show?id=7ad69959-9c4b-4dfc-ae6b-e6ebb26d4051', headers={'Authorization': CKAN_API_KEY}).json()
assert res['success'], str(res)
resource = res['result']
# print(resource)
print('Updating all_groups resource')
res = requests.post(
    'https://www.odata.org.il/api/3/action/resource_update', 
    data=resource,
    headers={'Authorization': CKAN_API_KEY},
    files=[('upload', open('./data/all_groups/all_groups.csv', 'rb'))]
).json()
assert res['success'], str(res)
print('OK: https://www.odata.org.il/dataset/entities')

Updating all_groups resource
OK: https://www.odata.org.il/dataset/entities
